In [38]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, models, transforms
from sklearn.model_selection import train_test_split
import numpy as np
from torch.utils.data import Subset
import matplotlib.pyplot as plt

import pickle


In [39]:
# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU
# so the notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Preprocessing applied to every image: resize to a fixed 128x128, convert to
# a tensor, then normalise each channel with the ImageNet mean/std values.
preprocessing_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)


In [4]:
data_dir = 'biometry/antifrod_biometry_dataset_publish/train'

# Fixed seed so the train/validation split (and therefore every metric computed
# on the validation part) is identical on each Restart & Run All.
SPLIT_SEED = 42

# Load every image under data_dir with the preprocessing pipeline attached
full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Sample indices belonging to class 0 and to class 1
class_0_indices = [i for i, target in enumerate(full_dataset.targets) if target == 0]
class_1_indices = [i for i, target in enumerate(full_dataset.targets) if target == 1]

# Keep 20% of class 0 and all of class 1.
# NOTE(review): the slice keeps the *first* 20% of class-0 indices in dataset
# order, not a random sample — confirm this is the intended down-sampling.
class_0_subset = class_0_indices[:len(class_0_indices)//5]
class_1_subset = class_1_indices

# Final working set: the reduced class 0 followed by the full class 1
final_indices = class_0_subset + class_1_subset
subset_data = Subset(full_dataset, final_indices)

# 80/20 train/validation split, drawn from a seeded generator for reproducibility
train_size = int(0.8 * len(subset_data))
val_size = len(subset_data) - train_size
train_data, val_data = random_split(
    subset_data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)


In [5]:
# Both loaders share the batch size and the number of worker processes used
# for loading; only the training loader reshuffles its samples.
loader_options = dict(batch_size=32, num_workers=4)
train_loader = DataLoader(train_data, shuffle=True, **loader_options)
val_loader = DataLoader(val_data, shuffle=False, **loader_options)


In [6]:
from torchvision import models

# MobileNetV2 (a lightweight architecture) initialised with pretrained weights.
# NOTE(review): newer torchvision releases prefer a `weights=` argument over
# `pretrained=` — confirm the installed version before switching.
model = models.mobilenet_v2(pretrained=True)

# Swap the last classifier layer for a fresh linear head with two outputs
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(head_in_features, 2)

# Move the model to the GPU when one is available, otherwise keep it on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)




In [7]:
# Adam over every parameter returned by model.parameters(), learning rate 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Cross-entropy loss for the classification head
criterion = nn.CrossEntropyLoss()


In [10]:
# Number of training epochs
num_epochs = 5

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5, device=None):
    """Train ``model`` and print train/validation metrics after every epoch.

    Args:
        model: the ``nn.Module`` to optimise (updated in place).
        train_loader: iterable of ``(inputs, labels)`` batches used for training.
        val_loader: iterable of ``(inputs, labels)`` batches used for validation.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer already bound to the model's parameters.
        num_epochs: number of passes over ``train_loader``.
        device: device the batches are moved to. When ``None`` it is taken from
            the model's first parameter (CPU if the model has no parameters),
            so the function does not depend on a notebook-level global.

    Returns:
        None. Per-epoch metrics are printed; losses are averaged per batch and
        reported as ``nan`` when the corresponding loader yields no batches.
    """
    # The progress bar is optional: import it here so the function works on a
    # fresh kernel, and degrade to a plain iterator when tqdm is not installed.
    try:
        from tqdm.auto import tqdm
    except ImportError:
        def tqdm(iterable, *args, **kwargs):
            return iterable

    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    nan = float('nan')

    for epoch in range(num_epochs):
        # Training pass
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Validation pass (no gradient tracking)
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()

                # Predicted class = index of the largest logit
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard the averages so an empty loader reports nan instead of raising
        n_train_batches = len(train_loader)
        n_val_batches = len(val_loader)
        epoch_loss = running_loss / n_train_batches if n_train_batches else nan
        epoch_val_loss = val_loss / n_val_batches if n_val_batches else nan
        epoch_accuracy = correct / total if total else nan

        print(f"Epoch {epoch+1}/{num_epochs} => "
              f"Train Loss: {epoch_loss:.4f}, Val Loss: {epoch_val_loss:.4f}, Val Accuracy: {epoch_accuracy:.4f}")
        
# Use the num_epochs variable defined at the top of this cell rather than
# repeating the literal, so the epoch count is changed in one place.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=num_epochs)


  0%|          | 0/311 [00:00<?, ?it/s]

Epoch 1/5 => Train Loss: 0.0809, Val Loss: 0.0609, Val Accuracy: 0.9760


  0%|          | 0/311 [00:00<?, ?it/s]

Epoch 2/5 => Train Loss: 0.0506, Val Loss: 0.0512, Val Accuracy: 0.9802


  0%|          | 0/311 [00:00<?, ?it/s]

Epoch 3/5 => Train Loss: 0.0405, Val Loss: 0.0678, Val Accuracy: 0.9758


  0%|          | 0/311 [00:00<?, ?it/s]

Epoch 4/5 => Train Loss: 0.0318, Val Loss: 0.0244, Val Accuracy: 0.9918


  0%|          | 0/311 [00:00<?, ?it/s]

Epoch 5/5 => Train Loss: 0.0277, Val Loss: 0.0488, Val Accuracy: 0.9838


In [14]:
import torch.nn.functional as F

def get_probabilities(model, data_loader, device=None):
    """Run ``model`` over ``data_loader`` and collect softmax probabilities.

    Args:
        model: the ``nn.Module`` to evaluate; it is switched to eval mode.
        data_loader: iterable of ``(inputs, labels)`` batches.
        device: device the inputs are moved to. When ``None`` it is taken from
            the model's first parameter (CPU if the model has no parameters),
            so the function does not depend on a notebook-level global.

    Returns:
        ``(probs, true_labels)``: ``probs`` is a list with one NumPy array of
        class probabilities per sample, ``true_labels`` a list with the
        matching label for each sample, both in loader order.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    probs = []
    true_labels = []

    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs.to(device))
            # Softmax over the class dimension turns logits into probabilities
            probabilities = F.softmax(outputs, dim=1)
            probs.extend(probabilities.cpu().numpy())
            # .cpu() is a no-op for CPU labels and keeps .numpy() valid otherwise
            true_labels.extend(labels.cpu().numpy())

    return probs, true_labels


In [18]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Collect model outputs on the validation loader. Despite its name, y_pred
# holds the softmax probabilities returned by get_probabilities (one array per
# sample), not hard class labels; y_true holds the matching labels.
y_pred, y_true = get_probabilities(model, val_loader)

# # Metrics (disabled)
# # NOTE(review): these calls pass y_pred as-is; since it holds probabilities,
# # class labels (e.g. an argmax per sample) are presumably needed before re-enabling.
# print("Accuracy:", accuracy_score(y_true, y_pred))
# print("Precision:", precision_score(y_true, y_pred, average='macro'))  # 'macro' or 'weighted'
# print("Recall:", recall_score(y_true, y_pred, average='macro'))
# print("F1 Score:", f1_score(y_true, y_pred, average='macro'))

# # Full report
# print("\nClassification Report:\n", classification_report(y_true, y_pred))


In [20]:
from sklearn.metrics import roc_auc_score

In [30]:
# ROC AUC on the validation split, scored with column 1 (class-1 probability)
class_1_scores = np.array(y_pred)[:, 1]
roc_auc_score(y_true, class_1_scores)

0.996002433875815

In [32]:
from sklearn.metrics import precision_recall_curve, auc

In [33]:
def pr_auc(y, pred):
    """Return the area under the precision-recall curve.

    ``y`` and ``pred`` are forwarded unchanged to ``precision_recall_curve``;
    the area is then computed by ``auc`` with recall as the x values and
    precision as the y values.
    """
    curve = precision_recall_curve(y, pred)
    precision_values, recall_values = curve[0], curve[1]
    return auc(recall_values, precision_values)

In [34]:
# PR AUC on the validation split, scored with column 1 (class-1 probability)
class_1_scores = np.array(y_pred)[:, 1]
pr_auc(y_true, class_1_scores)

0.9765331843999076

In [35]:
# Number of labels collected from the validation loader
len(y_true)

9936

In [37]:
# Persist the trained model object with pickle. The context manager makes sure
# the file handle is flushed and closed even if pickling raises.
# NOTE(review): this pickles the whole module object; loading it back needs the
# same class definitions importable and must only be done on trusted files.
# Saving model.state_dict() is usually the more portable option — confirm
# what the consumer of model_biometry.pkl expects before changing the format.
with open('model_biometry.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)