In [None]:
import sys
import os
# Extend the module search path with ../src
# (presumably where the project modules imported in the next cell live - TODO confirm).
sys.path.append('../src')

In [None]:
import torch
import numpy as np
import pandas as pd
import random

import global_var
import augment
import dataset
import modeling

import cv2
from torch.utils.data import DataLoader
from torchvision import models as models
import torch.nn as nn
import torch.optim as optim

from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score

%matplotlib inline

In [None]:
def seed_everything(seed=42):
    """Seed every random number source used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds Python's
    ``random`` module, NumPy's global RNG and PyTorch (via both
    ``torch.manual_seed`` and ``torch.cuda.manual_seed``), and asks cuDNN
    to use deterministic algorithms.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # All of these seeding functions take the seed as their single argument.
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True


seed_everything(seed=42)

In [None]:
# Read the CSV index files for the train and test splits.
# The loading loops further down read a `filename` column from both frames
# and a `sign` (label) column from the training frame.
train_cl_nm = pd.read_csv('train.csv')
test_nm = pd.read_csv('test.csv')

In [None]:
# Preview the first rows of the training index.
train_cl_nm.head()

In [None]:
# Accumulators for the raw training images and their labels.
X_train, y_train = [], []

In [None]:
# Start from empty lists so this cell can be re-run safely: otherwise a second run
# would append duplicates, and after the cross-validation cell `X_train` is an
# ndarray that has no `.append`.
X_train, y_train = [], []

# Read every training image listed in the training index together with its label.
# `iterrows()` is a generator, so tqdm needs an explicit total to show progress.
for index, row in tqdm(train_cl_nm.iterrows(), total=len(train_cl_nm)):
    path = global_var.TRAIN_DIR + row['filename'] + '.png'
    im = cv2.imread(path)
    # cv2.imread does not raise on a missing/unreadable file, it returns None;
    # fail here instead of building a broken object array later.
    if im is None:
        raise FileNotFoundError(f'Could not read training image: {path}')
    X_train.append(im)
    y_train.append(row['sign'])

In [None]:
# Convert the collected images into one array.
# NOTE(review): this gives a dense numeric array only if every image has the same shape - confirm.
X = np.array(X_train)
# Labels as a column vector of shape (N, 1).
y = np.array(y_train).reshape(-1,1)

In [None]:
# Load an ImageNet-pretrained ResNet-18 and move it straight to the configured device.
net = models.resnet18(pretrained=True).to(global_var.DEVICE)
fc_inputs = net.fc.in_features

# Replace the original classifier with a single-output linear head.
net.fc = nn.Sequential(
    nn.Linear(fc_inputs, 1)
).to(global_var.DEVICE)
net.aux_logits=False

# Freeze every parameter first ...
for param in net.parameters():
    param.requires_grad = False

# ... then re-enable gradients for the classifier head and layer1-layer4.
# Anything outside these submodules stays frozen.
for trainable in (net.fc, net.layer1, net.layer2, net.layer3, net.layer4):
    for param in trainable.parameters():
        param.requires_grad = True

# Stratified k-fold cross-validation

In [None]:
def train_model(model, device, loss_fn, optimizer, train_loader, val_loader, num_epoch, n_fold,
                lr_decay_epoch=5, lr_decay_factor=0.8):
    """Train ``model`` for ``num_epoch`` epochs, validating and checkpointing after each one.

    Args:
        model: torch module to train; switched between train and eval mode in place.
        device: device every batch is moved to before the forward pass.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar loss tensor.
        optimizer: optimizer stepped once per training batch.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        val_loader: iterable of ``(inputs, targets)`` validation batches.
        num_epoch: number of passes over ``train_loader``.
        n_fold: fold identifier, used only in the checkpoint file name.
        lr_decay_epoch: zero-based epoch index at whose end the learning rate is
            scaled once (default 5, the previously hard-coded value).
        lr_decay_factor: multiplier applied to each param group's learning rate at
            that epoch (default 0.8, the previously hard-coded value).

    Returns:
        Tuple ``(train_losses, test_losses, acc)`` of per-epoch lists: mean training
        batch loss, mean validation batch loss, and validation accuracy of the
        thresholded (``sigmoid >= 0.5``) predictions.

    Side effects:
        Saves ``model.state_dict()`` to ``epoch_{i}_fold_{n_fold}.pth`` in the
        current working directory after every epoch and prints the epoch metrics.
    """
    train_losses = []
    test_losses = []
    acc = []

    for i in range(num_epoch):
        # ---- training pass ----
        model.train()
        epoch_train_losses = []
        for batch_x, batch_y in tqdm(train_loader):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # Forward pass and loss
            loss = loss_fn(model(batch_x), batch_y)

            # Reset gradients, backpropagate, update the weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Store a plain number (not a tensor) for the epoch statistics
            epoch_train_losses.append(loss.item())

        train_losses.append(np.mean(epoch_train_losses))

        # ---- validation pass ----
        model.eval()
        epoch_test_losses = []
        epoch_targets = []
        epoch_preds = []
        with torch.no_grad():
            for X_val, y_val in val_loader:
                X_val, y_val = X_val.to(device), y_val.to(device)
                y_pred = model(X_val)
                epoch_test_losses.append(loss_fn(y_pred, y_val).item())

                y_pred = y_pred.sigmoid().cpu().numpy()
                epoch_preds.append((y_pred >= 0.5).astype(int))
                epoch_targets.append(y_val.cpu().numpy())

        test_losses.append(np.mean(epoch_test_losses))
        # Score all validation samples at once: averaging per-batch accuracies
        # over-weights a smaller final batch.
        if epoch_targets:
            acc.append(accuracy_score(np.concatenate(epoch_targets), np.concatenate(epoch_preds)))
        else:
            # Empty validation loader: keep the "nan" result instead of raising.
            acc.append(float('nan'))

        # Save the weights of every epoch so the best one can be reloaded later
        torch.save(model.state_dict(), f'epoch_{i}_fold_{n_fold}.pth')

        print(
            'Train loss =', train_losses[-1],
            'Val loss =', test_losses[-1],
            'Val accuracy score =', acc[-1]
        )

        # One-off learning-rate decay
        if i == lr_decay_epoch:
            for g in optimizer.param_groups:
                g['lr'] = g['lr'] * lr_decay_factor

    return train_losses, test_losses, acc

In [None]:
# Load every test image listed in the test index, in row order.
test = []
# `iterrows()` is a generator, so tqdm needs an explicit total to show progress.
for index, row in tqdm(test_nm.iterrows(), total=len(test_nm)):
    path = global_var.TEST_DIR + row['filename'] + '.png'
    im = cv2.imread(path)
    # cv2.imread returns None (no exception) when a file is missing or unreadable.
    if im is None:
        raise FileNotFoundError(f'Could not read test image: {path}')
    test.append(im)
X_test = np.array(test)
inference_data = dataset.Airplane_test(X_test, augmentation = augment.valid_augmentation())

# The dataloader batches samples from our dataset class; shuffling stays off so
# the batches follow the order in which the images were loaded.
inference_dataloader = DataLoader(inference_data, batch_size=global_var.BATCH_SIZE, shuffle=False)

In [None]:
# Training: stratified 5-fold cross-validation.
# Every fold fine-tunes a fresh pretrained ResNet-18, reloads the checkpoint of the
# epoch with the highest validation accuracy and adds that model's test predictions
# to `y_preds`.
kfold = StratifiedKFold(n_splits=5)
n_fold = 0
# One accumulator slot per test image (instead of a hard-coded 1000), so the cell
# works for any test-set size.
# NOTE(review): `y_preds` ends up as the SUM over folds - divide by the number of
# folds downstream if an average is intended.
y_preds = np.zeros(len(X_test))
best_epochs = []

for train_index, test_index in kfold.split(X, y):
    print("Fold", n_fold)
    X_train, y_train = X[train_index], y[train_index]
    X_valid, y_valid = X[test_index], y[test_index]

    train_data = dataset.Airplane(X_train, y_train, augmentation=augment.train_augmentation())
    # Validation uses the same augmentation pipeline as the inference dataset.
    val_data = dataset.Airplane(X_valid, y_valid, augmentation=augment.valid_augmentation())
    # Reshuffle the training samples every epoch so batches do not depend on file
    # order; the validation order is irrelevant and stays fixed.
    trainloader = DataLoader(train_data, batch_size=global_var.BATCH_SIZE, shuffle=True)
    valloader = DataLoader(val_data, batch_size=global_var.BATCH_SIZE, shuffle=False)

    # Fresh ImageNet-pretrained ResNet-18 with a single-output head for every fold.
    net = models.resnet18(pretrained=True).to(global_var.DEVICE)
    fc_inputs = net.fc.in_features

    net.fc = nn.Sequential(
        nn.Linear(fc_inputs, 1)
    ).to(global_var.DEVICE)
    # NOTE(review): `aux_logits` looks like an Inception/GoogLeNet option that a
    # ResNet does not read - confirm and drop.
    net.aux_logits=False
    optimizer = optim.Adam(net.parameters(), lr = 0.00075)
    criterion = nn.BCEWithLogitsLoss()

    # Pass the configured device explicitly: no bare `device` name is defined in
    # the visible notebook. Despite its name, `roc_score` holds the per-epoch
    # validation accuracy returned by train_model.
    train_losses, val_losses, roc_score = train_model(net, global_var.DEVICE, criterion, optimizer, trainloader, valloader, 10, n_fold)

    # Reload the checkpoint written by train_model for the best-scoring epoch.
    best_checkpoint = f'epoch_{np.array(roc_score).argmax()}_fold_{n_fold}.pth'
    best_epochs.append(best_checkpoint)
    net.load_state_dict(torch.load(best_checkpoint))
    y_preds += modeling.inference_fn(net, inference_dataloader, global_var.DEVICE).reshape(-1)
    n_fold+=1

# Check: best checkpoint selected for each fold

In [None]:
# Checkpoint file name of the best-scoring epoch for each fold.
best_epochs