In [None]:
import sys
import os
sys.path.append('../src')

In [None]:
import torch
import numpy as np
import pandas as pd
import random

import global_var
import augment
import dataset
import modeling

import cv2
from torch.utils.data import DataLoader
from torchvision import models as models
import torch.nn as nn
import torch.optim as optim

from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score

%matplotlib inline

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and asks cuDNN for deterministic kernels.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only inherited by child processes: the running interpreter fixed its
    # own hash seed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers every visible GPU, not just the current one, and
    # is a safe no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different algorithms from run to run,
    # which defeats the purpose of seeding.
    torch.backends.cudnn.benchmark = False


seed_everything(seed=256)

# Load data

In [None]:
# Read the three index tables (train, test, extended test) from the
# current working directory, in that order.
train_cl_nm, test_nm, test_extended_nm = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'test_extended.csv')
)

In [None]:
# Empty accumulators for the training images and their labels.
X_train = []
y_train = []

In [None]:
# Load every training image listed in train.csv together with its 'sign' label.
# The accumulators are (re)created here so that re-running this cell does not
# append a second copy of the dataset.
X_train, y_train = [], []
for index, row in tqdm(train_cl_nm.iterrows(), total=len(train_cl_nm)):
    im_path = global_var.TRAIN_DIR + row['filename'] + '.png'
    im = cv2.imread(im_path)
    # cv2.imread reports failure by returning None instead of raising.
    if im is None:
        raise FileNotFoundError(f'Could not read training image: {im_path}')
    X_train.append(im)
    y_train.append(row['sign'])

In [None]:
# Stack the loaded images into one array and turn the labels into a
# single-column 2-D array.
X = np.array(X_train)
y = np.reshape(np.array(y_train), (-1, 1))

In [None]:
# Load every test image listed in test.csv, in file order.
test = []
for index, row in tqdm(test_nm.iterrows(), total=len(test_nm)):
    im_path = global_var.TEST_DIR + row['filename'] + '.png'
    im = cv2.imread(im_path)
    # cv2.imread reports failure by returning None instead of raising.
    if im is None:
        raise FileNotFoundError(f'Could not read test image: {im_path}')
    test.append(im)

In [None]:
# Load the extended test set. Files that also appear in test.csv are read as
# PNGs from TEST_DIR; all others are read as JPEGs from TEST_EXT_DIR.
test_extended = []
# Build the lookup once: a set gives O(1) membership tests instead of scanning
# the whole filename column for every row.
known_test_files = set(test_nm.filename.values)
for index, row in tqdm(test_extended_nm.iterrows(), total=len(test_extended_nm)):
    if row['filename'] in known_test_files:
        im_path = global_var.TEST_DIR + row['filename'] + '.png'
    else:
        im_path = global_var.TEST_EXT_DIR + row['filename'] + '.jpeg'
    im = cv2.imread(im_path)
    # cv2.imread reports failure by returning None instead of raising.
    if im is None:
        raise FileNotFoundError(f'Could not read extended test image: {im_path}')
    test_extended.append(im)

In [None]:
# Wrap the test images in a dataset that applies the validation-time
# augmentation, and serve it in file order (no shuffling).
X_test = np.array(test)
inference_data = dataset.Airplane_test(
    X_test,
    augmentation=augment.valid_augmentation(global_var.IMAGE_SIZE),
)
inference_dataloader = DataLoader(
    dataset=inference_data,
    batch_size=global_var.BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Same wrapping for the extended test images. Note that `inference_data` is
# rebound here; the loader built earlier keeps its own dataset reference.
X_test_ext = np.array(test_extended)
inference_data = dataset.Airplane_test(
    X_test_ext,
    augmentation=augment.valid_augmentation(global_var.IMAGE_SIZE),
)
inference_ext_dataloader = DataLoader(
    dataset=inference_data,
    batch_size=global_var.BATCH_SIZE,
    shuffle=False,
)

# Training

In [None]:
def train_model(model, device, loss_fn, optimizer, scheduler, train_loader, val_loader, num_epoch, DECAY, n_fold):
    """Train ``model`` for ``num_epoch`` epochs, validating and checkpointing after each one.

    After every epoch the model's ``state_dict`` is written to
    ``epoch_{i}_fold_{n_fold}.pth`` in the current working directory.

    Args:
        model: Network to train; called as ``model(batch)``.
        device: Device the batches are moved to before the forward pass.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler, also stepped once per training batch.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        num_epoch: Number of epochs to run.
        DECAY: Unused; kept so existing calls keep working.
        n_fold: Fold index, used only in the checkpoint file name.

    Returns:
        Tuple ``(train_losses, test_losses, acc, f, mean_score)`` of lists with
        one entry per epoch: mean training batch loss, mean validation batch
        loss, validation accuracy, validation F1 and ``(accuracy + F1) / 2``.
    """
    train_losses = []
    test_losses = []
    acc = []
    f = []
    mean_score = []

    for i in range(num_epoch):
        print('Epoch №:', i)

        # ---- training pass ----
        epoch_train_losses = []
        model.train(True)
        for X_batch, y_batch in tqdm(train_loader):
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            loss = loss_fn(model(X_batch), y_batch)
            loss.backward()

            optimizer.step()
            scheduler.step()

            # Store a plain float, not a tensor, so the graph can be freed.
            epoch_train_losses.append(loss.item())

        train_losses.append(np.mean(epoch_train_losses))

        # ---- validation pass ----
        model.eval()
        epoch_test_losses = []
        val_targets = []
        val_preds = []
        with torch.no_grad():
            for X_val, y_val in val_loader:
                X_val, y_val = X_val.to(device), y_val.to(device)
                outputs = model(X_val)
                epoch_test_losses.append(loss_fn(outputs, y_val).item())

                # NOTE(review): if loss_fn is BCEWithLogitsLoss, sigmoid (not
                # tanh) is the matching probability map. Left unchanged so the
                # decision rule is not silently altered - confirm against the
                # rule used at inference time.
                scores = outputs.tanh().cpu().numpy()
                val_preds.append((scores >= 0.4).astype(int))
                val_targets.append(y_val.cpu().numpy())

        test_losses.append(np.mean(epoch_test_losses))

        # Score the whole validation set at once: F1 is not an average of
        # per-batch F1 values, and a batch without positives would score 0.
        if val_targets:
            y_true = np.concatenate(val_targets)
            y_hat = np.concatenate(val_preds)
            accuracy = accuracy_score(y_true, y_hat)
            f1 = f1_score(y_true, y_hat)
        else:
            accuracy = f1 = float('nan')

        acc.append(accuracy)
        f.append(f1)
        mean_score.append((accuracy + f1) / 2)

        # Keep the weights of every epoch so the best one can be reloaded later.
        torch.save(model.state_dict(), f'epoch_{i}_fold_{n_fold}.pth')

        print(
            'Train loss =', train_losses[-1],
            'Val loss =', test_losses[-1],
            f'Val scores: accuracy:{acc[-1]}, f1: {f[-1]}, mean: {mean_score[-1]}'
        )

    return train_losses, test_losses, acc, f, mean_score

In [None]:
# Model training: 7-fold stratified cross-validation. For every fold a fresh
# pretrained ResNet-18 with a single-output head is trained, the best epoch's
# checkpoint is reloaded, and its test predictions are added to `y_preds`.
#
# NOTE(review): the loop rebinds the notebook-level names X_train / y_train to
# the current fold's subset, so earlier cells that read them must not be
# re-run after this one without reloading the data.
NUM_EPOCHS = 10  # shared by the LR schedule and the training loop

kfold = StratifiedKFold(n_splits=7)
n_fold = 0
# Running sum of the per-fold test predictions, sized from the test set rather
# than a hard-coded length (assumes inference_fn yields one value per test
# image - TODO confirm).
y_preds = np.zeros(len(X_test))
best_epochs = []

for train_index, test_index in kfold.split(X, y):
    print("Fold", n_fold)
    X_train, y_train = X[train_index], y[train_index]
    X_valid, y_valid = X[test_index], y[test_index]

    train_data = dataset.Airplane(X_train, y_train, augmentation=augment.train_augmentation(global_var.IMAGE_SIZE))
    val_data = dataset.Airplane(X_valid, y_valid, augmentation=augment.valid_augmentation(global_var.IMAGE_SIZE))
    # Shuffle the training batches so every epoch sees a different sample
    # order; validation stays in a fixed order.
    trainloader = DataLoader(train_data, batch_size=global_var.BATCH_SIZE, shuffle=True)
    valloader = DataLoader(val_data, batch_size=global_var.BATCH_SIZE, shuffle=False)

    net = models.resnet18(pretrained=True).to(global_var.DEVICE)
    fc_inputs = net.fc.in_features

    # Replace the classification head with a single-output layer.
    net.fc = nn.Sequential(
        nn.Linear(fc_inputs, 1)
    ).to(global_var.DEVICE)

    # OneCycleLR drives the learning rate from max_lr / div_factor up to
    # max_lr and back down, so the lr given to Adam is only a placeholder.
    optimizer = optim.Adam(net.parameters(), lr = 0.00075)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3,
                                              max_lr=1e-3, epochs=NUM_EPOCHS, steps_per_epoch=len(trainloader))

    criterion = nn.BCEWithLogitsLoss()

    train_losses, val_losses, accuracy, f1, mean_score = train_model(
        net, global_var.DEVICE, criterion, optimizer, scheduler,
        trainloader, valloader, NUM_EPOCHS, global_var.DECAY, n_fold
    )

    # Use one criterion (the combined accuracy/F1 score) both for the recorded
    # checkpoint name and for the weights that produce the test predictions.
    best_epoch = int(np.argmax(mean_score))
    best_checkpoint = f'epoch_{best_epoch}_fold_{n_fold}.pth'
    best_epochs.append(best_checkpoint)

    net.load_state_dict(torch.load(best_checkpoint))
    y_preds += modeling.inference_fn(net, inference_dataloader, global_var.DEVICE).reshape(-1)

    n_fold+=1

In [None]:
# Show the checkpoint file name recorded for each fold during training.
best_epochs