### NN: convolutional neural network for binary classification of images as broken / not broken

In [1]:
import os
import pandas as pd
import numpy as np
import imageio
import random
import torch
import torch.nn as nn
from sklearn.metrics import roc_curve, roc_auc_score

In [2]:
# Image folders, one per data split.
train_path, val_path, small_path = './train_data', './validation_data', './small_sample'

# Label tables, one per data split. The loaders below look images up through
# the `id` column and read the label from the `broken` column.
# NOTE(review): df_small / small_path are not used by any cell visible here.
df_train = pd.read_csv('./df_train.csv')
df_val = pd.read_csv('./df_validation.csv')
df_small = pd.read_csv('./df_small.csv')

In [3]:
def read_batch_images(images, df, path):
    """Load a batch of images and pair each one with its label.

    Args:
        images: iterable of image file names of the form ``<id>.<ext>`` where
            ``<id>`` parses as an integer.
        df: DataFrame with an ``id`` column and a ``broken`` column (the label).
        path: directory that contains the image files.

    Returns:
        A list of ``(pixels, target)`` tuples in the order of ``images``.
        ``pixels`` is the image as a NumPy array divided by 255 and ``target``
        is the matching ``broken`` value converted to ``int``.

    Raises:
        ValueError: if a file name does not start with an integer id, or if
            ``df`` does not contain exactly one row for that id.
    """
    features_target = []
    for im in images:
        # Rescale by 1/255 (assumes 8-bit pixel values -- TODO confirm).
        current_image = np.array(imageio.imread(os.path.join(path, im))) / 255
        image_id = int(im.split('.')[0])
        labels = df.loc[df['id'] == image_id, 'broken']
        # int(<Series>) is deprecated in recent pandas and fails with an opaque
        # TypeError when the id is missing or duplicated, so check explicitly.
        if len(labels) != 1:
            raise ValueError(
                'expected exactly one label row for image {!r} (id {}), found {}'.format(
                    im, image_id, len(labels)))
        target = int(labels.iloc[0])
        features_target.append((current_image, target))
    return features_target

In [4]:
def batch_generator(df, path, batch_size):
    """Yield shuffled batches of ``(image, target)`` pairs for one data folder.

    Args:
        df: label table, passed through unchanged to ``read_batch_images``.
        path: directory to scan for image files.
        batch_size: maximum number of images per batch; the last batch may be
            smaller.

    Yields:
        The list returned by ``read_batch_images`` for each consecutive slice
        of the shuffled file list.

    Notes:
        The file order is reshuffled on every call using the global ``random``
        module state.
    """
    images_name = []
    for f in os.listdir(path):
        parts = f.split('.')
        # Same selection rule as before (second dot-separated part is 'png'),
        # but entries without any dot (e.g. sub-directories) are skipped
        # instead of raising IndexError.
        if len(parts) > 1 and parts[1] == 'png':
            images_name.append(f)
    random.shuffle(images_name)
    # Step over the files that were actually found rather than len(df): the two
    # can differ, which previously produced empty trailing batches or silently
    # dropped files.
    for i in range(0, len(images_name), batch_size):
        yield read_batch_images(images_name[i:i + batch_size], df, path)

In [5]:
class BasicNet(nn.Module):
    """Small CNN that maps a single-channel image to one raw logit.

    Three ``conv(3x3) -> ReLU -> max-pool(2)`` stages are followed by three
    fully connected layers with dropout after the first two. No sigmoid is
    applied to the output.

    ``fc1`` expects 12800 features per sample. For a 340x340 input the spatial
    side shrinks 340 -> 338 -> 169 -> 167 -> 83 -> 81 -> 40, and
    8 channels * 40 * 40 = 12800.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 16, 3)
        self.conv3 = nn.Conv2d(16, 8, 3)
        self.fc1 = nn.Linear(12800, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 1)
        self.pool = nn.MaxPool2d(2)
        self.relu = nn.ReLU()
        self.drp1 = nn.Dropout(0.25)
        self.drp2 = nn.Dropout(0.25)

    def forward(self, x):
        """Return logits of shape ``(batch, 1)`` for ``x`` of shape ``(batch, 1, H, W)``.

        Raises:
            RuntimeError: if the flattened feature size per sample does not
                match ``fc1`` (i.e. the input is not the expected image size).
        """
        fo = self.pool(self.relu(self.conv1(x)))
        so = self.pool(self.relu(self.conv2(fo)))
        # Flatten per sample. The previous `.view(-1, 12800)` could silently
        # merge several samples into one row when the input size was wrong;
        # keeping the batch dimension fixed makes fc1 raise instead.
        to = torch.flatten(self.pool(self.relu(self.conv3(so))), start_dim=1)
        fc1_out = self.drp1(self.relu(self.fc1(to)))
        fc2_out = self.drp2(self.relu(self.fc2(fc1_out)))
        out = self.fc3(fc2_out)
        return out

In [12]:
def get_performance_metrics(target, score, min_sens=0.85):
    """Compute ROC AUC and the most specific operating point above a sensitivity floor.

    Args:
        target: true binary labels, as accepted by ``roc_curve``.
        score: predicted scores, as accepted by ``roc_curve``.
        min_sens: minimum sensitivity (true-positive rate) an operating point
            must reach to be considered. Defaults to 0.85.

    Returns:
        ``(auc_roc, best_sens, best_spec, best_th)``: the ROC AUC, followed by
        the sensitivity, specificity and threshold of the ROC point that has
        the highest specificity among those with sensitivity ``>= min_sens``
        (the first such point on ties).

    Raises:
        ValueError: if no ROC point reaches ``min_sens``.
    """
    fpr, tpr, threshold = roc_curve(target, score)
    auc_roc = roc_auc_score(target, score)
    df = pd.DataFrame({
        'sens': tpr,
        'spec': 1 - np.asarray(fpr),  # specificity = 1 - false-positive rate
        'threshold': threshold,
    })
    good_metrics = df[df['sens'] >= min_sens]
    if good_metrics.empty:
        raise ValueError('no ROC operating point reaches sensitivity {}'.format(min_sens))
    # Take the argmax on a plain ndarray so the result is always a position
    # suitable for .iloc, independent of how pandas dispatches np.argmax on a
    # filtered Series.
    index_best = int(np.argmax(good_metrics['spec'].to_numpy()))
    df_best = good_metrics.iloc[index_best]

    return auc_roc, df_best['sens'], df_best['spec'], df_best['threshold']
    
def train(model, optimizer, criterion, df_train, train_path, df_val, val_path, num_epochs, batch_size, device):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: network called on tensors of shape ``(batch, 1, 340, 340)``.
        optimizer: optimizer stepped once per training batch.
        criterion: loss called as ``criterion(output.view(-1), targets)``.
        df_train, train_path: label table and image folder of the training set.
        df_val, val_path: label table and image folder of the validation set.
        num_epochs: number of passes over the training data.
        batch_size: maximum number of images per batch.
        device: torch device the batches are created on.

    Returns:
        ``(train_loss, val_loss)`` of the last epoch, each being the summed
        batch losses divided by the number of rows in the corresponding label
        table. ``(None, None)`` when ``num_epochs`` is 0.

    Side effects:
        Prints one line of metrics per epoch.
    """
    # Keep the history outside the epoch loop so it accumulates across epochs
    # (it used to be re-created on every iteration).
    tr_losses = []
    val_losses = []
    for i in range(num_epochs):
        total_loss = 0
        model.train()
        for batch in batch_generator(df_train, train_path, batch_size):
            if not batch:
                continue  # nothing to learn from an empty batch
            # Stack into one ndarray first: building a tensor directly from a
            # list of ndarrays is a known slow path in PyTorch.
            images = np.stack([d[0] for d in batch])
            y = [d[1] for d in batch]
            x_t = torch.as_tensor(images, dtype=torch.float, device=device).view(len(batch), 1, 340, 340)
            y_t = torch.tensor(y, dtype=torch.float, device=device)
            model.zero_grad()
            output = model(x_t)
            loss = criterion(output.view(-1), y_t)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        val_probs, ys, val_loss = validate(model, criterion, df_val, val_path, batch_size, device)
        val_losses.append(val_loss / df_val.shape[0])
        tr_losses.append(total_loss / df_train.shape[0])
        auc_roc, sens, spec, th = get_performance_metrics(ys, val_probs)

        print('Epoch {}, avg train loss per image {}, avg valid loss per image {}, auc {}, sens {}, spec {}, th {}'.format(
            i+1, tr_losses[-1], val_losses[-1], auc_roc, sens, spec, th
        ))
    if not tr_losses:
        # num_epochs == 0: there is no last epoch to report.
        return None, None
    return tr_losses[-1], val_losses[-1]

def validate(model, criterion, df_val, val_path, batch_size, device):
    """Run ``model`` over the validation images without computing gradients.

    Args:
        model: network called on tensors of shape ``(batch, 1, 340, 340)``.
        criterion: loss called as ``criterion(output.view(-1), targets)``.
        df_val, val_path: label table and image folder of the validation set.
        batch_size: maximum number of images per batch.
        device: torch device the batches are created on.

    Returns:
        ``(probs, ys, val_total_loss)``: the sigmoid of every model output, the
        matching integer targets in the same order, and the sum of the
        per-batch loss values (not divided by the number of images).

    Side effects:
        Leaves ``model`` in evaluation mode.
    """
    model.eval()
    val_total_loss = 0
    probs = []
    ys = []
    with torch.no_grad():
        for batch in batch_generator(df_val, val_path, batch_size):
            if not batch:
                continue  # nothing to score in an empty batch
            # Stack into one ndarray first: building a tensor directly from a
            # list of ndarrays is a known slow path in PyTorch.
            images = np.stack([d[0] for d in batch])
            y = [d[1] for d in batch]
            x_t = torch.as_tensor(images, dtype=torch.float, device=device).view(len(batch), 1, 340, 340)
            y_t = torch.tensor(y, dtype=torch.float, device=device)
            output = model(x_t)
            probs.extend(torch.sigmoid(output).view(-1).cpu().numpy())
            ys.extend(y)
            val_total_loss += criterion(output.view(-1), y_t).item()
    return probs, ys, val_total_loss

In [18]:
# Training hyper-parameters.
epochs = 100
batch_size = 100
learning_rate = 0.001

# Seed every RNG involved (batch shuffling uses `random`, weight init and
# dropout use torch) so a Restart & Run All is repeatable.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU instead of
# failing on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BasicNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Summed (not averaged) loss: train() divides the epoch total by the number of
# rows in the label table to report a per-image average.
criterion = nn.BCEWithLogitsLoss(reduction='sum')

In [None]:
# Run the full training loop. The image folders come from the path variables
# defined next to the label tables, so the locations are configured in one place.
train(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    df_train=df_train,
    train_path=train_path,
    df_val=df_val,
    val_path=val_path,
    num_epochs=epochs,
    batch_size=batch_size,
    device=device,
)

Epoch 1, avg train loss per image 0.4506478410819032, avg valid loss per image 0.40465728914563875, auc 0.754888771830301, sens 0.8521739130434782, spec 0.548928238583411, th 0.1781543642282486
Epoch 2, avg train loss per image 0.39391232807594295, avg valid loss per image 0.40668944411797425, auc 0.782017099558329, sens 0.8565217391304348, spec 0.5890027958993476, th 0.09073594212532043
Epoch 3, avg train loss per image 0.3602186768712682, avg valid loss per image 0.3536726556552527, auc 0.837031484257871, sens 0.8565217391304348, spec 0.6700838769804287, th 0.2089560627937317
Epoch 4, avg train loss per image 0.30663807099146717, avg valid loss per image 0.3163128809707492, auc 0.8705052878965923, sens 0.8521739130434782, spec 0.6924510717614165, th 0.13388581573963165
Epoch 5, avg train loss per image 0.24161777803822948, avg valid loss per image 0.3129659590589387, auc 0.8814173994084039, sens 0.8521739130434782, spec 0.7325256290773532, th 0.17592306435108185
Epoch 6, avg train lo

In [17]:
# NOTE(review): scratch arithmetic, not referenced by any other cell visible
# here -- consider deleting this cell before sharing the notebook.
4 + 4

8