In [1]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel rather than whatever `pip` is on PATH.
%pip install -q torch torchvision

In [2]:
import os
from PIL import Image
import gc

import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score
import torch.nn.functional as F


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import sklearn
from sklearn.metrics import average_precision_score


import random
import warnings
from tqdm import tqdm
warnings.filterwarnings("ignore")

In [3]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds the ``random``,
    NumPy and PyTorch generators with the same value, and switches cuDNN to
    deterministic mode with auto-tuning (benchmark) turned off.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # One seed value for the Python, NumPy and PyTorch generators.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


seed_everything()

### Multi-class problem

### Resnet training

In [8]:
!nvidia-smi

Tue May 23 11:48:34 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.105.01   Driver Version: 515.105.01   CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:41:00.0 Off |                  N/A |
|  0%   20C    P8    14W / 370W |    895MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:61:00.0 Off |                  N/A |
|  0%   20C    P8    11W / 370W |   7135MiB / 24576MiB |      0%      Default |
|       

In [9]:
def clean():
    """Free unreachable Python objects, then release cached CUDA memory.

    The garbage collector runs first so that GPU memory held only by
    unreachable objects (for example reference cycles) is handed back to
    PyTorch's caching allocator *before* ``empty_cache`` releases the unused
    cached blocks. In the opposite order that memory would stay cached.
    """
    gc.collect()
    torch.cuda.empty_cache()

In [10]:
# Collect garbage and empty the CUDA cache before any model is created.
clean()

In [11]:
# Restrict this process to physical GPU 3. The variable has to be spelled
# CUDA_VISIBLE_DEVICES (the previous spelling was silently ignored) and must be
# set before CUDA is initialised for the first time.
# NOTE(review): confirm that a GPU with index 3 exists on this machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

In [12]:
# Generic CUDA device (no explicit index); the models and the loss weights
# further down are moved onto it.
device = torch.device('cuda')

### EfficientNet

In [135]:
from effnet.efficientnet import efficientnet

from utils.trainer import Trainer
from data.multi_class_build_data import build_dataloader

# Mini-batch size handed to the multi-class data loaders.
BATCH_SIZE = 8

# Preprocessing: convert each image to a tensor, then normalise it with
# mean 0.5 and std 0.5.
transfrom = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std=0.5),
    ]
)

# build_dataloader returns the two loaders, two length values (presumably the
# dataset sizes) and the number of classes.
(
    train_dataloader,
    test_dataloader,
    trainset_len,
    testset_len,
    NUM_CLASS,
) = build_dataloader(transfrom, BATCH_SIZE)

In [136]:
# Loss for the multi-class task: cross-entropy over the class logits.
criteriation=nn.CrossEntropyLoss()

In [144]:
# Build the project's EfficientNet and replace element 1 of its classifier
# with a linear layer that emits NUM_CLASS outputs.
# NOTE(review): 1280 is assumed to be the feature width feeding the
# classifier — confirm against effnet.efficientnet.
effnet = efficientnet()
effnet.classifier[1] = nn.Linear(1280, NUM_CLASS)

In [145]:
# Move the model onto the selected device.
effnet = effnet.to(device)

In [146]:
# Adam over all model parameters with a learning rate of 1e-4.
optimizer = torch.optim.Adam(effnet.parameters(), lr=1e-4)

### Training 

In [147]:
# Run name; it determines the checkpoint file name below.
NAME = 'effnet'

# Trainer for the multi-class task: model, loss, device, both loaders with
# their length values, the optimizer, and a two-epoch schedule.
trainer = Trainer(
    effnet,
    criteriation,
    device,
    train_dataloader,
    test_dataloader,
    trainset_len,
    testset_len,
    optimizer,
    epochs=2,
    path_output='multi_class_output/{name}.pt'.format(name=NAME),
    multi_label=False,
)

In [148]:
# Run the training loop configured above; the captured output shows per-batch
# and per-epoch loss and accuracy.
trainer.training()

[1]/[2] Epoch starts
	 Batch train loss: 1.4173368215560913, accuracy 0.125
	 Batch train loss: 0.6500968933105469, accuracy 0.625
	 Batch train loss: 0.6747581362724304, accuracy 0.875
	 Batch train loss: 1.0785253047943115, accuracy 0.5
	 Batch train loss: 0.22702595591545105, accuracy 0.875
[1]/[2] End epoch: train loss: 0.693294952304126, val loss: 0.3635874504292515
	 Epoch train accuracy: 0.6995078921318054, val accuracy: 0.8468033775633294

[2]/[2] Epoch starts
	 Batch train loss: 0.24580885469913483, accuracy 0.875
	 Batch train loss: 0.37618398666381836, accuracy 0.875
	 Batch train loss: 0.242514967918396, accuracy 0.875
	 Batch train loss: 0.17320388555526733, accuracy 0.875
	 Batch train loss: 0.1986524909734726, accuracy 0.875
[2]/[2] End epoch: train loss: 0.2528687315517201, val loss: 0.3794699323693939
	 Epoch train accuracy: 0.908406138420105, val accuracy: 0.8584640128669079



### Evaluate model

In [149]:
# Reload the saved weights (presumably the checkpoint the Trainer wrote to its
# `path_output`). The path is built from NAME exactly like `path_output`, so
# the two cannot drift apart, and `map_location` puts the tensors on the
# active device regardless of the device they were saved from.
effnet.load_state_dict(
    torch.load(
        'multi_class_output/{name}.pt'.format(name=NAME),
        map_location=device,
    )['model_state_dict']
)
effnet = effnet.to(device)

In [150]:
# Rebuild the Trainer around the reloaded model. No optimizer or epoch count
# is passed here; this instance is only used for the validation call below.
trainer = Trainer(
    effnet,
    criteriation,
    device,
    train_dataloader,
    test_dataloader,
    trainset_len,
    testset_len,
    path_output='multi_class_output/{name}.pt'.format(name=NAME),
    multi_label=False,
)

In [151]:
# Run validation and keep only the third returned value; its elements 0 and 1
# are exported below as the labels and the predictions.
_, _, preds = trainer.val()

In [152]:
# Put the ground-truth labels (preds[0]) and the model predictions (preds[1])
# side by side, one row per sample.
prediciton = pd.DataFrame([preds[0].cpu().numpy(), preds[1].cpu().numpy()]).T
# Name the columns through `.columns`. Assigning a list to `prediciton.rename`
# only shadowed the DataFrame.rename method and left the columns as 0 and 1.
prediciton.columns = ['labels', 'predictions']

prediciton.to_csv('multi_class_output/{name}.csv'.format(name=NAME), index=False)

### Multi-label problem

In [153]:
# Load the sample metadata and split each pipe-delimited 'Finding Labels'
# entry into a list of individual label names.
data = pd.read_csv('sample_labels.csv')
data['lables'] = data['Finding Labels'].str.split('|')

# Flatten the per-row lists into one single-column frame with one row per
# label occurrence.
labels = pd.DataFrame(
    [name for per_row in data['lables'].values for name in per_row],
    columns=['labels'],
)

### Losses

In [154]:
# Inverse-frequency weight per label: 1 / (share of all label occurrences).
weights = 1 / (labels.value_counts() / labels.shape[0])
# Order the weights alphabetically by label name. Sorting on the index avoids
# selecting the value column by name: after reset_index() that column is `0`
# on pandas 1.x but `count` on pandas 2.x, so `[0]` raises KeyError there.
# NOTE(review): this alphabetical order must match the class order of the
# targets produced by the multi-label data loader — confirm.
weights = torch.tensor(weights.sort_index().values)

class WeightedMultilabel(nn.Module):
    """Binary cross-entropy on logits with a fixed weight per class.

    The element-wise loss is multiplied by ``weights`` (using normal
    broadcasting rules) and then averaged over all elements.

    Args:
        weights: Weights multiplied into the element-wise loss; must be
            broadcastable against the loss tensor.
    """

    def __init__(self, weights: torch.Tensor):
        super().__init__()
        # reduction='none' keeps one loss value per element so that it can be
        # weighted before averaging.
        self.cerition = nn.BCEWithLogitsLoss(reduction='none')
        # Registered as a (non-persistent) buffer so that `.to(device)` on the
        # criterion moves the weights with it, without adding an entry to the
        # module's state_dict. `self.weights` stays accessible as before.
        self.register_buffer('weights', torch.as_tensor(weights), persistent=False)

    def forward(self, outputs, targets):
        """Return the mean of the weighted element-wise BCE-with-logits loss."""
        loss = self.cerition(outputs, targets)
        # Align the dtype so float64 weights (e.g. built from NumPy values) do
        # not silently promote the whole loss to float64.
        return (loss * self.weights.to(loss.dtype)).mean()

# Move the class weights onto the selected device before they are handed to
# the loss module.
weights = weights.to(device)

# Replaces the earlier multi-class criterion with the weighted multi-label loss.
criteriation = WeightedMultilabel(weights)

In [156]:
from utils.trainer import Trainer
from data.multi_label_build_data import build_dataloader
from effnet.blocks import SamePadConv2d


# Mini-batch size handed to the multi-label data loaders.
BATCH_SIZE = 4

# Preprocessing pipeline: resize, random rotation and brightness jitter, then
# tensor conversion and normalisation with mean 0.5 / std 0.5.
# NOTE(review): this single pipeline, random augmentations included, is handed
# to build_dataloader, which returns both loaders — confirm the test split is
# not augmented inside build_dataloader.
transfrom = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.RandomRotation(degrees=30),
        transforms.ColorJitter(brightness=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std=0.5),
    ]
)

# build_dataloader returns the two loaders, two length values (presumably the
# dataset sizes) and the number of classes.
(
    train_dataloader,
    test_dataloader,
    trainset_len,
    testset_len,
    NUM_CLASS,
) = build_dataloader(transfrom, BATCH_SIZE)

In [158]:
# Drop the name bound to the multi-class model and build a fresh EfficientNet
# whose classifier emits NUM_CLASS outputs for the multi-label task.
# NOTE: `del effnet` raises NameError if `effnet` is not defined, e.g. when
# this section is run without the multi-class section.
# NOTE(review): 1280 is assumed to be the feature width feeding the
# classifier — confirm against effnet.efficientnet.
del effnet
effnet = efficientnet()
effnet.classifier[1] = nn.Linear(1280, NUM_CLASS)

In [173]:
# Replace the first layer of the first feature block with a new SamePadConv2d.
# NOTE(review): the arguments are presumably (in_channels=1, out_channels=32,
# kernel_size=3, stride=2), i.e. single-channel input — confirm against
# effnet.blocks.SamePadConv2d.
effnet.features[0][0] = SamePadConv2d(1, 32, 3, 2)

In [174]:
# Move the rebuilt model (including the replaced layers) onto the selected device.
effnet = effnet.to(device)

In [175]:
# Fresh Adam optimizer (learning rate 1e-4) bound to the parameters of the
# rebuilt multi-label model.
optimizer = torch.optim.Adam(effnet.parameters(), lr=1e-4)

In [176]:
# Run name; it determines the checkpoint file name below.
NAME = 'effnet'

# Trainer for the multi-label task: model, weighted loss, device, both loaders
# with their length values, the optimizer, and a five-epoch schedule.
trainer = Trainer(
    effnet,
    criteriation,
    device,
    train_dataloader,
    test_dataloader,
    trainset_len,
    testset_len,
    optimizer,
    epochs=5,
    path_output='multi_label_output/{name}.pt'.format(name=NAME),
    multi_label=True,
)

In [177]:
# Run the multi-label training loop configured above; the captured output
# shows per-batch and per-epoch loss and accuracy.
trainer.training()

[1]/[5] Epoch starts
	 Batch train loss: 69.15490364463922, accuracy 0.1598151848151848
	 Batch train loss: 50.426439402602576, accuracy 0.8333333333333333
	 Batch train loss: 49.835138517160864, accuracy 0.7083333333333334
	 Batch train loss: 49.70812711591691, accuracy 0.5834821428571428
	 Batch train loss: 49.66802338512578, accuracy 0.6129629629629629
[1]/[5] End epoch: train loss: 50.88828334548714, val loss: 49.64276266034121
	 Epoch train accuracy: 0.6645671908974975, val accuracy: 0.672954740026551

[2]/[5] Epoch starts
	 Batch train loss: 49.643475355910496, accuracy 0.6674603174603175
	 Batch train loss: 49.62960854456948, accuracy 0.6375
	 Batch train loss: 49.605470529265446, accuracy 1.0
	 Batch train loss: 49.60904559464066, accuracy 0.7666666666666666
	 Batch train loss: 49.604250253061586, accuracy 0.5736111111111112
[2]/[5] End epoch: train loss: 49.62056342002089, val loss: 49.605529587336456
	 Epoch train accuracy: 0.6804248909457943, val accuracy: 0.6749370505101995

### Evaluate model

In [179]:
# Reload the saved weights (presumably the checkpoint the Trainer wrote to its
# `path_output`). The path is built from NAME exactly like `path_output`, so
# the two cannot drift apart, and `map_location` puts the tensors on the
# active device regardless of the device they were saved from.
effnet.load_state_dict(
    torch.load(
        'multi_label_output/{name}.pt'.format(name=NAME),
        map_location=device,
    )['model_state_dict']
)
effnet = effnet.to(device)

In [180]:
# Rebuild the Trainer around the reloaded model. No optimizer or epoch count
# is passed here; this instance is only used for the validation call below.
trainer = Trainer(
    effnet,
    criteriation,
    device,
    train_dataloader,
    test_dataloader,
    trainset_len,
    testset_len,
    path_output='multi_label_output/{name}.pt'.format(name=NAME),
    multi_label=True,
)

In [181]:
# Run validation and keep only the third returned value; its elements 0 and 1
# are written below to the 'true' and 'pred' CSV files.
_, _, preds = trainer.val()

In [182]:
# Write the first element of `preds` to the "true" CSV file.
pd.DataFrame(preds[0].cpu().numpy()).to_csv(
    'multi_label_output/true_{name}.csv'.format(name=NAME),
    index=False,
)
# Write the second element of `preds` to the "pred" CSV file.
pd.DataFrame(preds[1].cpu().numpy()).to_csv(
    'multi_label_output/pred_{name}.csv'.format(name=NAME),
    index=False,
)