In [1]:
from pathlib import Path
import pandas as pd
import os
from torch.utils.data import Dataset,DataLoader
from PIL import Image
from torchvision import transforms as T
import torch.nn as nn
import torch
import torch.nn.functional as F
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold
from sklearn.model_selection import train_test_split
import numpy as np
from fastprogress.fastprogress import master_bar, progress_bar
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from torchvision import models
import pdb
import albumentations as A
from albumentations.pytorch.transforms import ToTensor
import matplotlib.pyplot as plt
import pickle 

In [None]:
!pip install efficientnet_pytorch
!pip install densenet_pytorch
!pip install resnet_pytorch

In [3]:
from efficientnet_pytorch import EfficientNet
from densenet_pytorch import DenseNet 
from resnet_pytorch import ResNet 

In [4]:
# import os, glob, shutil
# for idx, file in enumerate(glob.glob('./MURA-v1.1/valid/XR_WRIST/patient*/*/*')):
#     splitted = file.split('/')
#     label = [0 if splitted[-2].split('_')[1]=="negative" else 1][0]
#     new_filename=str(label)+"_"+str(idx+17000)+".png"
#     shutil.copy(file, './muradatavalid/'+new_filename)

# import os, glob, shutil
# for idx, file in enumerate(glob.glob('./MURA-v1.1/train/XR_WRIST/patient*/*/*')):
#     splitted = file.split('/')
#     label = [0 if splitted[-2].split('_')[1]=="negative" else 1][0]
#     new_filename=str(label)+"_"+str(idx+17000)+".png"
#     shutil.copy(file, './Medathon_Mura/'+new_filename)

# All images were copied into the "Medathon_Mura" folder.
# The "muradatavalid" folder contains the validation images used during training.
# The "Medathon_Mura" folder contains all abnormal and normal images from both the MURA and Medathon datasets.

In [5]:
# Directories holding the image files; both live under the same root.
_drive_root = "/content/drive/MyDrive"
training_folder = f"{_drive_root}/Medathon_Mura"  # images read by the training dataset
valid_folder = f"{_drive_root}/muradatavalid"     # images read by the validation dataset

In [6]:
def _label_frame(folder):
    """Return a DataFrame listing the files in `folder` with an integer label.

    The label is the first character of each filename converted to int, so
    every filename must start with a digit (e.g. "0_17001.png").
    """
    # sorted() makes the row order independent of the order os.listdir returns.
    frame = pd.DataFrame({"filename": sorted(os.listdir(folder))})
    frame["label"] = frame["filename"].apply(lambda name: int(name[0]))
    return frame


# Index the training images and persist the index for the later cells.
df = _label_frame(training_folder)
df.to_csv('train.csv', index=False)

# Same for the validation images (labels are ints in both frames).
valid_df = _label_frame(valid_folder)
valid_df.to_csv('valid.csv', index=False)

In [7]:
# Training configuration read by the cells below.
loss_name = "BCEWithLogits"                   # label written to the records / checkpoint name
loss_fn = F.binary_cross_entropy_with_logits  # loss callable handed to fit()
epochs = 1           # number of epochs passed to fit()
height = width = 64  # image size written to the records / checkpoint name
bs = 64              # batch size passed to get_data()

In [8]:
class MedathonDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame from a directory.

    Args:
        df: DataFrame with a 'filename' column and, unless `is_test` is set,
            a 'label' column.
        im_path: Directory the filenames are relative to.
        transforms: Optional callable invoked as ``transforms(image=array)``;
            the ``"image"`` entry of its result replaces the loaded image.
        is_test: When true, items are returned without a target.
    """

    def __init__(self, df, im_path, transforms=None, is_test=False):
        self.df = df
        self.im_path = im_path
        self.transforms = transforms
        self.is_test = is_test

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return the image at position `idx`, plus its target unless `is_test`."""
        row = self.df.iloc[idx]
        image = Image.open(f"{self.im_path}/{row['filename']}").convert("RGB")
        if self.transforms:
            image = self.transforms(image=np.array(image))["image"]
        if self.is_test:
            return image
        # The target is wrapped in a length-1 float32 tensor.
        return image, torch.tensor([row['label']], dtype=torch.float32)

In [9]:
def get_train_val_split(df, test_size=0.3, random_state=None):
    """Randomly split `df` into a training part and a validation part.

    Args:
        df: DataFrame to split.
        test_size: Value forwarded to `train_test_split` as `test_size`
            (default 0.3, the value that used to be hard-coded).
        random_state: Seed forwarded to `train_test_split`; pass an int for a
            reproducible split. The default (None) leaves the split unseeded.

    Returns:
        Tuple ``(train_df, valid_df)``.
    """
    train_df, valid_df = train_test_split(
        df, test_size=test_size, random_state=random_state
    )
    return train_df, valid_df

In [10]:
def get_augmentations(p=0.5, height=64, width=64):
    """Build the albumentations pipelines for training and for evaluation.

    Args:
        p: Probability given to each augmentation stage of the training pipeline.
        height: Target height passed to `A.Resize` in both pipelines.
        width: Target width passed to `A.Resize` in both pipelines.

    Returns:
        Tuple ``(train_tfms, test_tfms)``. `test_tfms` only resizes;
        `train_tfms` resizes and then applies the random augmentations.
    """
    # NOTE(review): A.Cutout, A.Flip and A.IAAAdditiveGaussianNoise may not exist in
    # every albumentations release -- confirm against the installed version.
    train_tfms = A.Compose([
        A.Resize(height, width),
        A.Cutout(num_holes=12, p=p),
        A.RandomRotate90(p=p),
        A.Flip(p=p),
        # Colour jitter: one of brightness/contrast or hue/saturation/value.
        A.OneOf([
            A.RandomBrightnessContrast(brightness_limit=0.2,
                                       contrast_limit=0.2,
                                       ),
            A.HueSaturationValue(
                hue_shift_limit=20,
                sat_shift_limit=50,
                val_shift_limit=50)
        ], p=p),
        # Additive noise.
        A.OneOf([
            A.IAAAdditiveGaussianNoise(),
            A.GaussNoise(),
        ], p=p),
        # Light blur.
        A.OneOf([
            A.MedianBlur(blur_limit=3, p=0.1),
            A.Blur(blur_limit=3, p=0.1),
        ], p=p),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=p),
        A.OneOf([
            A.GridDistortion(p=0.1),
        ], p=p),
        ])

    test_tfms = A.Compose([
        A.Resize(height, width),
        ])
    return train_tfms, test_tfms

In [11]:
class MedathonEfficientNet(nn.Module):
    """EfficientNet feature extractor followed by a one-output linear head.

    Args:
        model_name: Name passed to `EfficientNet.from_pretrained`.
        pool_type: Pooling function called as ``pool_type(feature_map, 1)``.
    """

    def __init__(self, model_name='efficientnet-b0', pool_type=F.adaptive_avg_pool2d):
        super().__init__()
        self.pool_type = pool_type
        self.backbone = EfficientNet.from_pretrained(model_name)
        # The head takes as many inputs as the backbone's own `_fc` layer.
        self.classifier = nn.Linear(self.backbone._fc.in_features, 1)

    def forward(self, x):
        """Return the classifier output for `x`, shaped (batch, 1)."""
        feature_map = self.backbone.extract_features(x)
        pooled = self.pool_type(feature_map, 1)
        flat = pooled.view(x.size(0), -1)
        return self.classifier(flat)
    
class MedathonDenseNet(nn.Module):
    """DenseNet feature extractor followed by a one-output linear head.

    Args:
        model_name: Name passed to `DenseNet.from_pretrained`.
        pool_type: Pooling function called as ``pool_type(feature_map, 1)``.
    """

    def __init__(self, model_name='densenet121', pool_type=F.adaptive_avg_pool2d):
        super().__init__()
        self.pool_type = pool_type
        self.backbone = DenseNet.from_pretrained(model_name)
        # The head takes as many inputs as the backbone's own `classifier` layer.
        self.classifier = nn.Linear(self.backbone.classifier.in_features, 1)

    def forward(self, x):
        """Return the classifier output for `x`, shaped (batch, 1)."""
        batch_size = x.size(0)
        pooled = self.pool_type(self.backbone.extract_features(x), 1)
        return self.classifier(pooled.view(batch_size, -1))
    
class MedathonResNet(nn.Module):
    """ResNet feature extractor followed by a one-output linear head.

    Args:
        model_name: Name passed to `ResNet.from_pretrained`.
        pool_type: Pooling function called as ``pool_type(feature_map, 1)``.
    """

    def __init__(self, model_name='resnet18', pool_type=F.adaptive_avg_pool2d):
        super().__init__()
        self.pool_type = pool_type
        self.backbone = ResNet.from_pretrained(model_name)
        # The head takes as many inputs as the backbone's own `fc` layer.
        head_inputs = self.backbone.fc.in_features
        self.classifier = nn.Linear(head_inputs, 1)

    def forward(self, x):
        """Return the classifier output for `x`, shaped (batch, 1)."""
        feature_map = self.backbone.extract_features(x)
        flat = self.pool_type(feature_map, 1).view(x.size(0), -1)
        return self.classifier(flat)

In [12]:
def get_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

def get_model(model_name='efficientnet-b0',lr=1e-5,wd=0.01,freeze_backbone=False,opt_fn=torch.optim.AdamW,device=None):
    """Create a model and its optimizer.

    Args:
        model_name: Backbone name; its prefix selects the wrapper class
            ('dense...' -> MedathonDenseNet, 'eff...' -> MedathonEfficientNet,
            'res...' -> MedathonResNet).
        lr: Learning rate passed to the optimizer.
        wd: Weight decay passed to the optimizer.
        freeze_backbone: When true, `requires_grad` is switched off for every
            backbone parameter.
        opt_fn: Optimizer constructor called as
            ``opt_fn(params, lr=lr, weight_decay=wd)``.
        device: Target device; falls back to `get_device()` when not given.

    Returns:
        Tuple ``(model, opt)`` with the model already on `device`.

    Raises:
        ValueError: If `model_name` matches none of the supported prefixes.
    """
    device = device if device else get_device()
    if model_name.startswith('dense'):
        model = MedathonDenseNet(model_name=model_name)
    elif model_name.startswith('eff'):
        model = MedathonEfficientNet(model_name=model_name)
    elif model_name.startswith('res'):
        model = MedathonResNet(model_name=model_name)
    else:
        # Previously this fell through and failed later with an UnboundLocalError.
        raise ValueError(
            f"Unsupported model_name {model_name!r}: expected a name starting with "
            "'dense', 'eff' or 'res'"
        )
    if freeze_backbone:
        for parameter in model.backbone.parameters():
            parameter.requires_grad = False
    # Move the model first so the optimizer is built from the on-device parameters.
    model = model.to(device)
    opt = opt_fn(model.parameters(),lr=lr,weight_decay=wd)
    return model, opt

def training_step(xb,yb,model,loss_fn,opt,device,scheduler):
    """Run one optimisation step on a single batch.

    Moves the batch to `device`, computes the loss, back-propagates, then
    steps both the optimizer and the scheduler.

    Returns:
        The batch loss as a Python float.
    """
    inputs = xb.to(device)
    targets = yb.to(device)
    predictions = model(inputs)
    opt.zero_grad()
    batch_loss = loss_fn(predictions, targets)
    batch_loss.backward()
    opt.step()
    scheduler.step()  # the scheduler advances once per batch
    return batch_loss.item()
    
def validation_step(xb,yb,model,loss_fn,device):
    """Evaluate one batch.

    Returns:
        Tuple ``(loss, probabilities)``: the batch loss as a Python float
        (computed on the raw model output) and the sigmoid of the model output.
    """
    inputs, targets = xb.to(device), yb.to(device)
    raw_out = model(inputs)
    batch_loss = loss_fn(raw_out, targets).item()
    probabilities = torch.sigmoid(raw_out)
    return batch_loss, probabilities

def get_data(train_df,valid_df,train_tfms,test_tfms,bs):
    """Build the training and validation DataLoaders.

    The training set reads from `training_folder` with `train_tfms`; the
    validation set reads from `valid_folder` with `test_tfms`.

    Returns:
        Tuple ``(train_dl, valid_dl)``. The training loader shuffles and uses
        batch size `bs`; the validation loader keeps order and uses `bs*2`.
    """
    train_dl = DataLoader(
        dataset=MedathonDataset(df=train_df, im_path=training_folder, transforms=train_tfms),
        batch_size=bs,
        shuffle=True,
        num_workers=2,
    )
    valid_dl = DataLoader(
        dataset=MedathonDataset(df=valid_df, im_path=valid_folder, transforms=test_tfms),
        batch_size=bs*2,
        shuffle=False,
        num_workers=2,
    )
    return train_dl, valid_dl

In [13]:
def fit(epochs,model,train_dl,valid_dl,opt,device=None,loss_fn=loss_fn, fold=-1):
    """Train `model` for `epochs` epochs and validate after every epoch.

    Args:
        epochs: Number of epochs to run.
        model: Model to train (expected to be on `device` already).
        train_dl: DataLoader yielding training batches ``(xb, yb)``.
        valid_dl: DataLoader yielding validation batches ``(xb, yb)``.
        opt: Optimizer updating `model`.
        device: Device the batches are moved to; defaults to `get_device()`.
        loss_fn: Loss callable invoked as ``loss_fn(output, target)``.
        fold: Value shown in the 'folds' column of the progress table.

    Returns:
        Tuple ``(model, val_rocs, val_accs, val_f1s, val_losses, train_losses)``;
        every list holds one entry per epoch.
    """
    device = device if device else get_device()
    # training_step() steps the scheduler once per batch, so the annealing
    # period is the total number of training batches over all epochs.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, len(train_dl)*epochs)
    val_rocs = []
    val_f1s = []
    val_accs = []
    val_losses = []
    train_losses = []

    # Progress bar with one table row per epoch.
    mb = master_bar(range(epochs))
    mb.write(['folds', 'epoch','train_loss','valid_loss','val_roc','val_F1','val_acc'],table=True)

    for epoch in mb:
        trn_loss,val_loss = 0.0,0.0
        n_valid = len(valid_dl.dataset)
        val_preds = np.zeros((n_valid,1))
        val_targs = np.zeros((n_valid,1))

        # Training
        model.train()
        for xb,yb in progress_bar(train_dl,parent=mb):
            # permute moves the last axis to position 1 (assumes batches arrive
            # as N,H,W,C -- TODO confirm against the transforms in use).
            trn_loss += training_step(xb.permute(0,3,1,2).float(),yb,model,loss_fn,opt,device,scheduler)
        trn_loss /= len(train_dl)  # mean of the per-batch losses

        # Validation
        model.eval()
        offset = 0  # position of the next free row in val_preds / val_targs
        with torch.no_grad():
            for xb,yb in progress_bar(valid_dl,parent=mb):
                loss,out = validation_step(xb.permute(0,3,1,2).float(),yb,model,loss_fn,device)
                val_loss += loss
                n_batch = xb.shape[0]
                # A running offset keeps the rows correct even when the final
                # batch is smaller than the others (i*batch_size would not).
                val_preds[offset:offset+n_batch] = out.cpu().numpy()
                val_targs[offset:offset+n_batch] = yb.cpu().numpy()
                offset += n_batch

        val_loss /= len(valid_dl)  # mean of the per-batch losses
        flat_targs = val_targs.reshape(-1)
        flat_preds = val_preds.reshape(-1)
        hard_preds = np.where(flat_preds<0.5,0,1)  # threshold probabilities at 0.5
        val_roc = roc_auc_score(flat_targs,flat_preds)
        val_f1 = f1_score(flat_targs,hard_preds)
        val_acc = accuracy_score(flat_targs,hard_preds)

        val_rocs.append(val_roc)
        val_accs.append(val_acc)
        val_f1s.append(val_f1)
        val_losses.append(val_loss)
        train_losses.append(trn_loss)

        mb.write([fold, epoch,f'{trn_loss:.6f}',f'{val_loss:.6f}',f'{val_roc:.6f}',f'{val_f1:.6f}',f'{val_acc:.6f}'],table=True)
    return model,val_rocs,val_accs, val_f1s, val_losses,train_losses

In [14]:
def create_records(records_path='records1.csv'):
    """Append the metrics of the latest training run to the records CSV.

    Reads the module-level results of the last run (`epochs`, `train_losses`,
    `val_losses`, `val_rocs`, `val_accs`, `val_f1s`) and the run configuration
    (`height`, `width`, `model_name`, `loss_name`, `bs`).

    Args:
        records_path: CSV file the rows are appended to. It is created when it
            does not exist yet.

    Returns:
        DataFrame with one row per epoch of the latest run only.
    """
    records = pd.DataFrame({
        "epoch": range(0, epochs),
        "train_loss": train_losses,
        "val_loss": val_losses,
        "val_rocs": val_rocs,
        # Each metric column now gets its own list (both used to receive val_rocs).
        "val_accs": val_accs,
        "val_f1s": val_f1s,
        "height": height,
        "width": width,
        "model": model_name,
        "loss_function": loss_name,
        "batch_size": bs,
        "fold": -1,
    })
    try:
        old_records = pd.read_csv(records_path)
    except FileNotFoundError:
        # First run: there is nothing to append to yet.
        new_records = records
    else:
        new_records = pd.concat([old_records, records])
    new_records.to_csv(records_path, index=False)
    return records

In [15]:
# Reload the image indexes from 'train.csv' and 'valid.csv'.
train_df, valid_df = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'valid.csv'))

In [None]:

# Train one model per backbone, each at its own input resolution.
# NOTE(review): `models` shadows the `models` module imported from torchvision -- confirm that is intended.
models = ['densenet121', 'efficientnet-b3', 'resnet18']
sizes = [224,256,512]
for model_name, size in zip(models, sizes):
    # Keep the module-level height/width in sync with the resolution actually
    # used: the checkpoint name below and create_records() both read them.
    height = width = size
    train_tfms,test_tfms = get_augmentations(p=0.5, height=height, width=width)
    train_dl,valid_dl = get_data(train_df,valid_df,train_tfms,test_tfms,bs)
    model, opt = get_model(model_name=model_name,lr=1e-4,wd=1e-4)
    model, val_rocs, val_accs, val_f1s, val_losses, train_losses = fit(epochs,model,train_dl,valid_dl,opt, loss_fn=loss_fn)
    # Checkpoint name: model, loss, epochs, best validation ROC AUC, height-width, batch size, fold.
    torch.save(model.state_dict(),"{}_{}_{}_{}_{}-{}_{}_{}.pth".format(model_name, loss_name, epochs, np.max(val_rocs), height, width, bs, -1))
    records = create_records()