<a href="https://www.kaggle.com/code/rimzakhama/rsna-training-v1?scriptVersionId=143880793" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# !pip install timm -q

In [None]:
import os
import time
import numpy as np
import pandas as pd

import gc

# image manipulation
import cv2
import PIL
from PIL import Image

# visualisation
import matplotlib.pyplot as plt
import seaborn as sns

# helpers
from tqdm import tqdm
import time
import copy
import gc
from enum import Enum
from sklearn import model_selection
from sklearn.metrics import roc_auc_score, precision_recall_curve

import timm


# for cnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, AdamW, SGD
from torch.autograd import Variable
from torch.utils.data import DataLoader, random_split, TensorDataset, Dataset, WeightedRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
from torchvision import models
from torchmetrics.classification import BinaryF1Score, BinaryPrecision, BinaryRecall, BinaryAccuracy, BinaryROC, BinaryAUROC
from torchvision import transforms
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
import albumentations as A

In [None]:
def set_seed(seed = 42):
    '''Seed the random number generators used by the notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and CUDA), and switches
    cuDNN to deterministic mode so repeated runs give the same results.

    Args:
        seed: integer seed applied to every generator.
    '''
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed.
    # NOTE(review): the interpreter reads PYTHONHASHSEED at start-up, so this
    # presumably only influences processes launched afterwards - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    print('> SEEDING DONE')

set_seed(42)

In [None]:
class Config :
    # Hyper-parameters shared by the model and the training cell.
    epochs_warmup = 0   # warm-up steps handed to the cosine scheduler
    epochs = 5          # training epochs; also the scheduler's total step count
    num_cycles = 0.5    # cosine cycles handed to the scheduler
    # Read by BreastCancerModel, which previously failed with AttributeError.
    # One output unit matches the (batch, 1) targets fed to BCEWithLogitsLoss.
    NUM_CLASSES = 1
    

In [None]:

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset yielding (image tensor, cancer target) pairs.

    Each row of `df` must provide the columns 'patient_id', 'image_id' and
    'cancer'. The image is read from
    `<image_dir>/<patient_id>/<image_id>.png`, converted to RGB, resized and
    passed through `transform`.

    Args:
        df: DataFrame describing the samples. It is copied with a fresh
            0..n-1 index, so callers no longer have to reset the index.
        transform: callable applied to the PIL image and expected to return a
            tensor. When falsy, `transforms.ToTensor()` is used instead.
        image_dir: root directory of the PNG files.
        image_size: size handed to `PIL.Image.resize`, which takes
            (width, height).
    """

    def __init__(self, df, transform,
                 image_dir='/kaggle/input/preprocessed-images-rsna/output/preprocessed_images_RSNA',
                 image_size=(1024, 912)):
        # reset_index returns a new frame, so the caller's df is untouched and
        # positional indices from the DataLoader always match the labels.
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.image_dir = image_dir
        self.image_size = image_size

    def __len__(self):
        """Return the number of samples."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample `idx` and return `(image, target)` as float32 tensors."""
        # Scalar access keeps each column's own dtype (ids stay integers).
        patient_id = self.df.at[idx, 'patient_id']
        image_id = self.df.at[idx, 'image_id']
        target = self.df.at[idx, 'cancer']

        image_png_path = os.path.join(self.image_dir, str(patient_id), str(image_id) + '.png')

        # Close the file handle as soon as the pixels are decoded.
        with Image.open(image_png_path) as raw_image:
            image = raw_image.convert('RGB')
        image = image.resize(self.image_size)

        # Fall back to a plain tensor conversion when no transform was given.
        transform = self.transform or transforms.ToTensor()
        image = transform(image).to(torch.float32)

        target = torch.tensor(target, dtype=torch.float)

        return image, target

In [None]:
# Efficientnetv2_s
def gem(x, p=3, eps=1e-6):
    """Generalized-mean (GeM) pooling over the last two dimensions of `x`.

    Values are clamped to at least `eps`, raised to the power `p`, averaged
    over a window covering the whole last two dimensions, then the 1/p root
    is taken.
    """
    height, width = x.size(-2), x.size(-1)
    powered = torch.clamp(x, min=eps).pow(p)
    averaged = F.avg_pool2d(powered, (height, width))
    return averaged.pow(1.0 / p)


class GeM(nn.Module):
    """Module wrapper around the `gem` pooling function.

    Args:
        p: pooling exponent.
        eps: lower clamp applied to the input before exponentiation.
        p_trainable: when True, `p` is stored as a learnable `nn.Parameter`
            of shape (1,); otherwise it is kept as the plain value passed in.
    """

    def __init__(self, p=3, eps=1e-6, p_trainable=False):
        super().__init__()
        if p_trainable:
            # `Parameter` was never imported on its own; use the nn namespace.
            self.p = nn.Parameter(torch.ones(1) * p)
        else:
            self.p = p
        self.eps = eps

    def forward(self, x):
        """Pool `x` with the module's `p` and `eps`."""
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        # `p` is either a one-element tensor or a plain number; handle both so
        # that printing a model with a fixed exponent does not raise.
        p_value = self.p.item() if isinstance(self.p, torch.Tensor) else float(self.p)
        return f"{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})"
    
class Efficientnetv2_s(nn.Module):
    """Single-logit classifier on top of timm's `tf_efficientnetv2_s`.

    The backbone's classifier module is swapped for `nn.Identity`, and a new
    linear layer maps the classifier's former input width to one output
    (cancer). A GeM pooling block is also registered as `self.pool`, but
    `forward` does not use it.

    Args:
        p, p_trainable, eps: forwarded to the `GeM` module in `self.pool`.
    """

    def __init__(self, p=3, p_trainable=False, eps=1e-6):
        super().__init__()

        # tf_efficientnetv2_s with ImageNet weights and 3 input channels
        backbone = timm.create_model('tf_efficientnetv2_s', pretrained=True, in_chans=3)
        self.efficientnetv2_s = backbone

        # Look up the classifier attribute name from the model config, record
        # its input width, then neutralise it.
        head_name = backbone.default_cfg['classifier']
        feature_dim = backbone._modules[head_name].in_features
        backbone._modules[head_name] = nn.Identity()

        self.fc = nn.Linear(feature_dim, 1)  # cancer
        gem_pool = GeM(p=p, eps=eps, p_trainable=p_trainable)
        self.pool = nn.Sequential(gem_pool, nn.Flatten())

    def forward(self, x):
        """Return the raw logit: backbone forward pass followed by `self.fc`."""
        features = self.efficientnetv2_s(x)
        return self.fc(features)
    
    

In [None]:
class CNN(nn.Module):
    """Pretrained timm `resnet18` followed by a small head ending in a sigmoid.

    The head takes the output of the backbone's own `fc` layer (its
    `out_features`), not the pre-classifier features, and returns a value in
    [0, 1] per sample.
    """

    def __init__(self):
        super().__init__()

        self.network = timm.create_model('resnet18', pretrained=True, in_chans=3)
        backbone_out = self.network.fc.out_features

        # Head: backbone output -> 256 -> 1, with dropout in between.
        to_hidden = nn.Linear(backbone_out, 256)
        to_score = nn.Linear(256, 1)
        self.classifier_layer = nn.Sequential(to_hidden, nn.Dropout(0.5), to_score)

    def forward(self, xb):
        """Return sigmoid probabilities for the batch `xb`."""
        scores = self.classifier_layer(self.network(xb))
        return torch.sigmoid(scores)

In [None]:
# Only resnet
class CNN_RESNET(nn.Module):
    """Pretrained timm `resnet18` followed by a small head returning raw scores.

    Same layout as `CNN`, but with dropout 0.8 and no sigmoid in `forward`,
    so the output is an unnormalised score per sample.
    """

    def __init__(self):
        super().__init__()

        self.resnet = timm.create_model('resnet18', pretrained=True, in_chans=3)  # resnet18
        backbone_out = self.resnet.fc.out_features

        # Head: backbone output -> 256 -> 1, with heavy dropout in between.
        to_hidden = nn.Linear(backbone_out, 256)
        to_score = nn.Linear(256, 1)
        self.classifier_layer = nn.Sequential(to_hidden, nn.Dropout(0.8), to_score)

    def forward(self, xb):
        """Return the head's raw output for the batch `xb` (no sigmoid)."""
        return self.classifier_layer(self.resnet(xb))
        #return torch.sigmoid(xb)
    

In [None]:
def gem(x, p=3, eps=1e-6):
    """Generalized-mean (GeM) pooling over the last two dimensions of `x`.

    Values are clamped to at least `eps`, raised to the power `p`, averaged
    over a window covering the whole last two dimensions, then the 1/p root
    is taken.
    """
    height, width = x.size(-2), x.size(-1)
    powered = torch.clamp(x, min=eps).pow(p)
    averaged = F.avg_pool2d(powered, (height, width))
    return averaged.pow(1.0 / p)

class GeM(nn.Module):
    """Module wrapper around the `gem` pooling function.

    Args:
        p: pooling exponent.
        eps: lower clamp applied to the input before exponentiation.
        p_trainable: when True, `p` is stored as a learnable `nn.Parameter`
            of shape (1,); otherwise it is kept as the plain value passed in.
    """

    def __init__(self, p=3, eps=1e-6, p_trainable=False):
        super().__init__()
        if p_trainable:
            # `Parameter` was never imported on its own; use the nn namespace.
            self.p = nn.Parameter(torch.ones(1) * p)
        else:
            self.p = p
        self.eps = eps

    def forward(self, x):
        """Pool `x` with the module's `p` and `eps`."""
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        # `p` is either a one-element tensor or a plain number; handle both so
        # that printing a model with a fixed exponent does not raise.
        p_value = self.p.item() if isinstance(self.p, torch.Tensor) else float(self.p)
        return f"{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})"
    
class MammoModel(nn.Module):
    """timm backbone + GeM pooling + single-logit linear head.

    Args:
        name: timm model name passed to `timm.create_model`.
        pretrained: whether to load pretrained weights.
        in_chans: number of input channels.
        p, p_trainable, eps: forwarded to the `GeM` pooling module.
    """

    def __init__(self, name, *, pretrained=False, in_chans=3, p=3, p_trainable=False, eps=1e-6):
        super().__init__()
        model = timm.create_model(name, pretrained=pretrained, in_chans=in_chans)
        # Find the classifier attribute via the model config, record its
        # input width and replace it with a pass-through.
        clsf = model.default_cfg['classifier']
        n_features = model._modules[clsf].in_features
        model._modules[clsf] = nn.Identity()

        self.fc = nn.Linear(n_features, 1) # cancer
        self.model = model

        self.pool = nn.Sequential(
            GeM(p=p, eps=eps, p_trainable=p_trainable),
            nn.Flatten())

    def forward(self, x):
        """Return the raw logit for the batch `x`."""
        # The previous version first ran the full model and then called
        # forward_features on that result, feeding the backbone its own
        # output. Only the feature extractor is needed before GeM pooling.
        # NOTE(review): assumes forward_features returns an unpooled
        # (batch, channels, H, W) map, as GeM pooling requires - confirm.
        x = self.model.forward_features(x)
        x = self.pool(x)
        logits = self.fc(x)
        return logits

In [None]:
''' Model (we need to improve our model) ''' 
class BreastCancerModel(nn.Module):
    """Holds a pretrained EfficientNet-B4 and a pretrained ResNet-50.

    Both backbones get a new final linear layer with `Config.NUM_CLASSES`
    outputs. Only the EfficientNet branch is used by `forward`.

    Args:
        Config: object exposing a `NUM_CLASSES` attribute.
    """

    def __init__(self, Config):
        super().__init__()

        # EfficientNet branch: swap the classifier for a fresh linear layer.
        effnet = timm.create_model('efficientnet_b4', pretrained=True, in_chans=3)
        self.efficientnet = effnet
        effnet_width = effnet.classifier.in_features
        effnet.classifier = nn.Linear(effnet_width, Config.NUM_CLASSES)

        # ResNet branch: swap the final fc layer (input width fixed at 2048).
        resnet = timm.create_model('resnet50', pretrained=True, in_chans=3)
        self.resnet = resnet
        resnet.fc = nn.Linear(2048, Config.NUM_CLASSES)

    def forward(self, image):
        """Return the EfficientNet branch's output for `image`."""
        return self.efficientnet(image)
    

In [None]:
def BCELoss_class_weighted(weights):
    """Build a class-weighted binary cross-entropy loss function.

    Args:
        weights: indexable where weights[0] scales the negative-class term
            and weights[1] scales the positive-class term.

    Returns:
        A function `loss(y_pred, target)` that takes probabilities and
        returns the mean weighted BCE.
    """
    def loss(y_pred, target):
        # Keep probabilities away from 0 and 1 so the logs stay finite.
        clipped = torch.clamp(y_pred, min=1e-7, max=1-1e-7)
        positive_term = weights[1] * target * torch.log(clipped)
        negative_term = (1 - target) * weights[0] * torch.log(1 - clipped)
        per_sample = -positive_term - negative_term
        return torch.mean(per_sample)

    return loss

In [None]:
# create class for earlystopping
class EarlyStopper:
    """Signals when a monitored loss has stopped improving.

    Args:
        patience: number of worsening observations tolerated before stopping.
        min_delta: margin above the best loss that still counts as
            "not worse".
    """

    def __init__(self, patience=1, min_delta=0):
        self.counter = 0
        self.min_loss = np.inf
        self.patience = patience
        self.min_delta = min_delta

    def early_stop(self, loss):
        """Record `loss` and return True once patience is exhausted."""
        # New best (or equal): remember it and restart the count.
        if loss <= self.min_loss:
            self.min_loss, self.counter = loss, 0
            return False

        # Within the tolerated margin: neither reset nor count.
        exceeds_margin = loss > (self.min_loss + self.min_delta)
        if not exceeds_margin:
            return False

        self.counter += 1
        return self.counter >= self.patience

In [None]:
def find_optimal_threshold(fpr, tpr, thresholds): # based on ROC curve
    """Return the threshold at which `tpr - fpr` is largest.

    The three arguments are parallel sequences describing a ROC curve; the
    first maximum is chosen when several points tie.
    """
    best_index = np.argmax(tpr - fpr)
    return thresholds[best_index]
    

In [None]:
#### '''Training function'''
def train(data_loader, model, optimizer, device, criterion, scheduler=None):
    """Run one mixed-precision training epoch and tune a threshold on it.

    Args:
        data_loader: iterable of (images, targets) batches; targets get an
            extra trailing dimension before use.
        model: network whose output is treated as raw logits (a sigmoid is
            applied here for every metric).
        optimizer: stepped once per batch through the AMP gradient scaler.
        device: device the batches are moved to.
        criterion: loss called as `criterion(outputs, targets)`.
        scheduler: accepted for call-site compatibility; not used here.

    Returns:
        dict with 'loss', 'acc', 'f1' and 'auc' as floats plus 'threshold',
        the ROC-optimal threshold on the training probabilities. Earlier
        versions returned None, so existing callers are unaffected.
    """
    # Set model in training mode
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=True)

    # Metrics
    metricf1 = BinaryF1Score()
    roc = BinaryROC()

    # Threshold used for the running accuracy
    threshold = 0.5

    # DataLoader
    tk0 = tqdm(data_loader, total=len(data_loader))

    running_loss = 0
    correct = 0
    total = 0
    all_outputs = torch.Tensor([])
    all_labels = torch.Tensor([])

    for batch_idx, data in enumerate(tk0):
        images = data[0].to(device).float()
        targets = torch.unsqueeze(data[1], 1).to(device).float()

        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=True):
            outputs = model(images)

        loss = criterion(outputs, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Detach before storing so the epoch-long buffer does not keep
        # references into each batch's autograd graph.
        batch_logits = outputs.detach()
        all_outputs = torch.cat((all_outputs, batch_logits.to('cpu')))
        all_labels = torch.cat((all_labels, targets.to('cpu')))

        # Correct classifications at the fixed threshold
        predicted_vals = batch_logits.sigmoid() > threshold
        correct += torch.sum(predicted_vals == targets.data)

        # Total classifications
        total += targets.size(0)

        # running loss
        running_loss += loss.item()

        del outputs
        torch.cuda.empty_cache()

    # Accuracy
    accu = correct/total

    # Average training loss (epoch loss)
    train_loss = running_loss / len(data_loader)

    # AUC is rank based, so raw logits are fine here
    auc_score = roc_auc_score(all_labels.numpy(), all_outputs.numpy())

    # ROC on explicit probabilities so the returned thresholds are
    # guaranteed to live in [0, 1]
    all_probs = all_outputs.sigmoid()
    print('all_labels type', all_labels.dtype)
    fpr, tpr, thresholds = roc(all_probs, all_labels.long())

    # Find optimal threshold
    OptThreshold = find_optimal_threshold(fpr, tpr, thresholds)

    print(f'New threshold is {OptThreshold}')

    # F1 at the optimised threshold. The threshold is a probability, so it
    # must be compared with probabilities, not with raw logits.
    f1_measure = metricf1((all_probs > OptThreshold).long(), all_labels.long())

    return {
        'loss': train_loss,
        'acc': float(accu),
        'f1': float(f1_measure),
        'auc': float(auc_score),
        'threshold': OptThreshold,
    }

    

In [None]:
''' Training function based on the 7th-place solution'''

def train(data_loader, model, optimizer, device, criterion, scheduler=None):
    """Run one mixed-precision training epoch and print its metrics.

    Args:
        data_loader: iterable of (images, targets) batches; targets get an
            extra trailing dimension before use.
        model: network whose output is treated as raw logits (a sigmoid is
            applied here for accuracy and F1).
        optimizer: stepped once per batch through the AMP gradient scaler.
        device: device the batches are moved to.
        criterion: loss called as `criterion(outputs, targets)`.
        scheduler: accepted for call-site compatibility; not used here.

    Returns:
        dict with 'loss', 'acc', 'f1' and 'auc' as floats. Earlier versions
        returned None, so existing callers are unaffected.
    """
    # Set model in training mode
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=True)

    # Metrics
    metricf1 = BinaryF1Score()

    # Decision threshold on probabilities
    threshold = 0.5

    # DataLoader
    tk0 = tqdm(data_loader, total=len(data_loader))

    running_loss = 0
    correct = 0
    total = 0
    all_outputs = torch.Tensor([])
    all_labels = torch.Tensor([])

    for batch_idx, data in enumerate(tk0):
        images = data[0].to(device).float()
        targets = torch.unsqueeze(data[1], 1).to(device).float()

        optimizer.zero_grad()

        with torch.cuda.amp.autocast(enabled=True):
            outputs = model(images)

        loss = criterion(outputs, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Detach before storing so the epoch-long buffer does not keep
        # references into each batch's autograd graph.
        batch_logits = outputs.detach()
        all_outputs = torch.cat((all_outputs, batch_logits.to('cpu')))
        all_labels = torch.cat((all_labels, targets.to('cpu')))

        # Correct classifications
        predicted_vals = batch_logits.sigmoid() > threshold
        correct += torch.sum(predicted_vals == targets.data)

        # Total classifications
        total += targets.size(0)

        # running loss
        running_loss += loss.item()

    # Accuracy
    accu = correct/total

    # Average training loss (epoch loss)
    train_loss = running_loss / len(data_loader)

    # AUC is rank based, so raw logits are fine here
    auc_score = roc_auc_score(all_labels.numpy(), all_outputs.numpy())

    # F1 at the same probability threshold the accuracy uses. Comparing raw
    # logits with 0.5 (as before) put the cut at sigmoid^-1(0.5) != 0.5.
    all_probs = all_outputs.sigmoid()
    f1_measure = metricf1((all_probs > threshold).long(), all_labels.long())

    print('Train Loss: %.3f | Accuracy: %.3f'%(train_loss,accu))
    print('auc_score : %.3f'%(auc_score))
    print('f1_measure : %.3f'%(f1_measure))

    return {
        'loss': train_loss,
        'acc': float(accu),
        'f1': float(f1_measure),
        'auc': float(auc_score),
    }
  
    
   


In [None]:
'''Evaluation function'''

def evaluation(data_loader, model, device, criterion):
    """Evaluate `model` on `data_loader` and pick a ROC-optimal threshold.

    Args:
        data_loader: iterable of (images, targets) batches; targets get an
            extra trailing dimension before use.
        model: network whose output is treated as raw logits.
        device: device the batches are moved to.
        criterion: loss called as `criterion(outputs, targets)`.

    Returns:
        (predictions, tar, OptThreshold):
        predictions - numpy bool array, sigmoid(output) > 0.5 for every sample;
        tar - numpy array of the targets, same shape as predictions;
        OptThreshold - threshold on probabilities maximising tpr - fpr.
        NOTE(review): `predictions` is binarised at the fixed 0.5, not at
        OptThreshold - confirm this is what the caller's score should use.
    """
    model.eval()

    # Metrics
    metricf1 = BinaryF1Score()
    roc = BinaryROC()

    predictions = []
    tar = []
    running_loss = 0
    correct = 0
    total = 0

    threshold = 0.5

    all_outputs = torch.Tensor([])
    all_labels = torch.Tensor([])

    for batch_idx, data in enumerate(data_loader):
        images = data[0].to(device, dtype=torch.float)
        targets = torch.unsqueeze(data[1], 1).to(device, dtype=torch.float)

        with torch.no_grad():
            outputs = model(images)
            loss = criterion(outputs, targets)

        all_outputs = torch.cat((all_outputs, outputs.to('cpu')))
        all_labels = torch.cat((all_labels, targets.to('cpu')))

        running_loss += loss.item()

        # Correct classifications (for accuracy calculation)
        predicted_vals = outputs.sigmoid() > threshold
        correct += torch.sum(predicted_vals == targets.data)
        # Total classifications
        total += targets.size(0)

        predictions.append(predicted_vals.cpu().numpy())
        tar.append(targets.cpu().numpy())

        del outputs
        torch.cuda.empty_cache()

    # Loss
    eval_loss = running_loss / len(data_loader)
    # Accuracy
    eval_accu = correct/total

    # AUC is rank based, so raw logits are fine here
    eval_auc_score = roc_auc_score(all_labels.numpy(), all_outputs.numpy())

    # ROC on explicit probabilities so the returned thresholds are
    # guaranteed to live in [0, 1]
    all_probs = all_outputs.sigmoid()
    print('type of labels', all_labels.dtype)
    fpr, tpr, thresholds = roc(all_probs, all_labels.long())

    # Find optimal threshold
    OptThreshold = find_optimal_threshold(fpr, tpr, thresholds)

    # F1 at the optimised threshold. The threshold is a probability, so it
    # must be compared with probabilities, not with raw logits.
    eval_f1_measure = metricf1((all_probs > OptThreshold).long(), all_labels.long())

    # Flatten the per-batch lists into single arrays
    predictions = np.concatenate(predictions)
    tar = np.concatenate(tar)

    # The loss/accuracy line used to be printed twice; once is enough.
    print('Eval Loss: %.3f | Eval Accuracy: %.3f'%(eval_loss,eval_accu))
    print('Eval auc_score : %.3f'%(eval_auc_score))
    print('Eval_f1_measure : %.3f'%(eval_f1_measure))

    return predictions, tar, OptThreshold
    

In [None]:
''' Probabilistic F1 score'''
def pfbeta(labels, predictions, beta):
    """Probabilistic F-beta score.

    Predictions are clipped to [0, 1] and treated as soft counts: their sum
    over positive labels is the "true positive" mass and their sum over
    negative labels the "false positive" mass.

    Args:
        labels: array-like of ground-truth values; any non-zero entry counts
            as positive. Flattened before use, so (N,) and (N, 1) both work.
        predictions: array-like of scores with the same number of elements.
        beta: weight of recall relative to precision.

    Returns:
        The score as a float, or 0 when it is undefined (no positive labels,
        no predicted mass) or when precision or recall is zero. The earlier
        loop divided by zero in the undefined cases.
    """
    is_positive = np.asarray(labels).reshape(-1).astype(bool)
    clipped = np.clip(np.asarray(predictions, dtype=float).reshape(-1), 0.0, 1.0)

    y_true_count = int(is_positive.sum())
    ctp = float(clipped[is_positive].sum())
    cfp = float(clipped[~is_positive].sum())

    # Precision needs predicted mass, recall needs at least one positive.
    if y_true_count == 0 or (ctp + cfp) == 0:
        return 0

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count
    if c_precision > 0 and c_recall > 0:
        return (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
    return 0

In [None]:
# Competition metadata table; it is later split on its 'cancer' column and
# handed to the Dataset class.
train_csv = '/kaggle/input/rsna-breast-cancer-detection/train.csv'
dfx = pd.read_csv(train_csv)

'''
augmentator = transforms.Compose([
    # input for augmentator is always PIL image
    # transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.RandomRotation(5),
    transforms.ToTensor(), # return it as a tensor and transforms it to [0, 1]
])
'''
# Minimal pipeline: tensor conversion only, without augmentation or
# normalization.
transform = transforms.Compose([
    transforms.ToTensor()])

'''
augmentator = transforms.Compose([
    # input for augmentator is always PIL image
    # transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomAffine(degrees=(0, 180), scale=(0.8, 1.2)),
    transforms.ElasticTransform(),
    transforms.ToTensor(), # return it as a tensor and transforms it to [0, 1]
])

'''

# Training-time pipeline: random horizontal/vertical flips, tensor conversion,
# then per-channel normalization. The commented-out entries are alternative
# augmentations that are currently disabled.
augmentator = transforms.Compose([
    # input for augmentator is always PIL image
    # transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    #transforms.RandomPerspective(),
    #transforms.RandomRotation((0, 90)),
    #transforms.RandomAutocontrast(),
    #transforms.RandomAffine(degrees=(0, 180), scale=(0.8, 1.2)),
    #transforms.ElasticTransform(),
    transforms.ToTensor(), # return it as a tensor and transforms it to [0, 1]
    # Same mean/std on all three channels.
    # NOTE(review): presumably computed on this image set - confirm the source.
    transforms.Normalize(mean = [0.1338, 0.1338, 0.1338],
                         std = [0.2068, 0.2068, 0.2068])    
])

# Validation-time pipeline: no random augmentation, only tensor conversion
# and the same normalization as the training pipeline.
valid_augmentator = transforms.Compose([
    # input for augmentator is always PIL image
    # transforms.ToPILImage(),
    #transforms.RandomHorizontalFlip(0.5),
    #transforms.RandomVerticalFlip(0.5),
    #transforms.RandomPerspective(),
    #transforms.RandomRotation((0, 90)),
    #transforms.RandomAutocontrast(),
    #transforms.RandomAffine(degrees=(0, 180), scale=(0.8, 1.2)),
    #transforms.ElasticTransform(),
    transforms.ToTensor(), # return it as a tensor and transforms it to [0, 1]
    transforms.Normalize(mean = [0.1338, 0.1338, 0.1338],
                         std = [0.2068, 0.2068, 0.2068])    
])

In [None]:
# Batch size shared by the training and validation DataLoaders
# (32 and 16 are presumably values tried earlier).
batch_size = 8  #32, 16

In [None]:
''' Split data before oversampling using train_test_split from sklearn'''
# Stratified 85/15 split on the 'cancer' label. The indices are reset because
# the Dataset class looks rows up by position.
# NOTE(review): the split is per image row; rows from one patient can land in
# both sets. Consider a patient-grouped split if that leakage matters.
df_train, df_valid = model_selection.train_test_split(dfx, test_size=0.15, stratify=dfx['cancer'], random_state=42)
df_train = df_train.reset_index(drop=True)
df_valid = df_valid.reset_index(drop=True)

# Instantiate Dataset with training data
train_dataset = Dataset(df_train, transform=augmentator)

# Instantiate Dataloader with training dataset.
# shuffle=True gives every epoch a different batch order; without it the
# loader walked the rows in the same fixed order each epoch.
train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=2,
                                                drop_last=True)

# Instantiate Dataset with validation data
valid_data =  Dataset(df_valid, transform=valid_augmentator)


# Instantiate Dataloader with validation dataset.
# drop_last=False so every validation sample is scored; dropping the final
# partial batch silently excluded samples from the metrics.
valid_data_loader = torch.utils.data.DataLoader(valid_data,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                num_workers=2,
                                                drop_last=False)
# The advantage of train_test_split is that the data is split into train and
# validation sets before any oversampling. In this notebook we use
# augmentations to deal with the class imbalance, i.e. we oversample the data.
# That must be applied to the train dataset only. Otherwise the validation
# dataset will be very similar to the train data and we will obtain an
# unrealistic measure of performance.

In [None]:
''' Split data after oversampling using random_split'''
'''
dataset = Dataset(dfx, transform=augmentator)
val_pct = 0.1
val_size = int(val_pct * len(dataset))
train_size = len(dataset) - val_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])



# Applying sampler just to train dataset, not for validation, since the validation dataset 
# should be an imitation of real Dataset

#train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle = False, num_workers = 2, 
#                              pin_memory = True, sampler = weighted_random_sampler) # using our sampler

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle = True, num_workers = 2, 
                              pin_memory = True) # random sampler

val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle = False, pin_memory = True)
# If you use shuffle = True the DataLoader will initialize a RandomSampler for you, 
# otherwise it’ll use SequentialSampler.

dataloaders = {'train' : train_dataloader, 'val' : val_dataloader}
dataset_sizes = {'train': train_size, 'val' : val_size}
    
'''


In [None]:
def pfbeta_binarized(labels, predictions):
    """Best probabilistic F1 over all binarisation thresholds.

    Each distinct score given to a positive sample is tried as a threshold:
    predictions are binarised with `>=` and scored with `pfbeta(..., beta=1)`.

    Args:
        labels: array-like of ground-truth values (1 marks a positive).
        predictions: array-like of scores, same shape as `labels`.

    Returns:
        The highest score found, or 0 when there are no positive labels
        (previously this raised because the maximum of an empty list was
        taken).
    """
    labels = np.asarray(labels)
    predictions = np.asarray(predictions)

    # Duplicate thresholds give identical binarisations, so score each once.
    thresholds = np.unique(predictions[labels == 1])
    if thresholds.size == 0:
        return 0

    scores = [
        pfbeta(labels, (predictions >= th).astype('int'), 1)
        for th in thresholds
    ]
    return np.max(scores)

In [None]:
# Training driver: builds the model, loss, optimizer and scheduler, then runs
# Config.epochs rounds of train + evaluation and checkpoints the best score.

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the model
#model = MammoModel('tf_efficientnetv2_s', pretrained=True) #efficientnet_b5
#model = CNN()
model = Efficientnetv2_s()
#model = BreastCancerModel(Config=Config)
model.to(device)

# Define the criterion (loss)
#w_pos = 3
#w_neg = 1
#criterion = BCELoss_class_weighted(weights = [w_neg, w_pos])

# or
# BCEWithLogitsLoss takes raw logits, which is what Efficientnetv2_s returns.
criterion = nn.BCEWithLogitsLoss()

# Define the optimizer
#optimizer = AdamW(model.parameters(), lr=2.56e-05)
optimizer = Adam(model.parameters(), lr=2.56e-05)


# Define the metric
# NOTE(review): `metric` is not used anywhere below.
metric = BinaryF1Score().to(device)


# NOTE(review): the stopper is created but its early_stop() is never called
# in this cell, so training always runs for all epochs.
earlystoper = EarlyStopper(patience = 3)

# Scheduler
# The scheduler is stepped once per epoch in the loop below, so its "steps"
# are epochs: warm-up and total length both come from Config.
#scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=Config.epochs_warmup, num_training_steps=Config.epochs, 
        num_cycles=Config.num_cycles
            )


# Model path
# NOTE(review): `model_path` is unused; the checkpoint is written to PATH below.
#model_path = f"model_{fold}.bin"
model_path = "model.bin"
    
# Run
best_score = 0
for epoch in range(Config.epochs): 
    train(train_data_loader, model, optimizer, device, criterion, scheduler)
    scheduler.step()
    predictions, targets, OptThreshold = evaluation(valid_data_loader, model, device, criterion)

    print('epoch = ', epoch)

    # `predictions` are the binarised outputs returned by evaluation().
    score = pfbeta(targets, predictions, beta=1)
    #score = pfbeta_binarized(targets, predictions)
    
    # Keep only the checkpoint with the best validation score so far.
    #if F1_score > best_score:
    if score > best_score:
        #best_score = F1_score
        best_score = score
        
        PATH = "model.pt"
        
        # Store the weights together with the tuned threshold.
        # NOTE(review): torch.save presumably returns None, so `checkpoint`
        # holds nothing useful - confirm before relying on it.
        checkpoint = torch.save({
            'model_state_dict': model.state_dict(),
            'threshold' : OptThreshold
            }, PATH)
            
        
    # NOTE(review): this prints the best score so far, not this epoch's score.
    #print('f1_score = ', best_score)
    print('score = ', best_score) 

print('best_score_ever = ', best_score) 

# Release cached GPU memory and run the garbage collector after training.
torch.cuda.empty_cache()
gc.collect()


In [None]:
# In this version, we used only augmentations. 
# In this notebook, we use train_test_split from sklearn in order to split data before oversampling the
# train dataset.
# In this notebook, I use the preprocessed_images_RSNA dataset created by the notebook 
# preprocess_images_RSNA.

In [None]:
# In this version, I have added normalization to the augmentations part. 
# In this version we used binarized f1 score.
# In this version, we apply augmentations without condition target =1.

# In this version, we use tf_efficientnetv2_s model, without pooling_layer.

# In this version, we have a training and an evaluation functions, by moving threshold. In this case 
# we should use binarized score function.
# => score = 0.19 in the first epoch.


# To summarize: in this version we use EfficientNet without the pooling layer, our own training and 
# evaluation functions, a moving threshold and pfbeta.
# With 3 epochs, we obtained a score of 0.16.

