Connecting to Google Drive 

In [1]:
def connect_to_drive():
  from google.colab import drive
  drive.mount('/content/gdrive', force_remount=True)

!pip install import_ipynb 
!pip install torchmetrics
import import_ipynb 
connect_to_drive()
%cd '/content/gdrive/My Drive/Bioinformatics2'

Mounted at /content/gdrive
/content/gdrive/My Drive/Bioinformatics2


Importing modules

In [3]:
import torch
from utils import *
install_requirements()
from VascularClassificationDataset import *
import albumentations as A
from torchvision import transforms
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.augmentations.transforms import *
import segmentation_models_pytorch as smp
import wandb
from torchmetrics.functional import iou as iou_torch
from tqdm import tqdm
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import StepLR
from torchvision.ops.focal_loss import sigmoid_focal_loss
from segmentation_models_pytorch.losses import FocalLoss

importing Jupyter notebook from VascularClassificationDataset.ipynb


Retrieve images paths from txt

In [4]:
input_path_training='/content/gdrive/.shortcut-targets-by-id/1mtNVywqwmtCBefeGQrY2c6F6xBQQWq30/vascular_segmentation/Train'
input_path_test='/content/gdrive/.shortcut-targets-by-id/1mtNVywqwmtCBefeGQrY2c6F6xBQQWq30/vascular_segmentation/Test'

save_model_folder='/content/gdrive/MyDrive/Bioinformatics2/saved_models/'

txt_with_training_paths_inputs='/content/gdrive/MyDrive/Bioinformatics2/dataset_paths/training_paths_inputs.txt'
txt_with_training_paths_targets='/content/gdrive/MyDrive/Bioinformatics2/dataset_paths/training_paths_targets.txt'
txt_with_validation_paths_inputs='/content/gdrive/MyDrive/Bioinformatics2/dataset_paths/validation_paths_inputs.txt'
txt_with_validation_paths_targets='/content/gdrive/MyDrive/Bioinformatics2/dataset_paths/validation_paths_targets.txt'
txt_with_test_paths_inputs='/content/gdrive/MyDrive/Bioinformatics2/dataset_paths/test_paths_inputs.txt'
txt_with_test_paths_targets='/content/gdrive/MyDrive/Bioinformatics2/dataset_paths/test_paths_targets.txt'

imgs_training_input_split=read_list_images_path_from_txt(txt_with_training_paths_inputs)
imgs_training_target_split=read_list_images_path_from_txt(txt_with_training_paths_targets)
imgs_validation_input_split=read_list_images_path_from_txt(txt_with_validation_paths_inputs)
imgs_validation_target_split=read_list_images_path_from_txt(txt_with_validation_paths_targets)
imgs_test_input=read_list_images_path_from_txt(txt_with_test_paths_inputs)
imgs_test_target=read_list_images_path_from_txt(txt_with_test_paths_targets)

#Retrieving images from saved_predictions folder
import os
root_path='/content/gdrive/MyDrive/Bioinformatics2/saved_predictions/'

training_path=root_path+'training/'
validation_path=root_path+'validation/'
test_path=root_path+'test/'

saved_predictions_training = [training_path+path for path in os.listdir(training_path)]
saved_predictions_validation=[validation_path+path for path in os.listdir(validation_path)]
saved_predictions_test=[test_path+path for path in os.listdir(test_path)]

Defining transformations

In [5]:
import albumentations as A
from albumentations import *
affine_transform=A.Compose([Flip(), ShiftScaleRotate()])
elastic_transform=A.Compose([ElasticTransform()])
pixel_wise_transform=A.Compose([GaussNoise(), HueSaturationValue(), RandomBrightnessContrast(), RandomGamma()])
affine_elastic_pixelwise_transform=A.Compose([Flip(), ShiftScaleRotate(), ElasticTransform(), GaussNoise(), HueSaturationValue(), RandomBrightnessContrast(), RandomGamma()])
affine_elastic=A.Compose([Flip(), ShiftScaleRotate(), ElasticTransform()])
slice_length=1000
resize_transform=A.Compose([RandomCrop(slice_length, slice_length)])

In [6]:
import numpy as np
from torch.utils.data import Dataset
from pathlib import Path
import cv2
from PIL import Image
from torchvision.transforms import *

ccRCC=0
pRCC=1

class VascularClassificationDataset(Dataset):
    """
    Dataset class for the vascular dataset:
    """
    def __init__(self, list_images_input, mean_normalization=None, std_normalization=None, size=224, crop=None,
                 transform=None, return_names=False):
        super(VascularClassificationDataset, self).__init__()
        self.images_input = list_images_input
        self.mean_normalization=mean_normalization
        self.std_normalization=std_normalization
        self.totensor=ToTensor()
        self.normalize=Normalize(self.mean_normalization, self.std_normalization)
        self.crop=crop
        self.resize=Compose([Resize(size)])
        self.transform=transform
        self.return_names=return_names
        
    def __len__(self):
        return len(self.images_input)

    def __getitem__(self, index):

      img_path = str(self.images_input[index])
      img = Image.open(img_path).convert('RGB')

      if 'ccRCC' in img_path:
        label=ccRCC
      elif 'pRCC' in img_path:
        label=pRCC

      #Resize
      if self.crop:
        cropped = self.crop(image=np.array(img))          
        img = cropped['image']
        
      img=self.resize(img)
      
      if self.transform:
          transformed = self.transform(image=np.array(img))          
          img = transformed['image']

      img=self.totensor(img)

      if self.mean_normalization:
        img=self.normalize(img)

      if self.return_names==True:
        return img, img_path, label
        
      else:
        return img, label

Defining model, loss function, metrics, and initializing Wandb

In [16]:
from torch.nn import BCELoss
from torch.nn import BCEWithLogitsLoss
from torchvision.models import resnet50

batch_size=20
lr=0.01
momentum=0.9
max_n_epochs=80
dataset_type='original'
step_size=5
size=224
gamma=0.8
transforms=affine_elastic

model = resnet50(num_classes=2)
model.cuda()

def dice_loss(pred,target):
    numerator = 2 * torch.sum(pred * target)
    denominator = torch.sum(pred + target)
    return 1 - (numerator + 1) / (denominator + 1)

optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)

loss=BCELoss
scheduler=StepLR(optimizer, step_size=step_size, gamma=gamma)

wandb.init(project="Bioinformatics_classification_experiments_original",
           config={
              "batch_size": batch_size,
              "model": model,
              "learning_rate": lr,
              "max_n_epochs": max_n_epochs,
              "momentum": momentum,
              "optimizer": optimizer,
              "dataset": dataset_type,
              "loss": loss,
              "transforms": transforms,
              "scheduler": scheduler,
              "step_size":step_size,
              "gamma":gamma,
              "size": size
           })

run_id=wandb.run.id
save_model_path=save_model_folder+run_id


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Defining dataset and dataloaders

In [17]:
training_dataset=VascularClassificationDataset(saved_predictions_training, size=size)
validation_dataset=VascularClassificationDataset(saved_predictions_training, size=size)
test_dataset=VascularClassificationDataset(saved_predictions_test, size=size)

save_model_folder='/content/gdrive/MyDrive/Bioinformatics2/saved_models/'

print(f'Training dataset length: {len(training_dataset)}')
print(f'Validation dataset length: {len(validation_dataset)}')
print(f'Test dataset length: {len(test_dataset)}')

train_dataloader=DataLoader(training_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
valid_dataloader=DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
test_dataloader=DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=1)

Training dataset length: 160
Validation dataset length: 160
Test dataset length: 10


Training, validation and logging into Wandb

In [18]:
from torch.nn.functional import binary_cross_entropy
from torch.nn import Sigmoid
from sklearn.metrics import f1_score
from torch.nn import CrossEntropyLoss

criterion=CrossEntropyLoss()
highest_f1=0

for epoch in range(1, max_n_epochs+1, 1):

  model.train()
  training_loss=0
  training_f1=0

  y_true=[]
  y_pred=[]

  for x,y in tqdm(train_dataloader): 

    optimizer.zero_grad()
    out=model((x>0).cuda().float())
    my_loss=criterion(out.cuda(), y.cuda())

    for tupl in out:
      if tupl[0]>tupl[1]:
        y_pred.append(0)
      else:
        y_pred.append(1)
    for target in y:
      y_true.append(target)

    training_loss+=my_loss.item()
    my_loss.backward()
    optimizer.step()
  
  training_f1=f1_score(y_pred, y_true)
  
  training_samples=len(y_true)
  training_loss=training_loss/len(y_true)

  print(f'Epoch #{epoch}, training loss: {training_loss/len(y_true)}, training f1: {training_f1}')

  scheduler.step()

  model.eval()
  validation_loss=0
  validation_iou=0
  y_true=[]
  y_pred=[]

  for x,y in tqdm(valid_dataloader): 

    out=model((x>0).cuda().float())
    my_loss=criterion(out, y.cuda()) 

    for tupl in out:
      if tupl[0]>tupl[1]:
        y_pred.append(0)
      else:
        y_pred.append(1)
    for target in y:
      y_true.append(target)

    validation_loss+=my_loss.item()

  validation_f1=f1_score(y_pred, y_true)

  validation_samples=len(y_true)
  validation_loss=validation_loss/validation_samples

  print(f'Epoch #{epoch}, validation loss: {validation_loss}, validation f1: {validation_f1}')

  wandb.log({"training loss": training_loss, "validation loss": validation_loss, "epoch":epoch})
  wandb.log({"training iou": training_f1, "validation iou": validation_f1, "epoch": epoch})

  if validation_f1>highest_f1:
    print(f'saving model into {save_model_path}')
    highest_f1=validation_f1
    torch.save(model,f'{save_model_path}.pth')
    last_epoch_saved=epoch


100%|██████████| 8/8 [00:03<00:00,  2.06it/s]


Epoch #1, training loss: 0.0010099722561426462, training f1: 0.5217391304347826


100%|██████████| 8/8 [00:01<00:00,  4.97it/s]


Epoch #1, validation loss: 90.37755584716797, validation f1: 0.0


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]


Epoch #2, training loss: 0.0008173553785309196, training f1: 0.7468354430379746


100%|██████████| 8/8 [00:01<00:00,  4.90it/s]


Epoch #2, validation loss: 841.9392211914062, validation f1: 0.0


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]


Epoch #3, training loss: 0.000636075681696866, training f1: 0.8729281767955802


100%|██████████| 8/8 [00:01<00:00,  4.87it/s]


Epoch #3, validation loss: 247.20379486083985, validation f1: 0.0


100%|██████████| 8/8 [00:03<00:00,  2.15it/s]


Epoch #4, training loss: 0.0006649960114009446, training f1: 0.8048780487804877


100%|██████████| 8/8 [00:01<00:00,  5.04it/s]


Epoch #4, validation loss: 234.33869323730468, validation f1: 0.0


100%|██████████| 8/8 [00:03<00:00,  2.15it/s]


Epoch #5, training loss: 0.0004509238297875839, training f1: 0.8800000000000001


100%|██████████| 8/8 [00:01<00:00,  4.97it/s]


Epoch #5, validation loss: 38.5863697052002, validation f1: 0.024096385542168676
saving model into /content/gdrive/MyDrive/Bioinformatics2/saved_models/17zwg9ii


100%|██████████| 8/8 [00:03<00:00,  2.16it/s]


Epoch #6, training loss: 0.000211657600969275, training f1: 0.9534883720930233


100%|██████████| 8/8 [00:01<00:00,  4.92it/s]


Epoch #6, validation loss: 0.10668779872357845, validation f1: 0.8817204301075269
saving model into /content/gdrive/MyDrive/Bioinformatics2/saved_models/17zwg9ii


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]


Epoch #7, training loss: 0.0005534278169403706, training f1: 0.9101796407185628


100%|██████████| 8/8 [00:01<00:00,  5.00it/s]


Epoch #7, validation loss: 4.10017466545105, validation f1: 0.4230769230769231


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]


Epoch #8, training loss: 8.721182766350921e-05, training f1: 0.9426751592356688


100%|██████████| 8/8 [00:01<00:00,  5.01it/s]


Epoch #8, validation loss: 0.1562939173462837, validation f1: 0.9019607843137255
saving model into /content/gdrive/MyDrive/Bioinformatics2/saved_models/17zwg9ii


100%|██████████| 8/8 [00:03<00:00,  2.13it/s]


Epoch #9, training loss: 7.062176333420212e-05, training f1: 0.9523809523809524


100%|██████████| 8/8 [00:01<00:00,  4.92it/s]


Epoch #9, validation loss: 0.004513394643436186, validation f1: 0.9647058823529412
saving model into /content/gdrive/MyDrive/Bioinformatics2/saved_models/17zwg9ii


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]


Epoch #10, training loss: 6.512882871383852e-05, training f1: 0.9487179487179488


100%|██████████| 8/8 [00:01<00:00,  4.92it/s]


Epoch #10, validation loss: 0.0013847873113149944, validation f1: 0.9938650306748467
saving model into /content/gdrive/MyDrive/Bioinformatics2/saved_models/17zwg9ii


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]


Epoch #11, training loss: 2.6329479809312773e-05, training f1: 0.9704142011834319


100%|██████████| 8/8 [00:01<00:00,  4.96it/s]


Epoch #11, validation loss: 0.0002540394423306225, validation f1: 1.0
saving model into /content/gdrive/MyDrive/Bioinformatics2/saved_models/17zwg9ii


100%|██████████| 8/8 [00:03<00:00,  2.15it/s]


Epoch #12, training loss: 9.006759297562894e-06, training f1: 0.993939393939394


100%|██████████| 8/8 [00:01<00:00,  4.90it/s]


Epoch #12, validation loss: 0.0003994151214705255, validation f1: 0.9938650306748467


100%|██████████| 8/8 [00:03<00:00,  2.15it/s]


Epoch #13, training loss: 3.294869771593767e-05, training f1: 0.9753086419753086


100%|██████████| 8/8 [00:01<00:00,  4.91it/s]


Epoch #13, validation loss: 0.0011740764485750788, validation f1: 0.9813664596273292


100%|██████████| 8/8 [00:03<00:00,  2.16it/s]


Epoch #14, training loss: 3.2045402571156956e-06, training f1: 1.0


100%|██████████| 8/8 [00:01<00:00,  4.94it/s]


Epoch #14, validation loss: 7.633878983597242e-05, validation f1: 1.0


100%|██████████| 8/8 [00:03<00:00,  2.17it/s]


Epoch #15, training loss: 2.0393832460285922e-05, training f1: 0.9759036144578312


100%|██████████| 8/8 [00:01<00:00,  5.01it/s]


Epoch #15, validation loss: 0.0011990501341642811, validation f1: 0.993939393939394


100%|██████████| 8/8 [00:03<00:00,  2.17it/s]


Epoch #16, training loss: 2.0699366008614106e-06, training f1: 0.9938650306748467


100%|██████████| 8/8 [00:01<00:00,  4.95it/s]


Epoch #16, validation loss: 0.00031943423482516664, validation f1: 0.993939393939394


 25%|██▌       | 2/8 [00:01<00:03,  1.69it/s]


KeyboardInterrupt: ignored

TESTING

In [None]:
#loading model
from torch.nn import Sigmoid
model_path='/content/gdrive/MyDrive/Bioinformatics2/saved_models/1o9z0tra.pth'
model=torch.load(model_path, map_location=torch.device('cuda'))

In [23]:
 model.eval()

test_loss=0
test_f1=0
y_true=[]
y_pred=[]

for x,y in tqdm(test_dataloader): 

  out=model((x>0).cuda().float())
  my_loss=criterion(out, y.cuda()) 

  for tupl in out:
    if tupl[0]>tupl[1]:
      y_pred.append(0)
    else:
      y_pred.append(1)
  for target in y:
    y_true.append(target)

  test_loss+=my_loss.item()

test_f1=f1_score(y_pred, y_true)

test_samples=len(y_true)
test_loss=test_loss/validation_samples

print(f'Epoch #{epoch}, test loss: {test_loss}, test f1: {test_f1}')

print([(pred,target) for pred, target in zip(y_pred,y_true)])

100%|██████████| 1/1 [00:00<00:00,  3.41it/s]

Epoch #17, test loss: 1.4927728443581146e-06, test f1: 1.0
[(1, tensor(1)), (1, tensor(1)), (1, tensor(1)), (0, tensor(0)), (1, tensor(1)), (0, tensor(0)), (0, tensor(0)), (0, tensor(0)), (1, tensor(1)), (1, tensor(1))]





In [None]:
for x,y in tqdm(train_dataloader): 

    optimizer.zero_grad()
    out=model((x>0).cuda().float())
    logit, feature_conv, feature_convNBN = self.resNet(inputVariable[t])
    bz, nc, h, w = feature_conv.size()
    feature_conv = feature_conv.view(bz, nc, h*w)
    h_x = F.softmax(logit, dim=1).data
    probs, idx = h_x.sort(1, True)
    cam_img = torch.bmm(self.weight_softmax[idx[:, 0]].unsqueeze(1), feature_conv).squeeze(1)
    cam_img = F.softmax(cam_img, 1).data 
    cam_img = cam_img.cpu().numpy()
    cam_img = cam_img.reshape(h, w)
    cam_img = cam_img - np.min(cam_img)
    cam_img = cam_img / np.max(cam_img)
    cam_img = np.uint8(255 * cam_img)
    output_cam = cv2.resize(cam_img, size_upsample)
    img = cv2.cvtColor(np.uint8(img), cv2.COLOR_RGB2BGR)
    heatmap = cv2.applyColorMap(output_cam, cv2.COLORMAP_JET)
    result = heatmap * 0.3 + img * 0.5