In [None]:
# Mount Google Drive into the Colab runtime; every data / checkpoint path used
# further down lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader


import torchvision
from torchvision import transforms as T
from torchvision.models import detection
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets.utils import download_url
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
from torchvision.ops import box_convert

from torch.utils.data import random_split
from torchvision.utils import make_grid
from torchvision import transforms
from collections import Counter

from PIL import Image, ImageDraw
import cv2
from torch.cuda import is_available as check_cuda
from PIL.ImageOps import grayscale

In [None]:
from sklearn.model_selection import train_test_split

# Load Data

In [None]:
# Load every annotation JSON and flatten it into one row per image:
# image id, box corners (x1, y1, x2, y2) and an integer class id.
labels_dict = {}
to_df = []
# Local (non-Colab) alternative:
#folder_name = os.path.dirname(os.getcwd()) + "\\Processed Data\\preprocessed_data\\AI+_processed\\jsons"
folder_name = "/content/drive/MyDrive/Practicum Data/Processed Data/preprocessed_data/AI+_processed/jsons"
for path in os.listdir(folder_name):
    # Ignore anything that is not an annotation file (sub-folders, checkpoint
    # directories, stray images) instead of letting json.load fail on it.
    if not path.lower().endswith('.json'):
        continue
    with open(os.path.join(folder_name, path)) as json_file:
        data = json.load(json_file)
    im = os.path.splitext(path)[0]

    # Only the first annotated shape of each file is used.
    shape = data['shapes'][0]
    labels = [shape['label']]
    pts = shape['points']

    # The two rectangle corners may be stored in either order; normalise them so
    # that (x1, y1) is the top-left and (x2, y2) the bottom-right corner.
    # Detection targets need boxes with positive width and height.
    x1, x2 = min(pts[0][0], pts[1][0]), max(pts[0][0], pts[1][0])
    y1, y2 = min(pts[0][1], pts[1][1]), max(pts[0][1], pts[1][1])

    labels_dict[im] = {'label': labels, 'boxes': [np.array([x1, y1, x2, y2])]}
    to_df.append([im, x1, y1, x2, y2, labels[0]])

df = pd.DataFrame(to_df, columns=['image_id', 'x1', 'y1', 'x2', 'y2', 'label'])
# Class ids: 1 for 'benign', 2 for every other label (vectorised, no row loop).
df['label_bool'] = np.where(df['label'] == 'benign', 1, 2)
df = df.drop(columns=['label'])
unique_imgs = df['image_id'].unique()

In [None]:
# Per-image metadata table; its `case_id` column is what the train/validation
# split is based on.
# NOTE(review): the first row of the CSV is discarded here - presumably a junk
# row; confirm against the file before relying on it.
for_splitting = pd.read_csv(
    '/content/drive/MyDrive/Practicum Data/Processed Data/metadata_df_all.csv'
).iloc[1:, :]
unique_cases = list(for_splitting['case_id'].unique())
for_splitting.head()

Unnamed: 0,image_name,image_type,image_height,image_width,label,dataset,case_id
1,051D402B1CE1483B9D6519C3CFE6CFAF_8277284,jpg,473,734,malignant,AIplus,13136661
2,051D402B1CE1483B9D6519C3CFE6CFAF_8277285,jpg,473,734,malignant,AIplus,13136661
3,051D402B1CE1483B9D6519C3CFE6CFAF_8277286,jpg,473,734,malignant,AIplus,13136661
4,1.2.826.0.1.3680043.2.461.10889799.1302130936,jpg,331,474,benign,AIplus,13121058
5,1.2.826.0.1.3680043.2.461.10889799.2958215132,jpg,333,473,benign,AIplus,13121058


In [None]:
# Split on case ids rather than on images, so that all images belonging to one
# case end up on the same side of the train / validation split.
train_cases, val_cases = train_test_split(unique_cases, test_size=0.1, random_state=1993)

# Image names belonging to each side of the split.
train_names = for_splitting.loc[for_splitting['case_id'].isin(train_cases), 'image_name']
val_names = for_splitting.loc[for_splitting['case_id'].isin(val_cases), 'image_name']

# Position of every annotated image inside `unique_imgs`, built once so each
# lookup is O(1) instead of re-scanning the array for every image.
img_position = {name: pos for pos, name in enumerate(unique_imgs)}

# Keep only images that actually have an annotation, expressed as positions in
# `unique_imgs` (this is what CustData expects as `indices`).
train_inds = [img_position[name] for name in train_names if name in img_position]
val_inds = [img_position[name] for name in val_names if name in img_position]
print(len(train_inds),len(val_inds))

1032 113


In [None]:
# Preview the flattened annotation table: image id, box corners and class id.
df.head()

Unnamed: 0,image_id,x1,y1,x2,y2,label_bool
0,20180720091141,164.384615,57.995951,247.380567,87.955466,1
1,20181112141259,106.57085,37.753036,252.319838,195.242915,2
2,20180717094148,93.615385,91.59919,221.550607,196.052632,2
3,20181119135628,185.072874,75.263158,492.360324,328.704453,2
4,20181113113547,109.283401,30.101215,558.676113,301.761134,2


# Setting up Dataset

In [None]:
class CustData(torch.utils.data.Dataset):
    """Detection dataset yielding ``(image_tensor, target)`` pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table with an ``image_id`` column, the box corners
        (x1, y1, x2, y2) in columns 1-4 and an integer ``label_bool`` column.
    unique_imgs : sequence
        Image ids; ``indices`` refers to positions in this sequence.
    indices : sequence of int
        Positions in ``unique_imgs`` that make up this split.
    img_dir : str, optional
        Folder containing ``<image_id>.jpg``. Defaults to the Drive folder
        used by this notebook.

    Each item is ``(image, target)`` where ``target['boxes']`` is a float32
    tensor of shape (N, 4) and ``target['label']`` an int64 tensor of shape
    (N,), N being the number of annotation rows of that image.
    """

    DEFAULT_IMG_DIR = '/content/drive/MyDrive/Practicum Data/Processed Data/preprocessed_data/AI+_processed/imgs/'

    def __init__(self,df,unique_imgs,indices,img_dir=None):
        self.df = df
        self.unique_imgs = unique_imgs
        self.indices = indices
        self.img_dir = self.DEFAULT_IMG_DIR if img_dir is None else img_dir

    def __len__(self):
        return len(self.indices)

    def __getitem__(self,idx):
        image_name = self.unique_imgs[self.indices[idx]]
        # All annotation rows of this image; boxes AND labels must both come
        # from these rows so that they stay aligned one-to-one.
        rows = self.df[self.df.image_id == image_name]
        boxes = rows.iloc[:, 1:5].to_numpy(dtype="float32")
        labels = torch.tensor(rows['label_bool'].tolist(), dtype=torch.int64)

        # Context manager closes the file handle once the pixels are decoded.
        with Image.open(os.path.join(self.img_dir, image_name + ".jpg")) as raw:
            img = raw.convert('RGB')

        target = {}
        target['boxes'] = torch.tensor(boxes)
        # Key is 'label' (singular): the training / inference cells read it
        # under this name and rename it to 'labels' for the model.
        target['label'] = labels
        return T.ToTensor()(img), target

In [None]:
def custom_collate(data):
    """Identity collate function.

    Hands the list of samples produced by the dataset back untouched, so a
    batch stays a plain list of ``(image, target)`` items instead of being
    stacked into a single tensor by the DataLoader's default collation.
    """
    batch = data
    return batch

In [None]:
# Training loader: shuffled mini-batches of 4 samples, returned as plain lists
# of (image, target) pairs by custom_collate.
train_dl = DataLoader(
    CustData(df, unique_imgs, train_inds),
    batch_size=4,  # !!!
    shuffle=True,
    collate_fn=custom_collate,
    pin_memory=torch.cuda.is_available(),  # pin host memory only when a GPU is present
)

In [None]:
# Validation loader: fixed order (no shuffling), batches of 8 samples, returned
# as plain lists of (image, target) pairs by custom_collate.
val_dl = DataLoader(
    CustData(df, unique_imgs, val_inds),
    batch_size=8,
    shuffle=False,
    collate_fn=custom_collate,
    pin_memory=torch.cuda.is_available(),  # pin host memory only when a GPU is present
)

# Model Training

In [None]:
# Three output classes: index 0 is the background class of torchvision's
# detection models, 1 and 2 are the class ids stored in `label_bool`.
num_classes = 3

# Start from the pretrained Faster R-CNN (ResNet-50 FPN) and swap its box
# classification head for a fresh one sized for `num_classes`.
model = detection.fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [None]:
# SGD with momentum and weight decay over all model parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.0005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Commented-out alternative with the same learning rate and weight decay:
#optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.0005)

In [None]:
num_epochs = 5
model.to(device)
# The detection model only returns its loss dictionary in training mode. Set it
# explicitly so this cell still works when re-run after a cell that called
# model.eval().
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0
    for data in train_dl:
        # `data` is a plain list of (image, target) pairs (see custom_collate).
        imgs = [sample[0].to(device) for sample in data]
        # The dataset stores class ids under 'label'; the model expects 'labels'.
        targets = [
            {
                'boxes': sample[1]['boxes'].to(device),
                'labels': sample[1]['label'].to(device),
            }
            for sample in data
        ]

        loss_dict = model(imgs, targets)
        loss = sum(loss_dict.values())
        epoch_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Summed (not averaged) batch loss of the epoch.
    print(epoch_loss)

53.16481446676831
31.30473896618326
26.478667316840834
24.503067402870286
23.042972047071228


In [None]:
# Persist the whole model object (not just a state_dict) to Drive.
# NOTE(review): the file name says "withadam" although the active optimizer in
# this notebook is SGD, and the checkpoint loaded in the next section comes
# from a different path - confirm both are intentional.
torch.save(model,'/content/drive/MyDrive/fasterrcnn_renet50_withadam_lr0005.pth')

# Loading Model for Experimentation

In [None]:
# Load a previously saved full-model checkpoint for inference.
# SECURITY: torch.load unpickles arbitrary Python objects - only load
# checkpoints that come from a trusted source.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects full-model pickles; pass weights_only=False there if needed.
# map_location lets a checkpoint saved on a GPU runtime load on a CPU-only one.
model = torch.load(
    '/content/drive/MyDrive/Practicum Model Saves/fasterrcnn_renet50.pth',
    map_location=device,
)
# Put the model on the device the inputs are sent to and switch it to
# inference mode; the assignment also keeps the cell from echoing the model.
model = model.to(device).eval()

1


In [None]:
# Run the model over the validation set and collect the predicted boxes per
# image file name.
final_results = {}
model.eval()

# Running position inside `val_inds`. This matches the loader order because
# val_dl is built with shuffle=False, and unlike `8*step+i` it does not depend
# on the loader's batch size.
sample_pos = 0

# No gradients are needed for inference; skipping them saves memory and time.
with torch.no_grad():
    for data in val_dl:
        for img, target in data:
            boxes = target['boxes']  # ground truth, only used by the display code below

            output = model([img.to(device)])
            pred_box = output[0]['boxes'].tolist()

            img_name = str(unique_imgs[val_inds[sample_pos]]) + '.jpg'
            final_results[img_name] = pred_box
            sample_pos += 1

            ##### CODE BELOW IS FOR DISPLAYING ############
            #p  = Image.fromarray((img.permute(1,2,0).detach().numpy()*255).astype('uint8'))
            #draw = ImageDraw.Draw(p)

            ### Visualize Real Bounding Boxes
            #for box in boxes:
              #draw.rectangle(list(box), fill=None, outline='red')

            ### Visualize predicted Bounding Boxes
            #for box in pred_box:
              #draw.rectangle(list(box), fill=None, outline='blue')

            #display(p)

In [None]:
# Persist the predicted boxes (image file name -> list of boxes) as JSON in the
# current working directory.
with open('data.json', 'w') as results_file:
    json.dump(final_results, results_file)