<a href="https://colab.research.google.com/github/Vengadore/Notebooks/blob/master/Training_DiabeticRetinopathy_Dataset_on_Efficientnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Diabetic Retinopathy Detection

Kaggle has a large competition of Diabetic Retinopathy detection which can be found here:
https://www.kaggle.com/c/diabetic-retinopathy-detection/

Their dataset consists in 35126 images labeled from 0 to 4 according to the degree of Retinopathy.
An analysis of the data is provided in this notebook.

In [1]:
!rm -rf sample_data
!nvidia-smi

Tue Nov 10 02:22:03 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.32.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P8    11W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Install dependencies

In [2]:
from IPython.display import clear_output

!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
!pip install efficientnet_pytorch
clear_output(wait=False)

## Load data from local drive


In [5]:
!wget -O File.zip "https://correoipn-my.sharepoint.com/:u:/g/personal/ccarrillog1400_alumno_ipn_mx/EXqtaQYbJBRNrBsC4Q8Ut-0BsKVqR_3bVPIdNI0eNAPf6A?download=1"
!unzip File.zip
!rm File.zip
clear_output(wait=False)
print("Data Downloaded")

Data Downloaded


In [6]:
import pandas as pd
import os

data = pd.read_csv('data.csv')
data['image'] = data['image'].apply(lambda x : os.path.join("./train",x))
data.head()

Unnamed: 0,image,level
0,./train/17123_right.jpeg,0
1,./train/12616_right.jpeg,0
2,./train/6541_left.jpeg,0
3,./train/14418_right.jpeg,0
4,./train/39598_left.jpeg,0


#### Split data

In [7]:
from sklearn.model_selection import train_test_split

#Split data
X_train, X_test, y_train, y_test = train_test_split(data['image'], data['level'], test_size=0.22, random_state=42)

In [8]:
y_train.value_counts()

4    566
3    561
1    555
2    552
0    527
Name: level, dtype: int64

## Definition of the model

In [9]:
from efficientnet_pytorch import EfficientNet
import torch
network_name = 'efficientnet-b7'
model = EfficientNet.from_pretrained(network_name)
## Change efficientnet final layer
model._fc = torch.nn.Linear(in_features=2560,out_features=5,bias = True)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b7-dcc49843.pth


HBox(children=(FloatProgress(value=0.0, max=266860719.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b7


In [10]:
model._fc

Linear(in_features=2560, out_features=5, bias=True)

In [15]:
from torchvision.transforms import Resize,ToTensor,Compose,Normalize
from torchvision.transforms import RandomHorizontalFlip,RandomVerticalFlip,RandomRotation,ColorJitter
from PIL import Image

transforms = Compose([RandomHorizontalFlip(),RandomVerticalFlip(),RandomRotation(15)]) # Transformations for the training images

composed = Compose([Resize((600,900)), # Resize to a fit size for efficientnet
                    ToTensor(),  # Convert into sensor
                    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) # Normalize image

### Training parameters

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device);
torch.manual_seed(17)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [17]:
try:
    model = torch.load('/content/checkpoint_b0_11.ph', map_location=device)
except:
    print("No Checkpoint loaded")

In [18]:
classes = {0:[0,0,0,0,1],
           1:[0,0,0,1,0],
           2:[0,0,1,0,0],
           3:[0,1,0,0,0],
           4:[1,0,0,0,0]}

In [None]:
from tqdm import tqdm
import random

epochs = 100
batch_size = 2

for epoch in range(epochs):
    
    indexes = [idx for idx in range(len(X_train))]
    pbar = tqdm( range(len(X_train)//batch_size),ncols = 100)
    running_loss = 0.0
    running_acc = 0.0
    t = 0

    for step in pbar:
        # Load data
        idx = random.sample(indexes,batch_size)
        X = X_train.iloc[idx]
        y = y_train.iloc[idx]

        # Remove indexes
        [indexes.remove(i) for i in idx]

        # Load images
        try:
            images = [Image.open(File) for File in X]
        except:
            continue
        # Load y_true
        y_true = torch.LongTensor([c for c in y]).to(device)
        
        # Convert images to tensor
        x_batch = torch.FloatTensor().to(device)
        for image in images:
            P = transforms(image)
            P = composed(P).unsqueeze(0).to(device)
            x_batch = torch.cat((x_batch,P))

        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(x_batch)
        loss = criterion(outputs, y_true)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        t += batch_size

        _, preds = torch.max(outputs, 1)
        running_acc += torch.sum(preds == y_true).cpu().detach().numpy()
        acc = torch.sum(preds == y_true).cpu().detach().numpy()/batch_size;
        pbar.set_description("Epoch: {} Accuracy: {:0.5f} Loss: {:0.5f} ".format(epoch+1,running_acc/t,loss.item()))
    #Validation
    val_acc = 0.0
    val_loss = 0.0
    t = 0
    for point in range(len(X_test)//batch_size):
        with torch.no_grad():

            X = X_test.iloc[point*batch_size:(point+1)*batch_size]
            y = y_test.iloc[point*batch_size:(point+1)*batch_size]


            # Load images
            try:
                images = [Image.open(File) for File in X]
            except:
                continue
            # Load y_true
            y_true = torch.LongTensor([c for c in y]).to(device)
            
            # Convert images to tensor
            x_batch = torch.FloatTensor().to(device)
            for image in images:
                P = composed(image).unsqueeze(0).to(device)
                x_batch = torch.cat((x_batch,P))

            
            outputs = model(x_batch)
            loss = criterion(outputs, y_true)
            val_loss += loss.item()
            t += batch_size
            _, preds = torch.max(outputs, 1)
            val_acc += torch.sum(preds == y_true).cpu().detach().numpy()
    print("Validation -- Accuracy: {:0.5f} Loss: {:0.5f} \n".format(val_acc/t,loss.item()))
    try:
        torch.save(model,"/content/checkpoint_{}_{}_{:0.5f}.ph".format(network_name,epoch+1,val_acc/t))
    except:
        continue

Epoch: 1 Accuracy: 0.66848 Loss: 0.19707 : 100%|██████████████| 1380/1380 [1:20:19<00:00,  3.49s/it]


Validation -- Accuracy: 0.63368 Loss: 1.80880 



Epoch: 2 Accuracy: 0.67826 Loss: 0.13895 : 100%|██████████████| 1380/1380 [1:20:00<00:00,  3.48s/it]


Validation -- Accuracy: 0.63111 Loss: 1.66226 



Epoch: 3 Accuracy: 0.66739 Loss: 1.31670 : 100%|██████████████| 1380/1380 [1:19:40<00:00,  3.46s/it]


Validation -- Accuracy: 0.62853 Loss: 1.60525 



Epoch: 4 Accuracy: 0.66087 Loss: 0.99498 : 100%|██████████████| 1380/1380 [1:16:58<00:00,  3.35s/it]


Validation -- Accuracy: 0.61825 Loss: 1.88968 



Epoch: 5 Accuracy: 0.67428 Loss: 0.36158 : 100%|██████████████| 1380/1380 [1:15:47<00:00,  3.30s/it]


Validation -- Accuracy: 0.63239 Loss: 1.64491 



Epoch: 6 Accuracy: 0.66558 Loss: 0.27281 : 100%|██████████████| 1380/1380 [1:14:52<00:00,  3.26s/it]


Validation -- Accuracy: 0.62725 Loss: 1.53004 



Epoch: 7 Accuracy: 0.66014 Loss: 0.70921 : 100%|██████████████| 1380/1380 [1:14:31<00:00,  3.24s/it]


Validation -- Accuracy: 0.62082 Loss: 1.62294 



Epoch: 8 Accuracy: 0.68085 Loss: 1.53159 :   3%|▌               | 46/1380 [02:28<1:13:20,  3.30s/it]

In [None]:
torch.save(model,"checkpoint.ph")

## Evaluation

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
predicted = []
true_values = []
for point in range(len(X_test)//batch_size):
    with torch.no_grad():

        X = X_test.iloc[point*batch_size:(point+1)*batch_size]
        y = y_test.iloc[point*batch_size:(point+1)*batch_size]

        true_values.append(y)
        # Load images
        try:
            images = [Image.open(File) for File in X]
        except:
            continue
        # Load y_true
        y_true = torch.LongTensor([c for c in y]).to(device)
        
        # Convert images to tensor
        x_batch = torch.FloatTensor().to(device)
        for image in images:
            P = composed(image).unsqueeze(0).to(device)
            x_batch = torch.cat((x_batch,P))

        
        outputs = model(x_batch)
        loss = criterion(outputs, y_true)
        
        _, preds = torch.max(outputs, 1)
        predicted.append(preds)

In [None]:
true_values = np.array(true_values).reshape(-1)
dummy = torch.FloatTensor()
for tensor in predicted:
    dummy = torch.cat((tensor,dummy))
predicted = dummy.view(-1)

ValueError: ignored

In [None]:
confusion_matrix(true_values, predicted)

array([[27, 42, 38, 42, 31],
       [24, 47, 25, 30, 25],
       [28, 39, 33, 23, 31],
       [22, 32, 30, 29, 29],
       [21, 34, 35, 27, 24]])