No DR: This class represents images without any signs of diabetic retinopathy.

Mild/Moderate DR: This class includes images with mild or moderate signs of diabetic retinopathy, such as microaneurysms, dot and blot hemorrhages, or mild retinal abnormalities.

Severe/Profound DR: This class comprises images with severe or profound signs of diabetic retinopathy, including proliferative retinopathy, pre-retinal hemorrhages, venous beading, or macular edema.

In [83]:
#libraries 
import os
import shutil
import random
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

import cv2

import tensorflow
import keras
from keras.layers import Conv2D, Dropout, Flatten, Dense
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator

import matplotlib.pyplot as plt
import seaborn as sns 

import pandas as pd 
import numpy as np

import shutil 
import os

## Variables 

In [84]:

src_train_dir_path= '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/cropped_aptos/train/'

In [85]:

src_val_dir_path='/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/cropped_aptos/val/'

In [86]:

src_test_dir_path= '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/cropped_aptos/test/'

In [87]:
# test, train and val directories
train_directory = '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/'
val_directory = '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/'
test_directory = '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/'

## Making dataframe from Directories

In [88]:
# data = pd.DataFrame(columns=['diagnosis', 'src_path', 'filename'])
# data.head()

In [89]:
def make_dataframe(source_path):
    data = []
    for class_name in os.listdir(source_path):
        if not class_name.startswith('.'): 
            for img_path in os.listdir(os.path.join(source_path, class_name)):
                if not img_path.startswith('.'):
    #                 print(os.path.join(src_train_dir_path, class_name, img_path))
                    src_path = os.path.join(source_path, class_name, img_path)
                    filename = img_path
                    data.append({'diagnosis': class_name, 'src_path': src_path, 'filename': filename})
    dataframe = pd.DataFrame(data)
    return dataframe
    

In [90]:
train_dataframe = make_dataframe(src_train_dir_path)
train_dataframe.shape

(2930, 3)

In [91]:
val_dataframe = make_dataframe(src_val_dir_path)
val_dataframe.shape

(366, 3)

In [92]:
test_dataframe = make_dataframe(src_test_dir_path)
test_dataframe.shape

(366, 3)

In [93]:
train_dataframe.head()

Unnamed: 0,diagnosis,src_path,filename
0,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,2d7666b8884f.png
1,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,50840c36f0b4.png
2,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,30cab14951ac.png
3,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,a8582e346df0.png
4,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,cab3dfa7962d.png


In [94]:
classes = {"No_DR": 'No_DR',
            "Mild": 'NPDR', 
            "Moderate": 'NPDR',
            "Severe": 'PDR',
            "Proliferate_DR": 'PDR'}

In [95]:
def map_dataframe(dataframe, dir_path):
#     print(dir_path)
    dataframe['class'] = list(map(classes.get, dataframe['diagnosis']))
#     dataframe['path'] = [i[1]['label']+'/'+i[1]['id']+'.jpeg' for i in dataframe.iterrows()]
    dataframe['dest_path'] = [dir_path+i[1]['class']+'/'+i[1]['filename'] for i in dataframe.iterrows()]
    print(dataframe['dest_path'][0])
    return dataframe 
    

In [96]:
train_df = map_dataframe(train_dataframe, train_directory)
train_df[:5]

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/2d7666b8884f.png


Unnamed: 0,diagnosis,src_path,filename,class,dest_path
0,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,2d7666b8884f.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
1,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,50840c36f0b4.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
2,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,30cab14951ac.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
3,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,a8582e346df0.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
4,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,cab3dfa7962d.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...


In [97]:
val_df = map_dataframe(val_dataframe, val_directory)
val_df[:5]

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/0f495d87656a.png


Unnamed: 0,diagnosis,src_path,filename,class,dest_path
0,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,0f495d87656a.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
1,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,00cb6555d108.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
2,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,15cc2aef772a.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
3,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,12e3f5f2cb17.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
4,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,03e25101e8e8.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...


In [98]:
test_df = map_dataframe(test_dataframe, test_directory)
test_df[:5]

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/ead23cc922ed.png


Unnamed: 0,diagnosis,src_path,filename,class,dest_path
0,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,ead23cc922ed.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
1,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,ee78ce914066.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
2,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,e9ff9352ccb3.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
3,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,ea15a290eb96.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...
4,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,eba3acc42197.png,NPDR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...


## EDA

In [99]:
train_df['class'].value_counts()

No_DR    1434
NPDR     1108
PDR       388
Name: class, dtype: int64

In [100]:
val_df['class'].value_counts()

No_DR    172
NPDR     144
PDR       50
Name: class, dtype: int64

In [101]:
test_df['class'].value_counts()

No_DR    199
NPDR     117
PDR       50
Name: class, dtype: int64

In [104]:
train_df.shape[0]+val_df.shape[0]+test_df.shape[0]

3662

## Train, val and test split

In [105]:
# # training set -> 70%, validation_set -> 15% and testing set-> 15%
# train_set, test_set = train_test_split(dataframe, test_size=0.3, random_state=24, stratify=dataframe['diagnosis'])
# val_set, test_set = train_test_split(test_set, test_size=0.5, random_state = 24, stratify=test_set['diagnosis'])

In [106]:
# print(train_set.shape)
# train_set['diagnosis'].value_counts()

In [107]:
# val_set.shape

In [108]:
# test_set.shape

In [109]:
# 24575+5266+5267

## Creating train, val and test directories

In [110]:
# creating directories
os.makedirs(train_directory, exist_ok=True)
os.makedirs(val_directory, exist_ok=True)
os.makedirs(test_directory, exist_ok=True)

In [81]:
# def crop_image_from_gray(img,tol=7):
#     if img.ndim ==2:
#         mask = img>tol
#         return img[np.ix_(mask.any(1),mask.any(0))]
#     elif img.ndim==3:
#         gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
#         mask = gray_img>tol
        
#         check_shape = img[:,:,0][np.ix_(mask.any(1),mask.any(0))].shape[0]
#         if (check_shape == 0): # image is too dark so that we crop out everything,
#             return img # return original image
#         else:
#             img1=img[:,:,0][np.ix_(mask.any(1),mask.any(0))]
#             img2=img[:,:,1][np.ix_(mask.any(1),mask.any(0))]
#             img3=img[:,:,2][np.ix_(mask.any(1),mask.any(0))]
#     #         print(img1.shape,img2.shape,img3.shape)
#             img = np.stack([img1,img2,img3],axis=-1)
#     #         print(img.shape)
#         return img

In [82]:
# def circle_crop(img, sigmaX=10):   
#     """
#     Create circular crop around image centre    
#     """    
    
#     img = cv2.imread(img)
#     img = crop_image_from_gray(img)    
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
#     height, width, depth = img.shape    
    
#     x = int(width/2)
#     y = int(height/2)
#     r = np.amin((x,y))
    
#     circle_img = np.zeros((height, width), np.uint8)
#     cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
#     img = cv2.bitwise_and(img, img, mask=circle_img)
#     img = crop_image_from_gray(img)
# #     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# #     img=cv2.addWeighted ( img,4, cv2.GaussianBlur( img , (0,0) , sigmaX) ,-4 ,128)
#     return img 

In [115]:
# moving images to respective directories
def move_to_directories(df, dest):
    for _, row in df.iterrows():
#         print(row)
        file_name = row['filename']
        class_name = row['class']
        src_path = row['src_path']
#         print(file_name, class_name, src_path)
        dest_path = os.path.join(dest, class_name, file_name)
        print(dest_path)
        
        
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.copyfile(src_path, dest_path)
        
#         #performing image processing here
#         processed_image = circle_crop(src_path)
        
#         #saving the resutl to the destination folder
#         cv2.imwrite(dest_path, cv2.cvtColor(processed_image, cv2.COLOR_BGR2RGB))
        
        
#         cv2.imwrite(os.path.join(path , 'waka.jpg'), img)
#         shutil.copyfile(src_path, dest_path)
        

In [116]:
move_to_directories(train_df, train_directory)

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/2d7666b8884f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/50840c36f0b4.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/30cab14951ac.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a8582e346df0.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/cab3dfa7962d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/35d6c4c50072.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/67f5d89da548.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/6b07971c3bf6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/7116128c65ab.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/71f6a6e4620a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/59e5212f7139.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a3d2a0c4cd17.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/47d1603a555b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/8bbd7835e9aa.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/83e529e95b0e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/bb11db08584a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/5b068765e846.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/d567a1a22d33.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a95858e052d6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/8bc6716c2238.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/82ac8463fadd.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/3c72f580d4ba.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/d1cad012a254.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/29b52f64d2db.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/22325552a4e3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/7bf981d9c7fe.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/4029d70e9d8a.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/7ae69d22075a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/cfd1bd0fcbb4.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a443c4fd489c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/5712e2aa73a2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/677f087cd697.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/e07045d7c5f7.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/4fa26d065ad3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/63a03880939c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/3c726de3ee90.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/8bdb891661a8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/42b08dca9b2f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/91cbe1c775ef.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/31b5d6fb0256.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/855f0a5442b6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a15590a7d774.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/c6e1e9fbf39b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/599b89048034.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/bca2bdc15fc5.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/2e26762daed5.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a790a3b36390.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/8eb3337a54e9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/8e20b8fac7c3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a3706ce27869.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/d9bbdc33db83.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/b99794a0beed.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/8ab8d9b3ce3f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/1f0e223b8055.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/65e530ee2e79.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/6d454444f17c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/8ff2733f6aef.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/1f63d44d9e3c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/4ee1ad981a6d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/959dc602febc.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/dde43aa22ae6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/891392c9683c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/624fb7317106.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/8d3d67661620.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/6889bc64ab09.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a56230242a95.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/df5ce3ea7820.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a1b12fdce6c3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/c8823cdaf7fa.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/698d6e422a80.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/803120c5d287.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/310c27067ac0.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/7247a2c97f71.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/85fce24084da.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/6ba5ed791444.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a0267206d51e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/b8ac328009e0.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/db49cdf1ea64.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/d9a475dfe59a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/51405d042000.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/49419f8d5cb4.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/3402124408ea.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/be7bc89f5fec.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/c80f79579fed.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/9a7bd084395e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/e03e70bc8bba.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a62ea0043aa7.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/d4be0403e6ab.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/c68dfa021d62.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/57469423a012.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/ab50123abadb.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/7c2f820a6425.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a9c7b83caf81.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/86fbac86ed3e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/abf09c44d5f4.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/ac2c814949f9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/962cf85e4f6d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a47878630dc2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/e17507a4a1f5.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/48543037d0b3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/3fd7df6099e3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/28f93cad89c5.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/5b644a403e1f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/df841a0440d8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/5723d0ec895e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/b91ef82e723a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/9da74370835a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/aed4e743c230.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/77e7c7a160c8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/80feb1f7ca5e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/57760be09c03.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a125377fb985.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/261c6bd63bff.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/b06dabab4f09.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/a7b7dc8788b9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/cd9e2190c73f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/7f84284598f5.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/23d7ca170bdb.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/64a13949e879.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/7a0cff4c24b2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/b3d12069e1c5.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/3cdda8b3df19.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/c280730cc211.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/d6228d951958.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/709784f7fcc2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/NPDR/b746a6681ba9.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/408ea9d5e082.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/21d18b022429.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/b87f9c59748b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/b55d2ddb3e75.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/3206171db5be.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/69fff98cb32a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/85cbb84ac8e0.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/1efa5d443707.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/cc3d2e961768.png
/Users/sud

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/c5e238aa18be.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/9cc6b1f9bcbd.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/6fe67482bfae.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/d51b3fe0fa1b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/bfdee9be1f1d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/4ccfa0b4e96c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/e019b3e0f33d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/222f3ee3a1e8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/2fde69f20585.png
/Users/sud

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/7d626a7ffe76.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/df84e7113003.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/4462fba1d2a1.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/6c3745a222da.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/5b5b80a3edee.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/44ecf3f4efa5.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/838c87c63422.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/7b211d8bd249.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/21abd36095a1.png
/Users/sud

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/4c60b10a3a6a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/810d3779abd9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/6a244e855d0e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/6253f23229b1.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/3f49f8d100e9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/9c52b87d01f1.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/4f0866b90c27.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/762d6e5d5068.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/a80dab8eddf4.png
/Users/sud

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/bec0acd539b2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/24b87f744598.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/80ca40196225.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/3435fd8675a2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/c0a0828e01b4.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/d2ffe9287dc7.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/7525ebb3434d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/9bafbbd152d2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/PDR/d0ffa0425ef1.png
/Users/sud

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/7aabd768abff.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/54038e56131d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/1cc58b15f466.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/4c6c5a1bf5ab.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d9ad2a0ec026.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/68ddb15a74de.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/35362d43e753.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/de2eb5c8aa83.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d1fa0f744

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d99b0f7dd9b9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d78b7401096f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/4c635a01593d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/6b3860e8f64f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/8ead17dfb6a6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/cbc23af521f3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/c3b15bf9b4bc.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/1faf8664816c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/39fd8ef3a

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d57d1be1bbd1.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/c81c6911f5e0.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d3dfd0a2dee6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/e1ab92228e60.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/b963a11638f2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/a95d9d61ddd4.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/cd563556cb57.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/3323fd59782e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d6dbb0820

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/7214fc7cbe03.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/609be3ca5ddf.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/8344c783da65.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/6110ecb3bb1c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/6c00dd8bf708.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/c18a006f7f1d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/3cab32dd6ef9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/35cd9832fc0a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/92889b863

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/ca05f7e7801b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/a1eb88562239.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/a1e236fbc863.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/5078caaf1f57.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/2d3f4094c08a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/215d2b7c3fde.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/73d40ce06a67.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/beeca5f14618.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/7d11dbc1e

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/bc34f52c37c7.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d7ac4a0c9760.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/973b0facfa9b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/435414ccccf7.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/a963ac561580.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/84b472c49cfa.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/a1822dd8d05d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/75c180e04f65.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/a07d9a504

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/3286073a976e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/e2c39ed0c941.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d332d7b8a26e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/b46b09a45f39.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/b9c7c5182075.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/dc0f6e5b489b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/1da4a17c18c9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/41960d5f58c2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/763ad1236

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/226c6ceb9185.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/abbb8791785e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/1fb455685328.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/384db24ebbd7.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/b6fd109b1bc9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/db3cd58aa315.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/7d0a871c45db.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/3b2b91590590.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/aafe980ed

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/3abac0961bfd.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/9688c6ef5dc5.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/47536db39f00.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/8b568d47a1fd.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/5ad3dabeb2cd.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/e135d7ba9a0e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/ab88081e5654.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/37c523296d42.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/8af50c9d0

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/573ea80a53be.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/1d0b93317aa8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/78937523f7a8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/b77b88926843.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/beb2ad14fd2d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/568455854a11.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d6f36ec5564a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/90bde2ff8953.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/971bb98ab

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/4e1e252317b5.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d97911a32918.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/6c3589d7ed8d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/58a9e0d7f7af.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/de38adaae009.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/35beb47fe159.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/abe940882578.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/9f8112c710be.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/5baed382f

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/389552047476.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/6e0f78e188ff.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/c80b0f27541a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/93a1b984de84.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/43bc7c066dfb.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/7fe7309d0b4f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/aa4407aab872.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/650104ede84c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/544525563

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/44878f34e31f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/878e356c8fc9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/4dd7b322f342.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/5995321563b7.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/9274e75dc4d5.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/2408799a09b2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/5288f7441f64.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/5293576816aa.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/210bfe012

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/aeb6f4fd2eed.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/ab653b8554c0.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/da3a2275c850.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/4b237b958555.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/98441214557f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d16e39b9d6f0.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/6fbaaf8eb67a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/c7c3d363bc86.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/1f4fb37e0

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/5511f114e7ee.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/6e092caa065f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/ad029ba7fa8b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/3232b34cbe99.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/ae58ccb5905e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/2dc647e00ad3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/8191ae701985.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/260a455692b5.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/2a2274bcb

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/bc92a61a1f9c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/840a06a9c690.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/2ef10194e80d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/dd3176bacfe2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d99dd99be001.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/a7673ac44509.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/88e4399d207c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d667af5742f6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/2fdfb80ea

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/24f271c87e73.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/9ed6c2b25767.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/e4c799738a19.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/7ad0c4975890.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/ccea49708830.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d1b279cc02ae.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/d2fb715b0c41.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/b3a994760537.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/train/No_DR/27933cdbe

In [117]:
move_to_directories(val_df, val_directory)

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/0f495d87656a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/00cb6555d108.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/15cc2aef772a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/12e3f5f2cb17.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/03e25101e8e8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/18b06f56ab27.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/17eff993386f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/07929d32b5b3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/05a5183c92d0.png
/Users/sudeshgurung

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/0efc93ec838b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/1541226c5d72.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/0fcfc6301f3d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/12025b34deb8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/07083738b75e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/17188c13e635.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/0161338f53cc.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/0c76fd494af6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/NPDR/01eb826f6467.png
/Users/sudeshgurung

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/034cb07a550f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/03a7f4a5786f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/001639a390f0.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/1a7e3356b39c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/080ee76c958c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/1638404f385c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/07122e268a1d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/0ada12c0e78f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/PDR/08a3875063c3.png
/Users/sudeshgurung/Msc-Data

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/18b99159a14f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/1ab8d3431ffc.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/13073f075a56.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/1a0dbc6c0cda.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/12b57dac703e.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/184a185e7447.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/0d0b8fc9ab5c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/03b373718013.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/val/No_DR/0daddc45d832.png
/Users/sud

In [118]:
move_to_directories(test_df, test_directory)

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/ead23cc922ed.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/ee78ce914066.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/e9ff9352ccb3.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/ea15a290eb96.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/eba3acc42197.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/f481f76a6b75.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/f6f7dba7104d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/eeb231c3ef1f.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/f55e1d2a19e4.png
/Users/sud

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/ff0740cb484a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/e868c3da340b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/f7edc074f06b.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/f5e9a307288c.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/ed88faaa325a.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/fc4c2d35c6f8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/e6f0ce5bf282.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/e7defafeb957.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/NPDR/e7fc93ac5b6d.png
/Users/sud

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/f4ea2a2cfbb9.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/ec57cc20d776.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/f4874247ede6.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/f58f0b2fd718.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/ee77763a6afb.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/fc8fce67fbf8.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/fce73678f650.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/f4de9620e3f2.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/e60e4edb3ca9.png
/

/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/edceb0657d77.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/fefded6bf135.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/f9d52509c571.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/ee3f5cf52188.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/f26b02ead915.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/f0546a45ef10.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/e50b0174690d.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/e8ddfc9709ce.png
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/script/3_class/dataset/test/No_DR/ff4832d55461.png
/

## Under sampling-[Balanced]

In [142]:
# val_dataframe.shape

(366, 4)

In [87]:
train_dataframe.shape

(2930, 4)

In [144]:
# test_dataframe.shape

(366, 4)

In [88]:
train_dataframe.head()

Unnamed: 0,id_code,diagnosis,path,file
0,1ae8c165fd53,Moderate,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1ae8c165fd53.png
1,1b329a127307,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1b329a127307.png
2,1b32e1d775ea,Proliferate_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1b32e1d775ea.png
3,1b3647865779,No_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1b3647865779.png
4,1b398c0494d1,No_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1b398c0494d1.png


In [89]:
dataframe = train_dataframe.copy()
dataframe.head()

Unnamed: 0,id_code,diagnosis,path,file
0,1ae8c165fd53,Moderate,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1ae8c165fd53.png
1,1b329a127307,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1b329a127307.png
2,1b32e1d775ea,Proliferate_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1b32e1d775ea.png
3,1b3647865779,No_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1b3647865779.png
4,1b398c0494d1,No_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,1b398c0494d1.png


In [90]:
dataframe['diagnosis'].value_counts()

No_DR             1434
Moderate           808
Mild               300
Proliferate_DR     234
Severe             154
Name: diagnosis, dtype: int64

In [91]:
# using min sample for each classes 
sample_size = 154

In [92]:
no_dr = dataframe[dataframe['diagnosis']=='No_DR']
moderate = dataframe[dataframe['diagnosis']=='Moderate']
mild = dataframe[dataframe['diagnosis']=='Mild']
severe = dataframe[dataframe['diagnosis']=='Severe']
proliferate_dr = dataframe[dataframe['diagnosis']=='Proliferate_DR']

In [93]:
under_sampled_no_dr = no_dr.sample(sample_size)
under_sampled_moderate = moderate.sample(sample_size)
under_sampled_mild = mild.sample(sample_size)
under_sampled_severe = severe.sample(sample_size)
under_sampled_proliferate_dr = proliferate_dr.sample(sample_size)


In [94]:
under_sampled_df_cropped = pd.concat([under_sampled_no_dr, under_sampled_moderate, under_sampled_mild, under_sampled_severe, under_sampled_proliferate_dr])
under_sampled_df_cropped['diagnosis'].value_counts()

No_DR             154
Moderate          154
Mild              154
Severe            154
Proliferate_DR    154
Name: diagnosis, dtype: int64

In [95]:
# path to under sampled directory
under_sampled_dir = '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/under_sampled_cropped/'
undersampled_train_dir= '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/under_sampled_cropped/train/'
# undersampled_val_dir= '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/under_sampled_aptos/val/'
# undersampled_test_dir= '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/under_sampled_aptos/test/'

In [163]:
# # training set -> 70%, validation_set -> 15% and testing set-> 15%
# undersampled_train_set, undersampled_test_set = train_test_split(under_sampled_df, test_size=0.3, random_state=24, stratify=under_sampled_df['label'])
# undersampled_val_set, undersampled_test_set = train_test_split(undersampled_test_set, test_size=0.5, random_state = 24, stratify=undersampled_test_set['label'])

In [96]:
print(under_sampled_df_cropped.shape)
# print(undersampled_val_set.shape)
# print(undersampled_test_set.shape)

(770, 4)


In [97]:
# creating directories 
os.makedirs(undersampled_train_dir, exist_ok=True)
# os.makedirs(undersampled_val_dir, exist_ok=True)
# os.makedirs(undersampled_test_dir, exist_ok=True)

In [98]:
move_to_directories(under_sampled_df_cropped, undersampled_train_dir)

In [271]:
# move_to_directories(undersampled_val_set, undersampled_val_dir)

In [272]:
# move_to_directories(undersampled_test_set, undersampled_test_dir)

## Creating only two classes i.e., DR and NO_DR

In [273]:
two_classes_df = dataframe.copy()
two_classes_df.head()

Unnamed: 0,id,label,path,file
0,10_left,No_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,10_left.jpeg
1,10_right,No_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,10_right.jpeg
2,13_left,No_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,13_left.jpeg
3,13_right,No_DR,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,13_right.jpeg
4,15_left,Mild,/Users/sudeshgurung/Msc-Data_Analytics/dissert...,15_left.jpeg


In [275]:
two_classes_df['path'][1]

'/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/IPAC/eyepacs_preprocess/eyepacs_preprocess/10_right.jpeg'

In [279]:
two_classes_df.loc[two_classes_df['label'] != 'No_DR', 'label'] = 'DR'

In [280]:
two_classes_df['label'].value_counts()

No_DR    25802
DR        9306
Name: label, dtype: int64

## Balanced Dataset

In [401]:
target_size = 25802

In [402]:
# def balance_dataset(dataframe, target_size):
#     df_groups = dataframe.groupby(['label'])
#     df_balanced = pd.DataFrame({key:[] for key in dataframe.keys()})
    
#     for i in df_groups.groups.keys():
#         df_group = df_groups.get_group(i)
#         df_label = df_group.sample(frac=1)
#         current_size = len(df_label)
#         print(current_size)
#         if current_size >= target_size:
#                 # If current size is big enough, do nothing
#                 pass
#         else:

#             # Repeat the current dataset if it is smaller than target_size 
#             repeat, mod = divmod(target_size,current_size)


#             df_label_new = pd.concat([df_label]*repeat,ignore_index=True,axis=0)
#             df_label_remainder = df_group.sample(n=mod)

#             df_label_new = pd.concat([df_label_new,df_label_remainder],ignore_index=True,axis=0)

#             # print(df_label_new)

#             df_balanced = pd.concat([df_balanced,df_label_new],ignore_index=True,axis=0)


#     return df_balanced

    

In [403]:
# final_balanced = balance_dataset(dataframe, target_size)
# print(final_balanced.shape)
# final_balanced['label'].value_counts()

In [404]:
# final_balanced[final_balanced['label']=='Moderate'][5280:5300]

In [405]:
# final_balanced[final_balanced['label']=='Moderate'][0:30]

In [406]:
classes = dataframe['label'].unique()
classes

array(['No_DR', 'Mild', 'Moderate', 'Proliferate_DR', 'Severe'],
      dtype=object)

In [407]:
balanced = '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/balanced'
temp = '/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/temp/'

In [408]:
# creating directories 
os.makedirs(balanced, exist_ok=True)
os.makedirs(temp, exist_ok=True)

In [409]:
# move_to_directories(dataframe, balanced)

In [410]:
for i in classes:
    os.makedirs(temp+i, exist_ok=True)

In [411]:
label_counts = {}


In [412]:
count = 0
for class_name in os.listdir(balanced):
    if not class_name.startswith('.'): 
        for img_path in os.listdir(balanced+'/'+class_name):
            if not img_path.startswith('.'):
                count += 1
        print(f'{class_name}: {count}')
        label_counts[balanced+'/'+class_name] = [class_name, count]
    count=0
        
            


Mild: 2438
Moderate: 5288
Proliferate_DR: 708
Severe: 872
No_DR: 25802


In [418]:
gen=ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=20, width_shift_range=.2,
                              height_shift_range=.2, zoom_range=.2)

In [419]:
aug_gen=gen.flow_from_directory( 
    balanced,
    target_size=(256,256), 
    class_mode=None, 
    batch_size=1,
    shuffle=False, 
    save_to_dir=target_dir, 
    save_prefix='aug-',
    save_format='jpeg')

Found 35108 images belonging to 5 classes.


In [420]:
aug_gen.filenames

['Mild/10030_left.jpeg',
 'Mild/10030_right.jpeg',
 'Mild/10085_left.jpeg',
 'Mild/10085_right.jpeg',
 'Mild/10150_right.jpeg',
 'Mild/10169_right.jpeg',
 'Mild/10177_left.jpeg',
 'Mild/10177_right.jpeg',
 'Mild/1020_left.jpeg',
 'Mild/1020_right.jpeg',
 'Mild/10220_left.jpeg',
 'Mild/10221_right.jpeg',
 'Mild/10232_right.jpeg',
 'Mild/10233_left.jpeg',
 'Mild/10233_right.jpeg',
 'Mild/10234_left.jpeg',
 'Mild/10234_right.jpeg',
 'Mild/10269_left.jpeg',
 'Mild/1027_left.jpeg',
 'Mild/10297_right.jpeg',
 'Mild/10320_right.jpeg',
 'Mild/10334_left.jpeg',
 'Mild/10356_right.jpeg',
 'Mild/1036_left.jpeg',
 'Mild/1036_right.jpeg',
 'Mild/10384_left.jpeg',
 'Mild/10426_left.jpeg',
 'Mild/1043_left.jpeg',
 'Mild/10468_right.jpeg',
 'Mild/10545_right.jpeg',
 'Mild/10551_left.jpeg',
 'Mild/10551_right.jpeg',
 'Mild/10567_right.jpeg',
 'Mild/10594_right.jpeg',
 'Mild/10657_left.jpeg',
 'Mild/10657_right.jpeg',
 'Mild/10672_left.jpeg',
 'Mild/10672_right.jpeg',
 'Mild/10737_right.jpeg',
 'Mild/10

In [421]:
# Get the number of classes in the dataset
num_classes = aug_gen.num_classes
num_classes

5

In [None]:
aug_gen.class_indices

In [426]:
# Generate and save the augmented images
for class_index in range(num_classes):
    print(class_index)
#     class_name = aug_gen.class_indices[class_index]
    
#     class_directory = os.path.join(augmented_directory, class_name)
#     os.makedirs(class_directory, exist_ok=True)

0
1
2
3
4


In [None]:
target_size

In [None]:
# Calculate the number of augmented images to generate
needed_num_of_augmented_images = target_size - len(aug_gen.filenames)
needed_num_of_augmented_images

In [377]:
for key, value in label_counts.items():
    sample_count = label_counts[key][1]
    if sample_count < target_size: 
        aug_img_count=0
        delta=target_size-sample_count
        print(delta)
        target_dir=os.path.join(temp, label_counts[key][0]+'/')
        print(target_dir)
        print(key)
        aug_gen=gen.flow_from_directory( 
            key,
            target_size=(256,256), 
            class_mode=None, 
            batch_size=1,
            shuffle=False, 
            save_to_dir=target_dir, 
            save_prefix='aug-',
            save_format='jpeg')
        

23364
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/temp/Mild/
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/balanced/Mild
Found 0 images belonging to 0 classes.
20514
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/temp/Moderate/
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/balanced/Moderate
Found 0 images belonging to 0 classes.
25094
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/temp/Proliferate_DR/
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/balanced/Proliferate_DR
Found 0 images belonging to 0 classes.
24930
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/temp/Severe/
/Users/sudeshgurung/Msc-Data_Analytics/dissertation/Project/dataset/balanced/Severe
Found 0 images belonging to 0 classes.
