In [None]:
# Basic Libs..

import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from tqdm import tqdm,tqdm_notebook
from prettytable import PrettyTable
import pickle
import os

import glob
import math
import time
from random import randrange

# Vis Libs..
from sklearn.manifold import TSNE
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams["axes.grid"] = False

# Image Libs.
from PIL import Image
import cv2

In [None]:
# Dataset locations. ROOT_PATH already ends with '/', so sub-folders are
# appended without a leading slash to avoid '//' inside the derived paths.
ROOT_PATH = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/'
TRAIN_PATH = ROOT_PATH + 'train_images/'
TEST_PATH = ROOT_PATH + 'test_images/'
dir_path = ROOT_PATH

In [None]:
# Load both competition CSV files, then print the row count and the first
# five rows of each split.
train = pd.read_csv('C:/Users/fredd/Downloads/aptos2019-blindness-detection/train.csv')
test = pd.read_csv('C:/Users/fredd/Downloads/aptos2019-blindness-detection/test.csv')

n_train_rows = len(train.index)
n_test_rows = len(test.index)

print("------- Train CSV File --------")
print("Number of train Images: {} \n".format(n_train_rows))
print(train.head(5), "\n")

print("*" * 100)

print("------- Test CSV File --------")
print("Number of test Images: {} \n".format(n_test_rows))
print(test.head(5), "\n")

In [None]:

# Initialisation of the constants used by the later tasks.
width, height = 256, 256
n_channels = 3
split_size = 0.2
# Diagnosis grade -> readable class name.
CLASSS = dict(enumerate([
    "0-No DR",
    "1-Mild DR",
    "2-Moderate DR",
    "3-Severe DR",
    "4-Proliferative DR",
]))

In [None]:
# Add a readable class name next to each numeric diagnosis grade.
train['diagnosis_type'] = train['diagnosis'].map(lambda grade: CLASSS.get(grade))
train.head()

In [None]:
#EDA
# Compare the number of rows in the train and test CSVs.
data = [train.shape[0], test.shape[0]]
n_train_images, n_test_images = data
print("Number of images in train dataset: ", n_train_images)
print("Number of images in Test dataset: ", n_test_images)

labels = ['train-data', 'test-Data']
plt.figure(figsize=(8, 5))
plt.pie(
    data,
    labels=labels,
    explode=[0, 0.1],   # pull the test wedge slightly out
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
plt.title('Pie Chart of size of train and test datasets')
plt.axis('equal')
plt.savefig(
    'C:/Users/fredd/Downloads/aptos2019-blindness-detection/size_data.png',
    dpi=96,
)
plt.show()

In [None]:
# Plot pie chart of the class distribution.
# value_counts() orders its result by frequency, not by grade, so the counts
# are sorted by grade and the labels are derived from that same index; this
# keeps every wedge paired with its own class name.
sizes = train.diagnosis.value_counts().sort_index()
labels = [CLASSS[grade] for grade in sizes.index]

fig1, ax1 = plt.subplots(figsize=(8,5))
ax1.pie(
    sizes,
    # pull out only the first wedge (the lowest grade present)
    explode=[0.1 if pos == 0 else 0 for pos in range(len(sizes))],
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
ax1.axis('equal')

plt.title('DR-Class Distribution')
fig1.savefig('C:/Users/fredd/Downloads/aptos2019-blindness-detection/chart_dist.png',dpi=96)
plt.show()

In [None]:
def plot_classes(df, title):
    """
    Plot a bar chart of the class distribution and print per-class counts.

    df    - (DataFrame) must provide the columns 'diagnosis_type' (used for
            the bar chart) and 'diagnosis' (numeric grade; used for the
            printed summary and looked up in CLASSS).
    title - (str) appended to the chart title and used as the name of the
            PNG file the chart is saved to.

    Output : None - the chart is saved, shown, and the summary is printed.
    """

    counter = df.diagnosis_type.value_counts().sort_index()
    counter.plot(kind = 'bar',figsize=(8,5))

    plt.xlabel('Classes', size=14)
    plt.ylabel('Number of Images', size=14)
    plt.title('Class Distribution - ' + str(title))
    plt.xticks(size=14, color='#4f4e4e')
    plt.yticks(size=14, color='#4f4e4e')

    plt.grid()
    plt.savefig('C:/Users/fredd/Downloads/aptos2019-blindness-detection/'+title+'.png',dpi=96)
    plt.show()

    labels = df.diagnosis
    total = len(labels)
    # count every grade once instead of re-scanning the column per class
    class_counts = labels.value_counts()
    print("\n")

    for grade in sorted(class_counts.index):
        count = int(class_counts[grade])
        percentage = round(count / total, 4)
        print("Number of images in class {} --> {} ({}% of total data)".format(CLASSS[grade], count, np.round(percentage*100,4)))

In [None]:
# Class distribution of the original (imbalanced) training set.
plot_classes(df=train, title='Train-Dataset')

In [None]:
#EDA_images
# Show a 2 x 5 grid of randomly chosen raw training images with their labels.
# The grid dimensions use their own names so the image-size constants
# `width` / `height` defined earlier in the notebook are not overwritten.
n_cols, n_rows = 5, 2
fig, axs = plt.subplots(n_rows, n_cols, figsize=(18,6))
plt.subplots_adjust(hspace=.4, wspace=0.4)
# one random row position of `train` per grid slot
rand_indices = [randrange(len(train)) for _ in range(n_rows * n_cols)]
for ax, im in zip(axs.flat, rand_indices):
    sample_row = train.iloc[im]
    image = Image.open(os.path.join(TRAIN_PATH, sample_row.id_code + '.png'))
    ax.imshow(image)
    # label comes straight from the sampled row
    ax.set_title(CLASSS[sample_row.diagnosis],size=14)
    ax.set_aspect('equal')
plt.suptitle("Original Images\n",fontsize=16)

fig.savefig('C:/Users/fredd/Downloads/aptos2019-blindness-detection/plot_classesNew.png',dpi=96)

plt.show()

In [None]:
# Function to show one image
import gc
def draw_img(imgs, target_dir, class_label='0'):
    """
    Display every image listed in `imgs`, each in its own figure.

    imgs        - (DataFrame) rows to show; the 'id_code' column holds the
                  file stem (without '.png').
    target_dir  - (str) sub-folder name appended to the global `dir_path`.
    class_label - (str) title placed above every figure.

    Output : None - the file name is printed and the figure is shown.
    """
    for _, row in imgs.iterrows():
        name = row['id_code'] + '.png'
        print(name)
        fig_cls=plt.figure(figsize=(15,10))
        img = plt.imread(dir_path + target_dir + '/' + name)
        plt.imshow(img)
        plt.title(class_label)
        plt.show()
        # release the figure and the pixel buffer before the next image
        plt.close(fig_cls)
        del img
        gc.collect()  # previously `gc.collect` without parentheses, a no-op
        
# Show five randomly sampled images of one class (CLASS_ID selects it).
CLASS_ID = 0
draw_img(
    imgs=train.loc[train.diagnosis == CLASS_ID].sample(n=5),
    target_dir='train_images',
    class_label=CLASSS[CLASS_ID],
)

In [None]:
# Show another 2 x 5 grid of randomly chosen raw training images.
# The grid dimensions use their own names so the image-size constants
# `width` / `height` defined earlier in the notebook are not overwritten.
n_cols, n_rows = 5, 2
fig, axs = plt.subplots(n_rows, n_cols, figsize=(18,6))
plt.subplots_adjust(hspace=.4, wspace=0.4)
# one random row position of `train` per grid slot
rand_indices = [randrange(len(train)) for _ in range(n_rows * n_cols)]
for ax, im in zip(axs.flat, rand_indices):
    sample_row = train.iloc[im]
    image = Image.open(os.path.join(TRAIN_PATH, sample_row.id_code + '.png'))
    ax.imshow(image)
    # label comes straight from the sampled row
    ax.set_title(CLASSS[sample_row.diagnosis],size=14)
    ax.set_aspect('equal')
plt.suptitle("Original Images\n",fontsize=16)

#fig.savefig('images_pre/plot_classesNew3.png',dpi=96)

plt.show()

In [None]:
def image_analysis(df, path):
    """
    Collect the pixel width and height of every image listed in `df`.

    df   - (DataFrame) with an 'id_code' column holding the file stems.
    path - (str) prefix that is concatenated directly with
           '<id_code>.png', so it must end with a path separator.

    Returns (width_range, height_range): two lists with one entry per row
    of `df`, in row order.
    Raises FileNotFoundError when an image cannot be read.
    """
    width_range = []
    height_range = []
    for id_code in df['id_code']:
        file_name = path + id_code + '.png'
        img = cv2.imread(file_name)
        if img is None:
            # cv2.imread reports a missing or unreadable file by returning None
            raise FileNotFoundError("Could not read image: {}".format(file_name))
        img_height, img_width = img.shape[:2]
        width_range.append(img_width)
        height_range.append(img_height)
    return width_range, height_range

In [None]:
# Summarise the pixel dimensions of all training images.
width_range, height_range = image_analysis(
    train,
    'C:/Users/fredd/Downloads/aptos2019-blindness-detection/train_images/',
)
avg_width, avg_height = (sum(dims) / len(dims) for dims in (width_range, height_range))
max_width, max_height = max(width_range), max(height_range)
min_width, min_height = min(width_range), min(height_range)

for report_line in (
    "Average width of images in training set: {}".format(int(avg_width)),
    "Average height of images in training set: {}".format(int(avg_height)),
    "-"*100,
    "Maximum width of images in training set: {}".format(max_width),
    "Maximum height of images in training set: {}".format(max_height),
    "-"*100,
    "Minimum width of images in training set: {}".format(min_width),
    "Minimum height of images in training set: {}".format(min_height),
):
    print(report_line)

In [None]:

# Summarise the pixel dimensions of all test images.
width_range_test, height_range_test = image_analysis(
    test,
    'C:/Users/fredd/Downloads/aptos2019-blindness-detection/test_images/',
)
avg_width_test, avg_height_test = (
    sum(dims) / len(dims) for dims in (width_range_test, height_range_test)
)
max_width_test, max_height_test = max(width_range_test), max(height_range_test)
min_width_test, min_height_test = min(width_range_test), min(height_range_test)

for report_line in (
    "Average width of images in test set: {}".format(int(avg_width_test)),
    "Average height of images in test set: {}".format(int(avg_height_test)),
    '-'*100,
    "Maximum width of images in test set: {}".format(max_width_test),
    "Maximum height of images in test set: {}".format(max_height_test),
    '-'*100,
    "Minimum width of images in test set: {}".format(min_width_test),
    "Minimum height of images in test set: {}".format(min_height_test),
):
    print(report_line)

In [None]:
# Side-by-side distributions of image width/height: train (left), test (right).
plt.figure(figsize = (20,6))

plt.subplot(1,2,1)
for dim_values, dim_label in ((width_range, 'train_width'), (height_range, 'train_height')):
    sns.distplot(dim_values, label = dim_label)
plt.legend()
plt.title("Histogram of Height and Width in Training Images",fontsize=14)

plt.subplot(1,2,2)
for dim_values, dim_label in ((width_range_test, 'test_width'), (height_range_test, 'test_height')):
    sns.distplot(dim_values, label = dim_label)
plt.legend()
plt.title("Histogram of Height and Width in Test Images")

plt.show()
#plt.savefig("images_pre/hist_ht_wt.png",dpi=96)

In [None]:
# function to plot a grid of images
def view_images(images, title = ''):
    """
    Function to plot a 2 x 5 grid with several examples of fundus images.
    INPUT:
        images - sequence of id_codes (file stems without '.png') found in
                 the global TRAIN_PATH; at most the first 10 are shown
        title  - text used as the figure's super-title

    OUTPUT: None
    """
    width = 5
    height = 2
    fig, axs = plt.subplots(height, width, figsize=(15,5))

    # zip() stops at the shorter input, so fewer than 10 ids no longer raise
    for ax, id_code in zip(axs.flat, images):
        # the context manager closes the file once the pixels are drawn
        with Image.open(os.path.join(TRAIN_PATH, id_code + '.png')) as image:
            ax.imshow(image)

    # hide the axes of every slot, including any left empty
    for ax in axs.flat:
        ax.axis('off')

    #fig.savefig('images_pre/'+title+'.png')
    # set suptitle
    plt.suptitle(title)
    plt.show()

In [None]:
# First ten training images of grade 0.
view_images(
    train.loc[train['diagnosis'] == 0, 'id_code'].head(10).values,
    title='Images without DR',
)

In [None]:
# First ten training images of grade 1.
view_images(
    train.loc[train['diagnosis'] == 1, 'id_code'].head(10).values,
    title='Mild DR Images',
)

In [None]:
# First ten training images of grade 2.
view_images(
    train.loc[train['diagnosis'] == 2, 'id_code'].head(10).values,
    title='Moderate DR Images',
)

In [None]:
# First ten training images of grade 3.
view_images(
    train.loc[train['diagnosis'] == 3, 'id_code'].head(10).values,
    title='Severe DR Images',
)

In [None]:
# PRE-PROCESSING
# This is a function to oversample the dataset (so some of the images of levels 1-4 are present multiple times in the dataset):

# Oversampling to balance the dataset

# Every class is resampled (with replacement) to the size of the largest
# class, then the rows are shuffled.
class_size = train.groupby('diagnosis').size().max()
train_df = (
    train.groupby(['diagnosis'])
    .apply(lambda grade_rows: grade_rows.sample(class_size, replace=True))
    .reset_index(drop=True)
)
train_df = train_df.sample(frac=1).reset_index(drop=True)
print('New Data Size:', train_df.shape[0], 'Old Size:', train.shape[0])
train_df.head()

In [None]:
# Plot the oversampled frame; passing `train` here would redraw the original
# imbalanced distribution under the "Balanced" title.
plot_classes(train_df,'Balanced-Train-Dataset')

In [None]:
#Image pre-processing

#resize images

def resize_images(path, new_path, new_size):
    """
    Copy every image file in `path` to `new_path`, shrinking wide images.

    path     - (str) source folder.
    new_path - (str) destination folder; created when it does not exist.
    new_size - (int) target width in pixels. Images wider than this are
               resized to this width with the height scaled by the same
               ratio (rounded up); narrower images are saved unchanged.

    Hidden files (names starting with '.') and non-files are skipped.
    Output : None
    """
    os.makedirs(new_path, exist_ok=True)

    lstimgs = [l for l in os.listdir(path) if l != '.DS_Store']

    for item in tqdm(lstimgs):
        # build the path once so the existence check and the open agree
        source_file = os.path.join(path, item)
        if item.startswith('.') or not os.path.isfile(source_file):
            continue

        # the context manager closes the source file after saving
        with Image.open(source_file) as img:
            width,height = img.size
            if width > new_size:
                ratio = height/width
                new_image = img.resize((new_size,math.ceil(ratio*new_size)))
            else:
                new_image = img
            new_image.save(os.path.join(new_path, item))

In [None]:
# Shrink the raw training images to a width of 256 px.
resize_images(
    path='C:/Users/fredd/Downloads/aptos2019-blindness-detection/train_images/',
    new_path='C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-resized-256/',
    new_size=256,
)

In [None]:
# Shrink the raw test images to a width of 256 px.
resize_images(
    path='C:/Users/fredd/Downloads/aptos2019-blindness-detection/test_images/',
    new_path='C:/Users/fredd/Downloads/aptos2019-blindness-detection/test-resized-256/',
    new_size=256,
)

In [None]:
def crop_image(img,tol=7):
    """
    Crop away rows and columns that contain no pixel brighter than `tol`.

    img - (numpy.ndarray) 2-D grayscale image, or 3-D colour image whose
          brightness is taken from cv2.cvtColor(img, cv2.COLOR_RGB2GRAY).
    tol - (int) brightness threshold; a row/column is kept when at least
          one of its pixels is greater than `tol`.

    Returns the cropped array (first three channels for 3-D input). When no
    pixel exceeds `tol` the input is returned unchanged, for both 2-D and
    3-D input. Arrays of any other rank yield None.
    """
    if img.ndim == 2:
        mask = img > tol
    elif img.ndim == 3:
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        return None  # unsupported rank: same result as before

    # rows / columns that hold at least one bright pixel, computed once
    keep_rows, keep_cols = mask.any(1), mask.any(0)
    if not keep_rows.any():
        # image is too dark so that we would crop out everything
        return img

    cropped = img[np.ix_(keep_rows, keep_cols)]
    if img.ndim == 3:
        cropped = cropped[:, :, :3]
    return np.ascontiguousarray(cropped)

In [None]:
def circle_crop(img, sigmaX = 30):
    """
    Keep only the largest centred circle of the image, then sharpen it.

    Steps: crop dark borders with crop_image, swap the first and third
    channels (cv2.COLOR_BGR2RGB), zero every pixel outside a circle centred
    on the image, crop dark borders again, and finally blend the image with
    its Gaussian blur (weights 4 and -4, offset 128).

    sigmaX - standard deviation passed to cv2.GaussianBlur.
    """
    swapped = cv2.cvtColor(crop_image(img), cv2.COLOR_BGR2RGB)

    n_rows, n_cols, _ = swapped.shape
    centre_x = int(n_cols/2)
    centre_y = int(n_rows/2)
    radius = np.amin((centre_x, centre_y))

    # filled disc of ones on a zero background, used as the AND mask
    disc_mask = np.zeros((n_rows, n_cols), np.uint8)
    cv2.circle(disc_mask, (centre_x, centre_y), int(radius), 1, thickness=-1)

    masked = cv2.bitwise_and(swapped, swapped, mask=disc_mask)
    trimmed = crop_image(masked)

    blurred = cv2.GaussianBlur(trimmed, (0,0), sigmaX)
    return cv2.addWeighted(trimmed, 4, blurred, -4, 128)

In [None]:
def CLAHE(image):
    """
    Apply CLAHE (clip limit 2.0, 8 x 8 tile grid) to each of the first
    three channels of `image` independently and merge the results.
    """
    equaliser = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    equalised_channels = [equaliser.apply(image[:,:,channel]) for channel in range(3)]
    return cv2.merge(equalised_channels)

In [None]:
def preprocess_crop_image(pathCrop,new_pathCrop,new_size):
    """
    Run circle_crop and CLAHE on every image in `pathCrop` and write the
    result, resized to new_size x new_size, under the same file name in
    `new_pathCrop`.

    pathCrop     - (str) source folder.
    new_pathCrop - (str) destination folder; created when it does not exist.
    new_size     - (int) side length of the square output image.

    Hidden files (names starting with '.') and non-files are skipped.
    Raises OSError when a file cannot be decoded as an image.
    Output : None
    """
    os.makedirs(new_pathCrop, exist_ok=True)
    lstimgs = [l for l in os.listdir(pathCrop) if l != '.DS_Store']

    for item in tqdm(lstimgs):
        # build the path once so the existence check and the read agree
        source_file = os.path.join(pathCrop, item)
        if item.startswith('.') or not os.path.isfile(source_file):
            continue

        img = cv2.imread(source_file)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable input
            raise OSError("Could not decode image: {}".format(source_file))
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)

        # circle_crop swaps the first and third channels again, so the array
        # handed to cv2.imwrite is back in the channel order that was read
        img = circle_crop(img)
        img = CLAHE(img)
        cv2.imwrite(os.path.join(new_pathCrop, item), cv2.resize(img, (new_size,new_size)))

In [None]:
# Circle-crop and equalise the resized training images.
preprocess_crop_image(
    pathCrop='C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-resized-256/',
    new_pathCrop='C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/',
    new_size=256,
)

In [None]:
# Circle-crop and equalise the resized test images.
preprocess_crop_image(
    pathCrop='C:/Users/fredd/Downloads/aptos2019-blindness-detection/test-resized-256/',
    new_pathCrop='C:/Users/fredd/Downloads/aptos2019-blindness-detection/test-final-256/',
    new_size=256,
)

In [None]:
def images_per_class(class_labels,n, data,is_preprocess = False):

    """
    This function plots "n" images for each class, one figure per class.
    Args : class_labels - (Series Object) which contains the class_labels of train or validation sets.
           n - (Integer)  number of images to be plotted for each class
           data - (Series Object) which contains the id_code of each point in data sets.
           is_preprocess - (boolean) when True, each raw image is followed by its
                           pre-processed version from the train-final-256 folder
                           (False by Default)
    Output : None - this function doesn't return anything.
    """

    classes = ['No DR','Mild DR','Moderate DR','Severe DR','Proliferative DR']
    raw_dir = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/train_images/'
    final_dir = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/'
    # each example takes two slots when the processed image is shown as well
    n_cols = n*2 if is_preprocess else n

    for label in sorted(set(class_labels)):
        slot = 1
        plt.figure(figsize=(10,5))
        for row in range(len(data)):
            if slot > n_cols:
                break
            if class_labels.iloc[row] != label:
                continue

            plt.subplot(1,n_cols,slot)
            raw_img = cv2.imread(raw_dir+data.iloc[row]+'.png')
            # cv2.imread yields BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB))
            plt.axis('equal')
            # the class name is looked up by the label itself, not by loop position
            plt.title("Class = {} ({})".format(label,classes[int(label)]))
            slot+=1

            if is_preprocess:
                processed_img = cv2.imread(final_dir+data.iloc[row]+'.png')
                plt.subplot(1,n_cols,slot)
                # converted as well so both panels use the same channel order
                plt.imshow(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB))
                plt.axis('equal')
                plt.title('Image After Pre-Processing')
                slot+=1
        #plt.savefig('images_pre/'+str(iter)+'.png',dpi=96)
        plt.show()

In [None]:
# Three raw example images for each class.
images_per_class(
    class_labels=train_df['diagnosis'],
    n=3,
    data=train_df['id_code'],
    is_preprocess=False,
)

In [None]:
# One raw / pre-processed image pair for each class.
images_per_class(
    class_labels=train_df['diagnosis'],
    n=1,
    data=train_df['id_code'],
    is_preprocess=True,
)

In [None]:
def images_per_class_test(class_labels,n, data,is_preprocess = False):

    """
    This function plots "n" images for each class, one figure per class,
    using a compact subplot layout and 'scaled' axes.
    Args : class_labels - (Series Object) which contains the class_labels of train or validation sets.
           n - (Integer)  number of images to be plotted for each class
           data - (Series Object) which contains the id_code of each point in data sets.
           is_preprocess - (boolean) when True, each raw image is followed by its
                           pre-processed version from the train-final-256 folder
                           (False by Default)
    Output : None - this function doesn't return anything.
    """

    classes = ['No DR','Mild DR','Moderate DR','Severe DR','Proliferative DR']
    raw_dir = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/train_images/'
    final_dir = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/'
    # each example takes two slots when the processed image is shown as well
    n_cols = n*2 if is_preprocess else n

    for label in sorted(set(class_labels)):
        slot = 1
        plt.figure(figsize=(10,5))
        for row in range(len(data)):
            if slot > n_cols:
                break
            if class_labels.iloc[row] != label:
                continue

            plt.subplot(1,n_cols,slot)
            plt.subplots_adjust(top = 0.5, bottom=0.01, wspace=0.1)
            raw_img = cv2.imread(raw_dir+data.iloc[row]+'.png')
            # cv2.imread yields BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB))
            plt.axis('scaled')
            # the class name is looked up by the label itself, not by loop position
            plt.title("Class = {} ({})".format(label,classes[int(label)]))
            slot+=1

            if is_preprocess:
                processed_img = cv2.imread(final_dir+data.iloc[row]+'.png')
                plt.subplot(1,n_cols,slot)
                # converted as well so both panels use the same channel order
                plt.imshow(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB))
                plt.axis('scaled')
                plt.title('Image After Pre-Processing')
                slot+=1
        #plt.savefig('images_pre/test3_'+str(iter)+'.png',dpi=96)
        plt.show()

In [None]:
# One raw / pre-processed image pair for each class (compact layout).
images_per_class_test(
    class_labels=train_df['diagnosis'],
    n=1,
    data=train_df['id_code'],
    is_preprocess=True,
)

In [None]:
# One raw / pre-processed image pair for each class (compact layout).
images_per_class_test(
    class_labels=train_df['diagnosis'],
    n=1,
    data=train_df['id_code'],
    is_preprocess=True,
)

In [None]:
# One raw / pre-processed image pair for each class (compact layout).
images_per_class_test(
    class_labels=train_df['diagnosis'],
    n=1,
    data=train_df['id_code'],
    is_preprocess=True,
)

In [None]:
# One raw / pre-processed image pair for each class (compact layout).
images_per_class_test(
    class_labels=train_df['diagnosis'],
    n=1,
    data=train_df['id_code'],
    is_preprocess=True,
)

In [None]:
def images_per_class_pre(class_labels,n, data,is_preprocess = False):

    """
    This function plots pre-processed images (from the train-final-256
    folder) for each class, one figure per class.
    Args : class_labels - (Series Object) which contains the class_labels of train or validation sets.
           n - (Integer)  number of images to be plotted for each class
           data - (Series Object) which contains the id_code of each point in data sets.
           is_preprocess - (boolean) when True the figure holds n*2 images per
                           class instead of n (False by Default)
    Output : None - this function doesn't return anything.
    """

    classes = ['No DR','Mild DR','Moderate DR','Severe DR','Proliferative DR']
    final_dir = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/'
    n_cols = n*2 if is_preprocess else n

    for label in sorted(set(class_labels)):
        slot = 1
        plt.figure(figsize=(10,5))
        for row in range(len(data)):
            # stop once the figure is full, for either layout
            if slot > n_cols:
                break
            if class_labels.iloc[row] != label:
                continue

            # draw on this figure's own subplot rather than on a global
            # `axs` grid created by an unrelated cell
            plt.subplot(1,n_cols,slot)
            plt.subplots_adjust(top = 0.5, bottom=0.01, wspace=0.1)
            img = cv2.imread(final_dir+data.iloc[row]+'.png')
            # cv2.imread yields BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.axis('off')
            plt.title("Class = {} ({})".format(label,classes[int(label)]))
            slot+=1

        #plt.savefig('images_pre/pre_'+str(iter)+'.png',dpi=96)
        plt.show()

In [None]:
# Show a 2 x 5 grid of randomly chosen pre-processed training images.
# The grid dimensions use their own names so the image-size constants
# `width` / `height` defined earlier in the notebook are not overwritten.
n_cols, n_rows = 5, 2
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15,5))
# one random row position of `train` per grid slot
rand_indices = [randrange(len(train)) for _ in range(n_rows * n_cols)]
path='C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/'
for ax, im in zip(axs.flat, rand_indices):
    sample_row = train.iloc[im]
    image = Image.open(os.path.join(path, sample_row.id_code + '.png'))
    ax.imshow(image)
    ax.axis('off')
    # label comes straight from the sampled row
    ax.set_title(CLASSS[sample_row.diagnosis])
plt.suptitle("Images after pre-processing",size=16)
#fig.savefig('images_pre/preprocess_imgs.png',dpi=96)

plt.show()

In [None]:
# Show a 2 x 5 grid of randomly chosen pre-processed test images.
# The grid dimensions use their own names so the image-size constants
# `width` / `height` defined earlier in the notebook are not overwritten.
# The `test` frame is only used for its id_code column, so no per-image
# title is drawn.
n_cols, n_rows = 5, 2
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15,5))
# one random row position of `test` per grid slot
rand_indices = [randrange(len(test)) for _ in range(n_rows * n_cols)]
path='C:/Users/fredd/Downloads/aptos2019-blindness-detection/test-final-256/'
for ax, im in zip(axs.flat, rand_indices):
    image = Image.open(os.path.join(path, test.iloc[im].id_code + '.png'))
    ax.imshow(image)
    ax.axis('off')
plt.suptitle("Images after pre-processing",size=16)
#fig.savefig('images_pre/preprocess_imgs1.png',dpi=96)

plt.show()

In [None]:
# Show another 2 x 5 grid of randomly chosen pre-processed training images.
# The grid dimensions use their own names so the image-size constants
# `width` / `height` defined earlier in the notebook are not overwritten.
n_cols, n_rows = 5, 2
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15,5))
# one random row position of `train` per grid slot
rand_indices = [randrange(len(train)) for _ in range(n_rows * n_cols)]
path='C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/'
for ax, im in zip(axs.flat, rand_indices):
    sample_row = train.iloc[im]
    image = Image.open(os.path.join(path, sample_row.id_code + '.png'))
    ax.imshow(image)
    ax.axis('off')
    # label comes straight from the sampled row
    ax.set_title(CLASSS[sample_row.diagnosis])
plt.suptitle("Images after pre-processing",size=16)
#fig.savefig('images_pre/preprocess_imgs1.png',dpi=96)

plt.show()

In [None]:
import cv2
import matplotlib.pyplot as plt
# Build a second balanced, shuffled frame (`train1`) with file names and
# paths of the pre-processed images, then draw `pts_per_class` rows of
# five images, one image per class in every row.
pts_per_class=2
print(train.shape)
print(train_df.shape)
train1 = pd.read_csv('C:/Users/fredd/Downloads/aptos2019-blindness-detection/train.csv')
print(train1.shape)
# oversample every class (with replacement) to the largest class size
class_size=train.pivot_table(index='diagnosis', aggfunc=len).max().max()
train1 = train.groupby(['diagnosis']).apply(lambda x: x.sample(class_size, replace = True)).reset_index(drop = True)
train1 = train1.sample(frac=1).reset_index(drop=True)
print('New Data Size:', train1.shape[0], 'Old Size:', train.shape[0])
train1['image'] = [i + '.png' for i in train1['id_code']]
# NOTE: from here on TRAIN_PATH points at the pre-processed images
TRAIN_PATH ='C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/' 
train1['file_path'] = train1['id_code'].map(lambda x: os.path.join(TRAIN_PATH,'{}.png'.format(x)))

file_path='C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/'
# after reset_index the rows are grouped by class: positions
# [k*pts_per_class, (k+1)*pts_per_class) belong to the k-th class
df = train1.groupby('diagnosis',group_keys = False).apply(lambda grade_rows: grade_rows.sample(pts_per_class))
df = df.reset_index(drop = True)

plt.rcParams["axes.grid"] = False
for pt in range(pts_per_class):
    f, axarr = plt.subplots(1,5,figsize = (15,15))

    # pick the pt-th sample of each of the five classes
    df_temp = df[df.index.isin([pt + (pts_per_class*k) for k in range(5)])]
    for i in range(5):
        axarr[i].imshow(Image.open(df_temp.file_path.iloc[i]).resize((256,256)))
        axarr[i].set_xlabel('Class '+str(df_temp.diagnosis.iloc[i]),size=14)
        axarr[i].set_title('Class '+str(df_temp.diagnosis.iloc[i]),size=14)
    # save inside the loop so every row of samples is written to its own
    # '<pt>.png', not only the last figure
    f.savefig('C:/Users/fredd/Downloads/aptos2019-blindness-detection/'+str(pt)+'.png',dpi=96)
plt.show()
# print(train.size)
# print(train_df.size)
# print(train1.size)# f.savefig("images_pre/pre_img_test.png")

In [None]:
import cv2
import matplotlib.pyplot as plt
# Build `test1`: the test CSV plus the file name and full path of every
# pre-processed test image.
pts_per_class = 2

test1 = pd.read_csv('C:/Users/fredd/Downloads/aptos2019-blindness-detection/test.csv')
test1['image'] = [code + '.png' for code in test1['id_code']]
# NOTE: from here on TEST_PATH points at the pre-processed images
TEST_PATH = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/test-final-256/'
test1['file_path'] = [
    os.path.join(TEST_PATH, '{}.png'.format(code)) for code in test1['id_code']
]
print(test1)
# # def visualize_imgs(df,pts_per_class,color_scale):
# file_path='/Volumes/surya/aptos2019-blindness-detection/test-final-256/'
# #df = test1.apply(lambda test1: test1.sample(pts_per_class))
# #df = test1.groupby('id_code',group_keys = False).apply(lambda test1: test1.sample(pts_per_class))
# df = df.reset_index(drop = True)
# #print(test)
# plt.rcParams["axes.grid"] = False
# for pt in range(pts_per_class):
#     f, axarr = plt.subplots(1,5,figsize = (15,15))

# #    #axarr[0].set_ylabel("Sample Data Points")
#    # [axi.set_axis_off() for axi in axarr.ravel()]
#     df_temp = df[df.index.isin([pt + (pts_per_class*0),pt +(pts_per_class*1), pt + (pts_per_class*2),pt + (pts_per_class*3),pt + (pts_per_class*4)])]
#     print(df_temp)
#     for i in range(5):
#         # if color_scale == 'gray':
#         #     img = conv_gray(cv2.imread(df_temp.file_path.iloc[i]))
#         #     axarr[i].imshow(img,cmap = color_scale)
#         # else:
#         axarr[i].imshow(Image.open(df_temp.file_path.iloc[i]).resize((256,256)))
#         axarr[i].set_xlabel('Class '+str(df_temp.id_code.iloc[i]),size=14)
#         axarr[i].set_title('Class '+str(df_temp.id_code.iloc[i]),size=14)
#     #plt.savefig('images_pre/pre_img_test'+str(pt)+'.png',dpi=96)
#     plt.show()

In [None]:
def plotting(img, title,i):
    """
    Draw one panel of a 1 x 5 row of subplots on the current figure.
    Args: img (numpy.ndarray) - image to show
          title (string) - title placed above the panel
          i (integer) - 1-based column number of the panel
    output: None - this function doesn't return anything.
    """
    n_rows, n_cols = 1, 5
    plt.subplot(n_rows, n_cols, i)
    plt.imshow(img)
    plt.title(title)
    plt.axis('off')

In [None]:
# Walk one training image through every pre-processing step.
img = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/train_images/201f882365d3.png'  # sample train image
img = cv2.imread(img)
img1 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(15,5))
plotting(img1,'Before Image Processing:',1)

img1 = crop_image(img1)
plotting(img1,'Step-1: After Cropping ',2)

img1 = cv2.resize(img1, (256,256))
plotting(img1,'Step-2: After Resizing',3)

img1 = circle_crop(img1,)
# circle_crop swaps the first and third channels, so its output (and the
# CLAHE result derived from it) is converted back for display only
plotting(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB),'Step-3: After Circle Crop & \n  Gaussian Blur',4)


img = CLAHE(img1)
plotting(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),'Step-4: After CLAHE',5)

In [None]:
# Walk one test image through every pre-processing step.
img = 'C:/Users/fredd/Downloads/aptos2019-blindness-detection/test_images/003f0afdcd15.png'  # sample test image
img = cv2.imread(img)
img1 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(15,5))
plotting(img1,'Before Image Processing:',1)

img1 = crop_image(img1)
plotting(img1,'Step-1: After Cropping ',2)

img1 = cv2.resize(img1, (256,256))
plotting(img1,'Step-2: After Resizing',3)

img1 = circle_crop(img1,)
# circle_crop swaps the first and third channels, so its output (and the
# CLAHE result derived from it) is converted back for display only
plotting(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB),'Step-3: After Circle Crop & \n  Gaussian Blur',4)



img = CLAHE(img1)
plotting(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),'Step-4: After CLAHE',5)
#plt.savefig('images_pre/after_img_preprocess.png',dpi=96)

In [None]:
#print("Writing Train Array")

# Load every pre-processed image listed in train1['image'] into one array,
# in the row order of train1.
lst_imgs = list(train1['image'])

train_pixel_arrays = []
for file_name in lst_imgs:
    train_pixel_arrays.append(
        np.array(Image.open('C:/Users/fredd/Downloads/aptos2019-blindness-detection/train-final-256/' + file_name))
    )
X_train_256 = np.array(train_pixel_arrays)

print(X_train_256.shape)

In [None]:
#print("Writing test Array")
#test.columns
# Load every pre-processed image listed in test1['image'] into one array,
# in the row order of test1.
lst_imgs = list(test1['image'])

test_pixel_arrays = []
for file_name in lst_imgs:
    test_pixel_arrays.append(
        np.array(Image.open('C:/Users/fredd/Downloads/aptos2019-blindness-detection/test-final-256/' + file_name))
    )
X_test_256 = np.array(test_pixel_arrays)

print(X_test_256.shape)


In [None]:
# X_train_256 is built from train1['image'], so the one-hot targets must be
# taken from that same frame. train_df is a separate, independently sampled
# and shuffled frame whose rows do not line up with X_train_256.
y_train = pd.get_dummies(train1['diagnosis']).values
print(y_train.shape)

In [None]:
# Persist the targets and both image arrays as .npy files.
np.save(
    file='C:/Users/fredd/Downloads/aptos2019-blindness-detection/y_train.npy',
    arr=y_train,
)
np.save(
    file='C:/Users/fredd/Downloads/aptos2019-blindness-detection/X_train_256.npy',
    arr=X_train_256,
)
np.save(
    file='C:/Users/fredd/Downloads/aptos2019-blindness-detection/X_test_256.npy',
    arr=X_test_256,
)