In [28]:
import os
import sys
import gc
import time
import random
import cv2
import glob
import requests
import json
import math
import re
import hashlib
import psutil

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from functools import partial
from collections import Counter
from PIL import Image
from multiprocessing import cpu_count
from tqdm import tqdm
from multiprocessing import Pool
from statistics import median
from joblib import Parallel, delayed

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler

from fastai import *
from fastai.vision import *
from fastai.callbacks import *

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score

In [16]:
def crop_image_from_gray(image, tol=8):
    if image.ndim == 2:
        mask = image>told
        return image[np.ix_(mask.any(1),mask.any(0))]
    elif image.ndim== 3:
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        mask = gray_image>tol        
        check_shape = image[:,:,0][np.ix_(mask.any(1),mask.any(0))].shape[0]
        if (check_shape == 0):
            return image
        else:
            image1=image[:,:,0][np.ix_(mask.any(1),mask.any(0))]
            image2=image[:,:,1][np.ix_(mask.any(1),mask.any(0))]
            image3=image[:,:,2][np.ix_(mask.any(1),mask.any(0))]
            image = np.stack([image1,image2,image3],axis=-1)
        return image

def save(image_name, path):
    image_size = 300
    image = cv2.imread(path)
    image = crop_image_from_gray(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width, depth = image.shape
    rate = height / width
    height = int(image_size * rate)
    width = image_size
    image = cv2.resize(image, (height, width))
    image = cv2.addWeighted (image, 4, cv2.GaussianBlur(image , (0,0) , 30) , -4, 128) 
    
    largest_side = np.max((height, width))
    image = cv2.resize(image, (image_size, largest_side))

    height, width, depth = image.shape

    x = width // 2
    y = height // 2
    r = np.amin((x, y))

    circle_image = np.zeros((height, width), np.uint8)
    cv2.circle(circle_image, (x, y), int(r), 1, thickness=-1)
    image = cv2.bitwise_and(image, image, mask=circle_image)
    image = crop_image_from_gray(image)
    cv2.imwrite(os.path.join('..', 'output', 'retina', f'{image_name}.png'), image)

In [17]:
pre_df = pd.read_csv(os.path.join('..', 'input', 'retinopathy-train-2015', 'trainLabels.csv'))
image_name_list = os.listdir(os.path.join('..', 'input', 'retinopathy-train-2015', 'rescaled_train_896'))
image_name_path_list = [os.path.join('..', 'input', 'retinopathy-train-2015', 'rescaled_train_896', f'{image}') for image in image_name_list]
pre_df['path'] = image_name_path_list

In [18]:
base_image_dir = os.path.join('..', 'input')
train_dir = os.path.join(base_image_dir, 'train_images')
train_df = pd.read_csv(os.path.join(base_image_dir, 'train.csv'))
train_df['path'] = train_df['id_code'].map(lambda x: os.path.join(train_dir, '{}.png'.format(x)))

In [24]:
base_image_dir = os.path.join('..', 'input')
test_dir = os.path.join(base_image_dir, 'test_images')
test_df = pd.read_csv(os.path.join(base_image_dir, 'test.csv'))
test_df['path'] = test_df['id_code'].map(lambda x: os.path.join(test_dir, '{}.png'.format(x)))

In [26]:
def save(image_name, path):
    image_size = 300
    image = cv2.imread(path)
    image = crop_image_from_gray(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width, depth = image.shape
    rate = height / width
    height = int(image_size * rate)
    width = image_size
    image = cv2.resize(image, (height, width))
    image = cv2.addWeighted (image, 4, cv2.GaussianBlur(image , (0,0) , 30) , -4, 128) 
    
    largest_side = np.max((height, width))
    image = cv2.resize(image, (image_size, largest_side))

    height, width, depth = image.shape

    x = width // 2
    y = height // 2
    r = np.amin((x, y))

    circle_image = np.zeros((height, width), np.uint8)
    cv2.circle(circle_image, (x, y), int(r), 1, thickness=-1)
    image = cv2.bitwise_and(image, image, mask=circle_image)
    image = crop_image_from_gray(image)
    cv2.imwrite(os.path.join('..', 'output', 'test_retina', f'{image_name}.png'), image)

In [19]:
for (idx, row) in tqdm(train_df.iterrows()):
    save(row['id_code'], row['path'])

3662it [13:51,  3.98it/s]


In [22]:
for (idx, row) in tqdm(pre_df.iterrows()):
    save(row['image'], row['path'])

35126it [1:00:32,  8.86it/s]


In [27]:
for (idx, row) in tqdm(test_df.iterrows()):
    save(row['id_code'], row['path'])


0it [00:00, ?it/s][A
2it [00:00, 11.10it/s][A
4it [00:00, 11.37it/s][A
6it [00:00, 11.38it/s][A
7it [00:00,  7.49it/s][A
8it [00:00,  7.64it/s][A
10it [00:01,  8.58it/s][A
11it [00:01,  6.60it/s][A
12it [00:01,  7.11it/s][A
14it [00:01,  8.07it/s][A
15it [00:01,  6.59it/s][A
16it [00:02,  5.18it/s][A
18it [00:02,  6.22it/s][A
20it [00:02,  6.96it/s][A
21it [00:02,  7.65it/s][A
22it [00:02,  8.08it/s][A
24it [00:02,  8.92it/s][A
26it [00:02,  9.70it/s][A
28it [00:03,  8.00it/s][A
30it [00:03,  8.80it/s][A
32it [00:03,  9.59it/s][A
34it [00:03, 10.07it/s][A
36it [00:04, 10.63it/s][A
38it [00:04, 10.67it/s][A
40it [00:04, 11.07it/s][A
42it [00:04,  8.36it/s][A
43it [00:05,  5.90it/s][A
45it [00:05,  6.03it/s][A
47it [00:05,  6.14it/s][A
49it [00:05,  7.05it/s][A
50it [00:06,  6.00it/s][A
52it [00:06,  7.03it/s][A
53it [00:06,  7.64it/s][A
55it [00:06,  8.59it/s][A
56it [00:06,  8.91it/s][A
58it [00:06,  9.07it/s][A
59it [00:06,  8.57it/s][A
61it [00:0

In [31]:
len(os.listdir('../input/retinopathy-train-2015/rescaled_train_896'))

32541

In [33]:
len(os.listdir('../output/previous_retina'))

35128

In [37]:
pre_df = pd.DataFrame(os.listdir('../input/retinopathy-train-2015/rescaled_train_896'), columns=['image'])

In [57]:
pre_df['image'] = pre_df['image'].apply(lambda x: x.replace('.png', ''))

In [58]:
pre_df.head()

Unnamed: 0,image
0,7297_right
1,7774_right
2,23523_left
3,31620_right
4,38701_right


In [59]:
df = pd.read_csv('../input/retinopathy-train-2015/trainLabels.csv')

In [60]:
df.head()

Unnamed: 0,image,level
0,10_left,0
1,10_right,0
2,13_left,0
3,13_right,0
4,15_left,1


In [61]:
new_df = pd.merge(pre_df, df, on='image', how='inner')

In [62]:
new_df.shape

(32540, 2)

In [63]:
new_df.head()

Unnamed: 0,image,level
0,7297_right,0
1,7774_right,0
2,23523_left,0
3,31620_right,0
4,38701_right,0


In [64]:
new_df['level'].value_counts()

0    23998
2     4849
1     2282
3      787
4      624
Name: level, dtype: int64

In [73]:
temp_0 = pd.read_csv('temp_0.csv')
temp_0.shape

(3773, 5)

In [75]:
temp_2 = new_df[new_df['level'] == 2].sample(3000, replace=False)

In [76]:
temp = new_df[(new_df['level'] != 2) & (new_df['level'] != 0)]

In [78]:
temp.shape

(3693, 2)

In [79]:
temp = pd.concat([temp, temp_0], axis=0).reset_index(drop=True)
temp = pd.concat([temp, temp_2], axis=0).reset_index(drop=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


In [80]:
temp.shape

(10466, 5)

In [81]:
temp.head()

Unnamed: 0,brightness,image,level,path,preprocess_brightness
0,,33270_left,4,,
1,,24873_left,1,,
2,,43379_left,4,,
3,,7859_right,3,,
4,,38407_left,4,,


In [83]:
temp.drop(['path', 'brightness', 'preprocess_brightness'], axis=1, inplace=True)

In [84]:
temp.head()

Unnamed: 0,image,level
0,33270_left,4
1,24873_left,1
2,43379_left,4
3,7859_right,3
4,38407_left,4


In [85]:
temp.to_csv('resample_previous.csv')