In [1]:
import Augmentor as aug
import glob
import os
import numpy as np
import cv2
import PIL

In [2]:
from Augmentor.Operations import Operation

class Lightning(Operation):
    """Augmentor operation that rescales brightness and blends in random noise.

    Every image is converted to RGB, weighted by a random intensity factor
    drawn between ``intensity_low`` and ``intensity_high`` (in steps of 0.01)
    and combined, through ``cv2.addWeighted``, with a per-pixel noise layer
    weighted 0.25.
    """

    def __init__(self, probability, intensity_low=0.7, intensity_high=1.2):
        """Store the operation probability and the intensity range.

        :param probability: Forwarded unchanged to ``Operation.__init__``.
        :param intensity_low: Lower bound of the brightness factor.
        :param intensity_high: Upper bound of the brightness factor
            (exclusive unless it equals ``intensity_low``).
        :raises ValueError: If ``intensity_low`` is greater than
            ``intensity_high``.
        """
        if intensity_low > intensity_high:
            raise ValueError(
                'intensity_low ({}) must not exceed intensity_high ({})'.format(
                    intensity_low, intensity_high
                )
            )
        Operation.__init__(self, probability)
        self.intensity_low = intensity_low
        self.intensity_high = intensity_high

    def perform_operation(self, images):
        """Apply the brightness/noise transform to every image in ``images``.

        :param images: List of objects exposing ``convert('RGB')``
            (PIL images in this notebook). The list is updated in place.
        :return: The same list, with each entry replaced by a new PIL image.
        """
        # A bare `import PIL` does not guarantee that the `PIL.Image`
        # submodule is loaded, so import it explicitly here.
        from PIL import Image

        # Work in integer hundredths. round() instead of int() so that a
        # bound such as 0.29 (0.29 * 100 == 28.999999999999996) is not
        # truncated down to 28.
        low = int(round(self.intensity_low * 100))
        high = int(round(self.intensity_high * 100))

        for i, image in enumerate(images):
            image = np.array(image.convert('RGB'))
            row, col, _ = image.shape

            # np.random.randint requires low < high; when both bounds are
            # equal the caller asked for a fixed intensity.
            if low < high:
                light_intensity = np.random.randint(low, high) / 100
            else:
                light_intensity = low / 100

            # Single-channel uniform noise in [0, 100), truncated to uint8
            # and replicated over the three colour channels.
            noise = np.array(100 * np.random.random((row, col, 1)), dtype=np.uint8)
            noise = np.concatenate((noise, noise, noise), axis=2)
            image = cv2.addWeighted(image, light_intensity, noise, 0.25, 0)

            images[i] = Image.fromarray(image)
        return images

In [3]:
# Factor applied to the largest class size when the sampling cell computes
# how many augmented images each pipeline should generate.
MULTIPLIER = 29

In [4]:
# Glob pattern for the dataset root; every sub-directory it matches is
# treated as one class by the cells below.
directory = '../Dataset/FINALFINAL/*'

In [5]:
# Absolute paths of every directory matched by the glob pattern
# (plain files are skipped).
folders = [
    os.path.abspath(entry)
    for entry in glob.glob(directory)
    if os.path.isdir(entry)
]

print('Classes found {}'.format([os.path.basename(folder) for folder in folders]))

Classes found ['basalcellcaricinoma', 'lentigo', 'malignantmelanoma', 'pigmentednevus', 'seborrheickeratosis', 'wart']


In [6]:
# One Augmentor pipeline per class folder, keyed by the folder's base name.
pipelines = {}
for class_dir in folders:
    class_name = os.path.split(class_dir)[1]
    pipelines[class_name] = aug.Pipeline(
        source_directory=class_dir,
        output_directory='inception_out',
        save_format='jpg',
    )

Initialised with 1561 image(s) found.
Output directory set to /Users/DaniloBarros/Documents/FGA/2018-1/TCC1/Dataset/FINALFINAL/basalcellcaricinoma/inception_out.Initialised with 69 image(s) found.
Output directory set to /Users/DaniloBarros/Documents/FGA/2018-1/TCC1/Dataset/FINALFINAL/lentigo/inception_out.Initialised with 228 image(s) found.
Output directory set to /Users/DaniloBarros/Documents/FGA/2018-1/TCC1/Dataset/FINALFINAL/malignantmelanoma/inception_out.Initialised with 626 image(s) found.
Output directory set to /Users/DaniloBarros/Documents/FGA/2018-1/TCC1/Dataset/FINALFINAL/pigmentednevus/inception_out.Initialised with 897 image(s) found.
Output directory set to /Users/DaniloBarros/Documents/FGA/2018-1/TCC1/Dataset/FINALFINAL/seborrheickeratosis/inception_out.Initialised with 435 image(s) found.
Output directory set to /Users/DaniloBarros/Documents/FGA/2018-1/TCC1/Dataset/FINALFINAL/wart/inception_out.

In [7]:
# Report and record how many source images each pipeline found.
classes_count = []
for pipeline in pipelines.values():
    n_samples = len(pipeline.augmentor_images)
    class_label = pipeline.augmentor_images[0].class_label
    print("Class '{}' has {} samples".format(class_label, n_samples))
    classes_count.append(n_samples)

Class 'basalcellcaricinoma' has 1561 samples
Class 'lentigo' has 69 samples
Class 'malignantmelanoma' has 228 samples
Class 'pigmentednevus' has 626 samples
Class 'seborrheickeratosis' has 897 samples
Class 'wart' has 435 samples


In [8]:
# A single Lightning instance is shared by all pipelines.
lightning = Lightning(probability=0.5)

# Register the same chain of operations on every class pipeline; the final
# resize always runs (probability 1.0) and yields 299x299 images.
for pipeline in pipelines.values():
    pipeline.rotate(probability=0.5, max_left_rotation=10, max_right_rotation=10)
    pipeline.zoom_random(probability=0.4, percentage_area=0.9)
    pipeline.flip_left_right(probability=0.7)
    pipeline.flip_top_bottom(probability=0.5)
    pipeline.random_distortion(probability=0.8, grid_width=5, grid_height=5, magnitude=15)
    # pipeline.histogram_equalisation(0.4)  # TODO: check whether this makes sense
    pipeline.add_operation(lightning)
    pipeline.resize(probability=1.0, width=299, height=299)

In [9]:
# Size of the largest class; every class is topped up relative to it.
largest_class = max(classes_count)
for pipeline in pipelines.values():
    n_original = len(pipeline.augmentor_images)
    diff = largest_class - n_original
    # (n_original + diff) equals the largest class size.
    pipeline.sample((n_original + diff) * MULTIPLIER + diff)

                                                                                                        

In [3]:
# Scratch check with hard-coded counts (69 = smallest class, 1561 = largest):
# mirrors the sampling formula with a multiplier of 29, then adds the 69
# original images on top.
(69 + (1561-69)) * 29 + (1561-69) + 69

46830

In [None]:
import matplotlib.pyplot as plt
def add_gaussian_noise(X_img):
    """Brighten an image and blend in a random single-channel noise layer.

    ``X_img`` must expose ``convert('RGB')`` (a PIL image in this notebook).
    The result is whatever ``cv2.addWeighted`` returns for the RGB array
    weighted 1.2 and the noise layer weighted 0.25.

    Note: despite the name, the noise comes from ``np.random.random``
    (uniform samples), not from a Gaussian distribution.
    """
    rgb = np.array(X_img.convert('RGB'))
    height, width = rgb.shape[:2]

    # Uniform samples scaled by 100 and then by 0.25, truncated to uint8.
    noise = np.array((100 * np.random.random((height, width, 1))) * 0.25, dtype=np.uint8)
    # Replicate the single noise channel across the three colour channels.
    noise_rgb = np.concatenate([noise] * 3, axis=2)

    return cv2.addWeighted(rgb, 1.2, noise_rgb, 0.25, 0)

# Hard-coded absolute path to a single sample image on the author's machine.
path = '/Users/DaniloBarros/Documents/FGA/2018-1/TCC1/data-augmentation/complete_mednode_dataset/melanoma/2128172.jpg'
# X_imgs = cv2.imread(path)
# X_imgs = cv2.cvtColor(X_imgs, cv2.COLOR_BGR2RGB)
X_imgs = PIL.Image.open(path)
gaussian_noise_imgs = add_gaussian_noise(X_imgs)
# Show the processed image first, then the untouched original for comparison.
plt.imshow(gaussian_noise_imgs)
plt.show()
plt.imshow(X_imgs)
plt.show()

In [1]:
import glob
import os
import random

In [5]:
# Hard-coded absolute dataset root; it is walked further down to collect image paths.
path = '/home/danilo/Documents/FGA/2018-1/TCC1/Datasets/Atlas_5k'

In [6]:
# Scratch: drop the last component of `path` and append 'aloooo' in its place.
os.path.sep.join(path.split(os.path.sep)[:-1]) + os.path.sep + 'aloooo'

'/home/danilo/Documents/FGA/2018-1/TCC1/Datasets/aloooo'

In [67]:
# Scratch: keep only the last two components of `path`.
# NOTE(review): the recorded output does not correspond to the `path` value
# assigned above, so it was presumably produced with a different `path`.
os.path.sep.join(path.split(os.path.sep)[-2:])

'Dataset/Dataset_blend'

In [13]:
# Class name -> integer id, assigned in list order (0..5).
# NOTE(review): the folder listing printed earlier in this notebook spells
# the first class 'basalcellcaricinoma'; a lookup by directory name would
# raise KeyError if that spelling is what exists on disk -- confirm.
label_dict = {
    name: index
    for index, name in enumerate([
        'basalcellcarcinoma',
        'lentigo',
        'malignantmelanoma',
        'pigmentednevus',
        'seborrheickeratosis',
        'wart',
    ])
}

In [8]:
# Collect (list_of_file_paths, folder_name) for every leaf directory under `path`.
dataset = []
for root, subdirs, filenames in os.walk(path):
    if subdirs:
        # Not a leaf: remember its sub-directory names and move on.
        labels = subdirs
        continue
    leaf_name = root.split(os.path.sep)[-1]
    leaf_files = ['{}/{}'.format(root, filename) for filename in filenames]
    dataset.append((leaf_files, leaf_name))

In [14]:
# Flatten the per-folder groups into (image_path, folder_name) pairs.
X = [
    (image_path, folder_name)
    for image_paths, folder_name in dataset
    for image_path in image_paths
]
# Integer id of each pair's folder name.
y = [label_dict[pair[1]] for pair in X]

In [63]:
# Map every name in `labels` to its position in that list.
oi = {name: position for position, name in enumerate(labels)}
oi

{'ak': 10,
 'basalcellcaricinoma': 5,
 'dermatofibroma': 11,
 'hemangioma': 3,
 'intraepithelial_carcinoma': 0,
 'lentigo': 7,
 'melanoma': 4,
 'naevus': 8,
 'pyogenic_granuloma': 2,
 'scc': 9,
 'seborrheickeratosis': 1,
 'wart': 6}

In [69]:
# Scratch values; `oi` is rebound again by later cells.
oi, alo, uhu = 1, 2, 3

In [79]:
# Scratch: render the first tuple as one comma-separated line.
oi = [(1, 2, 3, 4), (5, 6, 7, 8)]
','.join(map(str, oi[0])) + '\n'

'1,2,3,4\n'

In [35]:
# Flatten the per-folder groups into (image_path, folder_name) pairs.
X = [
    (image_path, folder_name)
    for image_paths, folder_name in dataset
    for image_path in image_paths
]

In [36]:
import random
# Reorders X in place. No seed is set in the visible cells, so the order
# presumably differs between runs.
random.shuffle(X)

In [3]:
from sklearn.model_selection import train_test_split

In [39]:
from sklearn.model_selection import train_test_split
import numpy as np

# Hold out 20% of the (image_path, folder_name) pairs as a test split.
# Only X is split: the dummy zeros array is dropped so nothing is assigned to
# `_` (which IPython uses for the last displayed output).
# random_state pins the partition for a given ordering of X; X itself is
# shuffled without a seed earlier, so seed that as well for full reproducibility.
X_train, X_test = train_test_split(X, train_size=0.8, random_state=42)



In [44]:
# Number of samples in the held-out split.
len(X_test)

2647

In [62]:
# Class name -> integer id, assigned in list order (0..5).
# NOTE(review): this rebinding uses a different id order than the six-class
# `label_dict` defined earlier in this notebook -- confirm which one is intended.
label_dict = {
    name: index
    for index, name in enumerate([
        'pigmentednevus',
        'seborrheickeratosis',
        'basalcellcarcinoma',
        'malignantmelanoma',
        'wart',
        'lentigo',
    ])
}

# Integer class id of every sample in the test split.
# NOTE(review): this displays the whole list; consider slicing it before display.
[label_dict[label] for _, label in X_test]

[5,
 5,
 4,
 5,
 5,
 4,
 0,
 0,
 2,
 2,
 0,
 2,
 1,
 2,
 4,
 0,
 2,
 3,
 5,
 3,
 5,
 1,
 5,
 5,
 5,
 4,
 0,
 3,
 2,
 3,
 2,
 2,
 4,
 3,
 2,
 4,
 1,
 5,
 1,
 3,
 2,
 2,
 3,
 5,
 0,
 4,
 5,
 5,
 1,
 4,
 0,
 5,
 4,
 3,
 0,
 1,
 2,
 0,
 2,
 1,
 0,
 5,
 3,
 5,
 5,
 2,
 3,
 4,
 5,
 1,
 2,
 1,
 5,
 0,
 0,
 5,
 5,
 4,
 5,
 0,
 5,
 2,
 4,
 5,
 1,
 2,
 4,
 5,
 4,
 4,
 1,
 4,
 5,
 4,
 1,
 4,
 2,
 3,
 4,
 3,
 0,
 0,
 3,
 4,
 4,
 1,
 5,
 4,
 3,
 1,
 0,
 1,
 3,
 0,
 2,
 3,
 0,
 0,
 3,
 0,
 0,
 5,
 3,
 4,
 4,
 0,
 2,
 2,
 3,
 1,
 2,
 4,
 5,
 3,
 2,
 5,
 1,
 1,
 2,
 4,
 5,
 5,
 5,
 2,
 5,
 5,
 3,
 4,
 4,
 0,
 4,
 3,
 3,
 2,
 3,
 1,
 1,
 0,
 3,
 1,
 5,
 0,
 1,
 5,
 3,
 5,
 1,
 3,
 0,
 2,
 1,
 3,
 0,
 5,
 5,
 0,
 0,
 5,
 2,
 0,
 0,
 3,
 2,
 4,
 4,
 1,
 4,
 2,
 4,
 0,
 2,
 3,
 3,
 0,
 0,
 2,
 4,
 2,
 5,
 5,
 2,
 0,
 3,
 5,
 5,
 0,
 0,
 4,
 4,
 0,
 1,
 3,
 1,
 5,
 0,
 2,
 4,
 5,
 3,
 0,
 1,
 1,
 3,
 0,
 3,
 4,
 2,
 5,
 1,
 1,
 4,
 0,
 2,
 3,
 3,
 0,
 1,
 3,
 0,
 3,
 4,
 4,
 5,
 2,
 1,
 0,
 0,
 2,
 1,
 4,


In [89]:
# Scratch: format a float with two decimal places.
'{:.2f}'.format(3.1239817239)

'3.12'

In [5]:
import lmdb

In [74]:
import time

In [75]:
# Start timestamp for the elapsed-time check a few cells below.
t0 = time.time()

In [76]:
# Display the stored start timestamp.
t0

1522875750.693052

In [78]:
# Time elapsed since `t0` was recorded.
time.time() - t0

38.40435028076172

In [85]:
# Scratch: end the printed text with a carriage return instead of a newline.
print('o', end='\r')


o

In [120]:
# Hard-coded absolute paths to the two prediction CSV files loaded below.
# Note that this rebinds `path`, which earlier cells used for the dataset root.
path = '/home/danilo/Documents/FGA/2018-1/TCC1/Dataset/test-asan_test_biopsy/prediction.csv'
path_test_train = '/home/danilo/Documents/FGA/2018-1/TCC1/Dataset/test-train/prediction.csv'

In [6]:
import pandas as pd

In [150]:
# Load both prediction tables; later cells read their 'true' and 'prob' columns.
df = pd.read_csv(path)
df_test_train = pd.read_csv(path_test_train)

In [140]:
import operator

# Class name -> integer id for the twelve-class problem.
label_dict = {
    'ak': 10,
    'basalcellcarcinoma': 5,
    'dermatofibroma': 11,
    'hemangioma': 3,
    'intraepithelial_carcinoma': 0,
    'lentigo': 7,
    'melanoma': 4,
    'naevus': 8,
    'pyogenic_granuloma': 2,
    'scc': 9,
    'seborrheickeratosis': 1,
    'wart': 6,
}
# Class names ordered by ascending id, so label_array[i] names class i.
label_array = sorted(label_dict, key=label_dict.get)

In [138]:
import operator
# Class names ordered by ascending integer id.
sorted(label_dict, key=label_dict.get)

['intraepithelial_carcinoma',
 'seborrheickeratosis',
 'pyogenic_granuloma',
 'hemangioma',
 'melanoma',
 'basalcellcarcinoma',
 'wart',
 'lentigo',
 'naevus',
 'scc',
 'ak',
 'dermatofibroma']

In [123]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, f1_score

In [141]:
# Confusion matrix of the 'true' column against the 'prob' column.
# The full id range is passed explicitly so the matrix always has one
# row/column per entry of `label_array`, even when a class never occurs in
# this file; otherwise the labelled DataFrame below could fail to build or be
# misaligned. This matches how the matrix for `df_test_train` is computed.
oi = confusion_matrix(df['true'], df['prob'], labels=list(range(len(label_array))))
oi = pd.DataFrame(oi, index=label_array, columns=label_array)
oi

Unnamed: 0,intraepithelial_carcinoma,seborrheickeratosis,pyogenic_granuloma,hemangioma,melanoma,basalcellcarcinoma,wart,lentigo,naevus,scc,ak,dermatofibroma
intraepithelial_carcinoma,23,0,0,14,5,5,17,6,0,27,9,0
seborrheickeratosis,5,26,0,3,11,4,26,11,3,2,7,1
pyogenic_granuloma,0,0,13,12,4,0,3,0,0,5,0,0
hemangioma,3,0,2,51,6,2,6,1,2,5,0,5
melanoma,3,0,5,2,34,1,2,2,1,9,0,0
basalcellcarcinoma,4,3,2,8,6,54,11,1,0,10,7,4
wart,3,5,0,5,6,1,168,3,0,3,1,3
lentigo,2,2,0,1,2,0,0,38,0,0,3,1
naevus,3,20,0,27,35,4,53,9,12,10,0,62
scc,7,1,4,8,7,9,10,2,0,64,10,0


In [130]:
# Fraction of rows in `df` whose 'prob' value equals the 'true' value.
accuracy_score(df['true'], df['prob'])

0.45689655172413796

In [105]:
# Total of row 6 of the confusion matrix (the 'wart' row in `label_array`).
# After the preceding cell `oi` is a DataFrame with class-name labels, so
# `oi[6]` would look up a column named 6 and fail; indexing the underlying
# array works whether `oi` is an ndarray or a DataFrame.
np.asarray(oi)[6].sum()

198

In [106]:
# Scratch: hard-coded ratio against the row total of 198 computed above.
# NOTE(review): 80 does not appear in the confusion matrix shown in this
# notebook, so it presumably comes from an earlier run -- confirm.
80/198

0.40404040404040403

In [151]:
# Confusion matrix for the test-train predictions, labelled with class names
# on both axes; one row/column per entry of `label_array`.
oi = pd.DataFrame(
    confusion_matrix(
        df_test_train['true'],
        df_test_train['prob'],
        labels=np.arange(len(label_array)),
    ),
    index=label_array,
    columns=label_array,
)
oi

Unnamed: 0,intraepithelial_carcinoma,seborrheickeratosis,pyogenic_granuloma,hemangioma,melanoma,basalcellcarcinoma,wart,lentigo,naevus,scc,ak,dermatofibroma
intraepithelial_carcinoma,15,0,0,0,1,0,2,0,0,2,0,0
seborrheickeratosis,0,10,0,0,0,1,7,0,2,0,0,0
pyogenic_granuloma,0,0,10,1,6,1,0,0,0,2,0,0
hemangioma,2,0,1,11,3,0,2,0,1,0,0,0
melanoma,0,0,0,1,15,2,1,1,0,0,0,0
basalcellcarcinoma,0,0,0,2,1,12,2,1,0,0,0,2
wart,0,0,0,0,1,1,18,0,0,0,0,0
lentigo,0,0,0,1,2,0,1,12,3,0,0,1
naevus,0,3,0,0,0,2,1,1,13,0,0,0
scc,1,0,0,0,0,0,1,0,0,18,0,0


In [152]:
# Fraction of rows in `df_test_train` whose 'prob' value equals the 'true' value.
accuracy_score(df_test_train['true'], df_test_train['prob'])

0.6625