In [None]:
import os #for OS utilities
from PIL import Image #import image
import math
import pickle #Save and Load Data
import numpy as np # for linear algebra and matrices operations
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from skimage.io import imread #For reading an image
import matplotlib.pyplot as plt # For Plotting an images
import gc # Garbage Collection for optimized memory allocation
import sklearn
gc.enable() # memory is tight

In [None]:
# Lookup table from integer label id (0-27) to its display name.
# The position of each name in the list below is its label id.
name_label_dictionary = dict(enumerate([
    "Nucleoplasm",
    "Nuclear membrane",
    "Nucleoli",
    "Nucleoli fibrillar center",
    "Nuclear speckles",
    "Nuclear bodies",
    "Endoplasmic reticulum",
    "Golgi apparatus",
    "Peroxisomes",
    "Endosomes",
    "Lysosomes",
    "Intermediate filaments",
    "Actin filaments",
    "Focal adhesion sites",
    "Microtubules",
    "Microtubule ends",
    "Cytokinetic bridge",
    "Mitotic spindle",
    "Microtubule organizing center",
    "Centrosome",
    "Lipid droplets",
    "Plasma membrane",
    "Cell junctions",
    "Mitochondria",
    "Aggresome",
    "Cytosol",
    "Cytoplasmic bodies",
    "Rods & rings",
]))

In [None]:
# Load the training index. Later cells read its 'Id' column (image file-name
# stem) and 'Target' column (whitespace-separated label ids); the pixel data
# itself is read from disk by the batch generator, not stored in this frame.
image_dataframe = pd.read_csv('train.csv')
print(image_dataframe.head())
print('The shape of datatframe is: ',image_dataframe.shape)

In [None]:
# Frequency of every distinct 'Target' string (i.e. every label combination).
# Despite its name, the result of value_counts() is a pandas Series.
count_combination_dataframe = image_dataframe['Target'].value_counts()
print(count_combination_dataframe.head())
# Number of distinct label combinations present in the training index.
print(len(image_dataframe['Target'].unique()))

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Teach the binarizer all 28 label ids by fitting it on one single-label sample
# per id (0..27). The returned indicator matrix is this cell's displayed output.
encoder = MultiLabelBinarizer()
encoder.fit_transform([(label_id,) for label_id in range(28)])

In [None]:
# Spot-check the fitted encoder on one sample that carries labels 22 and 5.
encoder.transform([(22,5)])

In [None]:
from sklearn.model_selection import train_test_split

# Pair every image id with its label string, then hold out 15% for validation.
samples = list(zip(image_dataframe['Id'], image_dataframe['Target']))
# A fixed random_state makes the split identical on every Restart & Run All, so
# validation numbers stay comparable between runs and a reloaded model is never
# validated on samples it was trained on in an earlier session.
train_samples, validation_samples = train_test_split(samples, test_size=0.15, random_state=42)

In [None]:
def generator(data, batch_size=8, image_dir="train/"):
    """Yield ``(images, labels)`` batches forever, cycling over ``data``.

    Parameters
    ----------
    data : sequence of (image_id, target)
        ``image_id`` is the file-name stem of a sample; ``target`` is a
        whitespace-separated string of integer label ids (e.g. ``"22 5"``).
    batch_size : int, default 8
        Samples per batch. The last batch of each pass may be smaller.
    image_dir : str, default "train/"
        Prefix prepended to every image file name.

    Yields
    ------
    images : np.ndarray
        One array per sample, stacked along a new first axis. Each sample is
        built from its ``_red``, ``_green``, ``_blue`` and ``_yellow`` PNGs
        (converted to 8-bit greyscale and scaled by 1/255) as three channels:
        ``red/2 + yellow/2``, ``green/2 + yellow/2`` and ``blue``.
    labels : np.ndarray
        Output of the module-level ``encoder`` for the batch, one row per
        sample.

    Raises
    ------
    ValueError
        On the first ``next()`` call if ``data`` is empty; an empty sequence
        would otherwise make the endless loop spin without ever yielding.
    """
    sample_count = len(data)
    if sample_count == 0:
        raise ValueError("generator() needs at least one sample; got empty data")
    while True:
        for start in range(0, sample_count, batch_size):
            batch = data[start:start + batch_size]
            images = []
            for sample in batch:
                planes = {}
                for colour in ("red", "green", "blue", "yellow"):
                    path = image_dir + sample[0] + "_" + colour + ".png"
                    # Context manager releases the file handle deterministically.
                    with Image.open(path) as img:
                        planes[colour] = np.array(img.convert("L")) / 255
                # Fold the yellow channel half-and-half into red and green.
                images.append(np.stack([
                    planes["red"] / 2 + planes["yellow"] / 2,
                    planes["green"] / 2 + planes["yellow"] / 2,
                    planes["blue"],
                ], -1))
            # Encode the whole batch at once; the encoder decides the label width.
            targets = [tuple(map(int, sample[1].split())) for sample in batch]
            yield np.array(images), encoder.transform(targets)

In [None]:
# Endless batch streams (default batch size) for the two splits.
train_images_gen = generator(data=train_samples)
val_images_gen = generator(data=validation_samples)

In [None]:
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense
from keras.layers import Activation
from keras import Model
from keras.layers import Input


# Untrained (weights=None) InceptionV3 with its stock 28-way classification head.
# NOTE(review): no input_shape is given, so Keras falls back to the
# architecture's default input size - confirm it matches the arrays produced by
# generator() before training.
model = InceptionV3(
    include_top=True,
    weights=None,
    classes=28,
)

In [None]:
# Print the layer-by-layer summary of the base InceptionV3 model.
model.summary()

In [None]:
# Swap the stock head for a sigmoid head so each of the 28 labels gets an
# independent probability (multi-label classification).
#
# The new head is attached to the output of the penultimate layer rather than
# relying on `model.layers.pop()`: popping from the layer list does not rewire
# the graph, and on Keras versions where `layers` returns a copy it is a silent
# no-op, which would stack the sigmoid layer on top of the existing head and
# put two layers named 'predictions' in one model.
x = model.layers[-2].output
x = Dense(28, activation='sigmoid', name='predictions')(x)
# `inputs`/`outputs` are the documented keyword names; `input`/`output` are
# legacy spellings.
train_model = Model(inputs=model.input, outputs=x)

In [None]:
# Print the layer-by-layer summary of the model that is trained below.
train_model.summary()

In [None]:
# Binary cross-entropy treats each of the 28 sigmoid outputs as its own yes/no
# problem, which is what a multi-label target needs.
train_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step counts must be whole numbers: round up so the final, smaller batch of
# each pass is included and the endless generators stay aligned with epoch
# boundaries. The divisor 8 must match the generators' batch_size.
train_model.fit_generator(
    train_images_gen,
    epochs=2,
    verbose=1,
    steps_per_epoch=math.ceil(len(train_samples) / 8),
    validation_data=val_images_gen,
    validation_steps=math.ceil(len(validation_samples) / 8),
)

In [None]:
# Persist the trained weights to Human_atlas.h5 in the working directory.
train_model.save_weights('Human_atlas.h5')

In [None]:
import matplotlib.pyplot as plt

# Per-epoch loss curves recorded by the training run above.
# NOTE: `train_model.history` is read from the model object, so this cell
# presumably only works in the same kernel session as the fit cell - confirm.
plt.plot(train_model.history.history['loss'])
plt.plot(train_model.history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# The second curve is the validation loss (val_loss), not a test-set loss.
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

In [None]:
# Restore the weights stored in Human_atlas.h5 into train_model.
train_model.load_weights('Human_atlas.h5')

In [None]:
# Load the submission template; its 'Id' column is used below to enumerate the
# test images, and a 'Predicted' column is written back into it.
test_df=pd.read_csv('sample_submission.csv')

In [None]:
def predict_generator(data, batch_size=8, image_dir="test/"):
    """Yield unlabeled image batches forever, cycling over ``data``.

    Parameters
    ----------
    data : sequence of tuples
        Only element ``[0]`` of each entry is used: the image file-name stem.
    batch_size : int, default 8
        Samples per batch. The last batch of each pass may be smaller.
    image_dir : str, default "test/"
        Prefix prepended to every image file name.

    Yields
    ------
    np.ndarray
        One array per sample, stacked along a new first axis. Each sample is
        built from its ``_red``, ``_green``, ``_blue`` and ``_yellow`` PNGs
        (converted to 8-bit greyscale and scaled by 1/255) as three channels:
        ``red/2 + yellow/2``, ``green/2 + yellow/2`` and ``blue``.

    Raises
    ------
    ValueError
        On the first ``next()`` call if ``data`` is empty; an empty sequence
        would otherwise make the endless loop spin without ever yielding.
    """
    sample_count = len(data)
    if sample_count == 0:
        raise ValueError("predict_generator() needs at least one sample; got empty data")
    while True:
        for start in range(0, sample_count, batch_size):
            images = []
            for sample in data[start:start + batch_size]:
                planes = {}
                for colour in ("red", "green", "blue", "yellow"):
                    path = image_dir + sample[0] + "_" + colour + ".png"
                    # Context manager releases the file handle deterministically.
                    with Image.open(path) as img:
                        planes[colour] = np.array(img.convert("L")) / 255
                # Fold the yellow channel half-and-half into red and green.
                images.append(np.stack([
                    planes["red"] / 2 + planes["yellow"] / 2,
                    planes["green"] / 2 + planes["yellow"] / 2,
                    planes["blue"],
                ], -1))
            yield np.array(images)

In [None]:
# Wrap every test id in a 1-tuple so predict_generator can read it as sample[0].
test_samples = [(image_id,) for image_id in test_df['Id']]
test_images_gen = predict_generator(data=test_samples)

In [None]:
# The step count must be a whole number. Rounding up covers the final, smaller
# batch so `results` gets exactly one row per entry in test_samples (the
# divisor 8 must match predict_generator's batch_size).
results = train_model.predict_generator(
    test_images_gen,
    steps=math.ceil(len(test_samples) / 8),
    verbose=1,
)

In [None]:
# Show the shape of the prediction array returned by the model.
print(results.shape)

In [None]:
# Turn each row of scores into a space-separated string of predicted label ids.
predictions = []
for scores in results:
    # Keep every label whose score reaches the 0.2 threshold. The id range is
    # taken from the row itself instead of a hard-coded 28.
    label_predict = np.arange(len(scores))[scores >= 0.2]
    if label_predict.size == 0:
        # No label cleared the threshold: fall back to the single best-scoring
        # label rather than emitting an empty prediction for this image.
        label_predict = np.array([np.argmax(scores)])
    predictions.append(' '.join(str(l) for l in label_predict))

In [None]:
# Store the decoded label strings in the 'Predicted' column (modifies test_df in
# place; `predictions` must have one entry per row of test_df).
test_df['Predicted'] = predictions

In [None]:
# Write the submission file; index=False leaves out the DataFrame's row index.
test_df.to_csv('submission.csv',index=False)

In [None]:
# Build the network input for one hard-coded training sample by hand, using the
# same channel recipe as the batch generator: greyscale planes scaled by 1/255,
# with yellow folded half-and-half into red and green.
check_id = '008761b4-bbad-11e8-b2ba-ac1f6b6435d0'
red_check, green_check, blue_check, yellow_check = [
    np.array(Image.open("train/" + check_id + suffix).convert("L")) / 255
    for suffix in ("_red.png", "_green.png", "_blue.png", "_yellow.png")
]
rgb_arr_check = [
    np.stack(
        [
            red_check / 2 + yellow_check / 2,
            green_check / 2 + yellow_check / 2,
            blue_check,
        ],
        -1,
    )
]

In [None]:
# Run the model on the single hand-built sample held in rgb_arr_check.
tmp=train_model.predict(np.array(rgb_arr_check))

In [None]:
# Print the raw scores for the hand-built sample, then select the label ids
# (0..27) whose score is at least 0.2.
print(tmp)
np.arange(28)[tmp[0] >=0.2]

# NOTE(review): the three snippets below look like leftover scratch code and
# cannot run top to bottom as written: `rgb_arr` is pickled before the line
# that first assigns it, and `images_gen` is not defined anywhere in the
# visible notebook. Confirm whether they can be removed.

# Pull ceil(n / 32) batches from train_images_gen and print each image batch's
# shape. NOTE(review): train_images_gen is created with the default batch size
# of 8, so the divisor 32 presumably dates from an earlier version - confirm.
for i in range(0,math.ceil(len(image_dataframe['Id'])/32)):
    values=next(train_images_gen)
    print(values[0].shape)
    #with open('rgb_images_1.pkl','ab') as f:
        #pickle.dump(values[0],f)

# Dump `rgb_arr` to rgb_norm.pkl (see the note above: `rgb_arr` is not yet
# assigned at this point).
with open('rgb_norm.pkl','wb') as f:
    pickle.dump(rgb_arr,f)

# Combine four per-channel batches into 3-channel images.
# NOTE(review): this indexes values[0]..values[3], whereas generator() in this
# notebook yields 2-tuples, so it presumably targeted an older generator that
# returned the four channels separately - confirm.
rgb_arr=[]
for i in range(0,math.ceil(len(image_dataframe['Id'])/32)):
    values=next(images_gen)
    for j in range(0,32):
        rgb_arr.append(np.stack([values[0][j]/2+values[3][j]/2,values[1][j]/2+values[3][j]/2,values[2][j]], -1))
    print(len(rgb_arr),rgb_arr[0].shape)