In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.losses import binary_crossentropy
from keras.optimizers import Adam
from keras.callbacks import Callback
from keras import backend
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Optimizer
import keras

import cv2
import albumentations
import segmentation_models as sm
import os


In [9]:
# Root folder of the competition data. The trailing slash is kept because the
# file names below and in later cells are concatenated straight onto it.
path = '../Capstone/understanding_cloud_organization/'

# One row per (image, cloud class) pair with its run-length encoded mask.
train_df = pd.read_csv(f'{path}train.csv')
train_df.head()

Unnamed: 0,Image_Label,EncodedPixels
0,0011165.jpg_Fish,264918 937 266318 937 267718 937 269118 937 27...
1,0011165.jpg_Flower,1355565 1002 1356965 1002 1358365 1002 1359765...
2,0011165.jpg_Gravel,
3,0011165.jpg_Sugar,
4,002be4f.jpg_Fish,233813 878 235213 878 236613 878 238010 881 23...


In [4]:
# Image_Label has the form "<image file>_<cloud class>": split it into its two parts.
train_df['image'] = [label.split('_')[0] for label in train_df['Image_Label']]
train_df['class'] = [label.split('_')[1] for label in train_df['Image_Label']]
# True where the row actually carries an encoded mask.
train_df['masked'] = train_df['EncodedPixels'].notna()

In [5]:
# Lower-case names of the four cloud classes.
types = [
    'fish',
    'sugar',
    'gravel',
    'flower',
]

In [6]:
# One 0/1 indicator column per cloud type: 1 only on rows of that class that
# also carry a mask. The class labels in the data are the capitalised type names.
for cloud_type in types:
    train_df[cloud_type] = np.where(
        (train_df['class'] == cloud_type.capitalize()) & train_df['masked'], 1, 0
    )

In [None]:
# Collapse the per-(image, class) rows into one row per image.
# Only the numeric indicator columns are summed: aggregating every column with
# np.sum would also try to add up the string columns (Image_Label,
# EncodedPixels, class), which recent pandas versions no longer drop silently.
indicator_cols = ['masked'] + types
mask_df = train_df.groupby('image')[indicator_cols].sum().reset_index()

In [None]:
# Run Length Encoder utilities: convert the EncodedPixels column into an actual coverage mask.
# Also the "deconverter" to convert a mask back into a column of encoded pixels
# in order to conform to the submission guidelines.
# Citations and links are in the Run Length Encoder notebook.

def rle_to_mask(rle_string, height, width):
    '''
    Convert an RLE (run length encoding) string to a numpy mask.

    The string is a whitespace-separated sequence of "start length" pairs.
    Starts are 1-based positions in the mask flattened column by column
    (top to bottom, then left to right); covered pixels are set to 255.

    Parameters: 
    rle_string (str): RLE-encoded mask. A missing value (None, NaN, pd.NA),
        the sentinel -1, or an empty/blank string all mean "no mask".
    height (int): height of the mask
    width (int): width of the mask 

    Returns: 
    numpy.array: uint8 array of shape (height, width); 255 inside the
        encoded runs and 0 elsewhere (all zeros when there is no mask).

    Raises:
    ValueError: if the string holds an odd number of values or a value
        that is not an integer.
    '''

    rows, cols = height, width

    # Rows without a mask have a missing EncodedPixels value (the notebook
    # detects them with isna()), so every "missing" marker is treated the
    # same way as the -1 sentinel instead of crashing on .split().
    no_mask = not isinstance(rle_string, str) and (pd.isna(rle_string) or rle_string == -1)
    tokens = [] if no_mask else rle_string.split()
    if not tokens:
        # Same dtype as a decoded mask so callers can mix both freely.
        return np.zeros((rows, cols), dtype=np.uint8)

    rle_pairs = np.array([int(token) for token in tokens]).reshape(-1, 2)
    img = np.zeros(rows * cols, dtype=np.uint8)
    for start, length in rle_pairs:
        start -= 1  # RLE positions are 1-based
        img[start:start + length] = 255

    # Runs are laid out column by column: reshape to (cols, rows), then
    # transpose to get the (rows, cols) image.
    return img.reshape(cols, rows).T

In [None]:
# Hold out 20% of the mask_df index labels for validation (fixed seed).
tr_img, val_img = train_test_split(mask_df.index, test_size=0.2, random_state=42)

# Settings shared by both generators; only the augmentation flag differs.
# NOTE(review): the stock Keras ImageDataGenerator imported at the top does not
# appear to accept these arguments; this call presumably targets a custom
# Sequence-style generator class that is not defined in this notebook — confirm.
shared_gen_args = dict(df=mask_df, target_df=train_df, batch_size=16,
                       reshape=(320, 480), n_channels=3, n_classes=4)

tr_gen = ImageDataGenerator(tr_img, augment=True, **shared_gen_args)

val_gen = ImageDataGenerator(val_img, augment=False, **shared_gen_args)

In [7]:
# U-Net from segmentation_models with a 'resnet34' backbone: 320x480 RGB input,
# four output channels (one per cloud class) with a sigmoid activation.
model = sm.Unet(
    'resnet34',
    input_shape=(320, 480, 3),
    classes=4,
    activation='sigmoid',
)

NameError: name 'sm' is not defined

In [None]:
def dice_coef(y_true, y_pred, smooth=1):
    '''
    Soft Dice coefficient between a target mask and a predicted mask.

    Both tensors are flattened, so the score is computed over the whole batch
    and all classes at once. `smooth` keeps the ratio defined when both masks
    are empty.
    '''
    y_true_flat = backend.flatten(backend.cast(y_true, backend.floatx()))
    y_pred_flat = backend.flatten(y_pred)
    overlap = backend.sum(y_true_flat * y_pred_flat)
    return (2. * overlap + smooth) / (backend.sum(y_true_flat) + backend.sum(y_pred_flat) + smooth)


def bce_dice_loss(y_true, y_pred):
    '''Binary cross-entropy plus the Dice loss (1 - dice_coef).'''
    return binary_crossentropy(y_true, y_pred) + (1. - dice_coef(y_true, y_pred))


# Nadam is not imported by name in this notebook, so it is reached through the
# `keras` package import. The loss and metric above are defined here because
# no other cell of the notebook provides them.
model.compile(optimizer=keras.optimizers.Nadam(lr=0.0002), loss=bce_dice_loss, metrics=[dice_coef])

In [None]:
# Save checkpoints in case of a crash, keeping only the best-performing model.
# ModelCheckpoint is not imported by name in this notebook, so it is reached
# through the `keras` package import.
checkpoint = keras.callbacks.ModelCheckpoint('model.h5', save_best_only=True)

# Train for 15 epochs on the training generator, validating on the held-out generator.
history = model.fit_generator(tr_gen, validation_data=val_gen, callbacks=[checkpoint], epochs=15)

In [None]:
# Sample submission: one row per (test image, cloud class) pair.
sub_df = pd.read_csv(f'{path}/sample_submission.csv')
# The image file name is the part of Image_Label before the underscore.
sub_df['ImageId'] = [label.split('_')[0] for label in sub_df['Image_Label']]
# One row per distinct test image, in order of first appearance.
test = sub_df[['ImageId']].drop_duplicates().reset_index(drop=True)

In [None]:
# Build lists of row positions in `test` (at most 500 per batch) and feed them
# to the generator that produces the prediction batches.
# The frame of unique test images is named `test` (built from sub_df above);
# the previous `test_imgs` references pointed at a name that is never defined.
# NOTE(review): each iteration overwrites `batch`, so only the last group of
# up to 500 images reaches the generator below — the generator/prediction step
# probably belongs inside the loop; confirm the intended flow.
for i in range(0, test.shape[0], 500):
    batch = list(range(i, min(test.shape[0], i + 500)))

# NOTE(review): these arguments do not look like the stock Keras
# ImageDataGenerator signature; presumably a custom generator class is meant — confirm.
test_gen = ImageDataGenerator(batch, df=test, shuffle=False, mode='predict', dim=(350, 525),
                                    reshape=(320, 480), n_channels=3, base_path= (path + '/test_images'), 
                                    target_df=sub_df, batch_size=1, n_classes=4)

In [None]:
# Write the submission file without the helper ImageId column.
# drop() returns a copy here, so test_df itself is left untouched and the cell
# can be re-run without a KeyError on the already-removed column.
# NOTE(review): test_df is not created in any cell shown in this notebook —
# confirm it is built from the model predictions before this cell runs.
test_df.drop(columns='ImageId').to_csv('sub_final.csv', index=False)