In [1]:
from urllib.request import urlopen
import pandas as pd
from data_generator import DataGenerator
from keras.preprocessing.image import ImageDataGenerator
import keras.backend as K
import keras


Using TensorFlow backend.


In [2]:
def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall


    def precision(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [3]:
def create_model():
    resnet = keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
    output = resnet.layers[-1].output
    output = keras.layers.Flatten()(output)
    resnet = keras.models.Model(inputs=resnet.input, outputs=output)
    for layer in resnet.layers:
        layer.trainable = False

    model = keras.models.Sequential()
    model.add(resnet)
    model.add(keras.layers.Dense(512, activation='relu'))
    model.add(keras.layers.Dropout(0.3))
    model.add(keras.layers.Dense(512, activation='relu'))
    model.add(keras.layers.Dropout(0.3))
    model.add(keras.layers.Dense(2, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.RMSprop(lr=1e-5),
        metrics=['accuracy', f1, 'mse']
    )
    return model

In [4]:
df = pd.read_csv('data/train_dataset.csv')
df_train = pd.DataFrame()
df_val = pd.DataFrame()

for idx, group in df.groupby('class'):
    train = group.sample(frac=0.8)
    val = group.drop(train.index)
    df_train = df_train.append(train, ignore_index=True)
    df_val = df_val.append(val, ignore_index=True)

    

In [5]:
image_generator = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.01,
    zoom_range=[0.9, 1.25],
    horizontal_flip=True,
    vertical_flip=True,
)

def create_aug_gen(generator):
    idx = 0
    while True:
        in_x, in_y = generator[idx]
        g_x = image_generator.flow(255*in_x, in_y, batch_size=in_x.shape[0])
        x, y = next(g_x)
        idx += 1
        if idx == len(generator):
            idx = 0
            generator.on_epoch_end()
        yield x/255.0, y
        


In [6]:
train_dicts = list(df_train.T.to_dict().values())
val_dicts = list(df_val.T.to_dict().values())
all_dicts = list(df.T.to_dict().values())

data_gen_train = DataGenerator(
    train_dicts, 
    image_dir='data/train_images', 
    image_size=(224, 224, 3), 
)

aug_data_gen_train = create_aug_gen(data_gen_train)

data_gen_full = DataGenerator(
    all_dicts, 
    image_dir='data/train_images', 
    image_size=(224, 224, 3), 
)

aug_data_gen_full = create_aug_gen(data_gen_full)

data_gen_val = DataGenerator(
    val_dicts, 
    image_dir='data/train_images', 
    image_size=(224, 224, 3), 
    shuffle=False
)

In [39]:
model = create_model()
model.fit_generator(aug_data_gen_train, epochs=10, validation_data=data_gen_val, steps_per_epoch=len(data_gen_train))

In [7]:
model = create_model()
model.fit_generator(aug_data_gen_full, epochs=10, steps_per_epoch=len(data_gen_full))

W1218 12:51:56.290606 140713021200192 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1218 12:51:56.321984 140713021200192 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1218 12:51:56.331458 140713021200192 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W1218 12:51:56.368114 140713021200192 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W1218 12:51:56.369200

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff9ecd4e1d0>

In [8]:
test_df = pd.read_csv('data/test_dataset.csv')
test_df['class'] = test_df['Predicted']
test_dicts = list(test_df.T.to_dict().values())
test_datagen = DataGenerator(
    test_dicts,
    image_dir='data/test_images/',
    image_size=(224, 224, 3), 
    batch_size=1,
    shuffle=False
)

In [9]:
predict = model.predict_generator(test_datagen)

In [10]:
predict_df = test_df[['Id']]
predict_df['Predicted'] = predict.argmax(axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [11]:
predict_df.to_csv('predicted.csv', index=False)