In [1]:
import json
import os
import PIL
import PIL.Image
from IPython.display import display
from PIL import Image

import keras
from keras_preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import preprocess_input, decode_predictions
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Import all the Keras machinery we need
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras import metrics

Using TensorFlow backend.


In [6]:
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False)
base_model.save('inception.h5')

In [2]:
from keras.models import load_model
base_model = load_model('inception.h5')

Instructions for updating:
Colocations handled automatically by placer.


KeyboardInterrupt: 

In [3]:
# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)

# and a logistic layer -- withou only 3 output classes
predictions = Dense(3, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', 
              metrics=[metrics.mae, metrics.categorical_accuracy]
             )

NameError: name 'base_model' is not defined

In [4]:
# Prepare our training / validation / etc set
def getData():
    filename = './dataset/majurca-ecoclassifier-assets.json'
    im_path = []
    labels = []
    k=0

    #Read JSON data into the datastore variable
    if filename:
        with open(filename, 'r') as f:
            list_info = json.load(f)

    str_filter = '192-168-0-31' # to filter certain files
    #print(list_info[2443]['id'])
    #print(len(list_info))

    for dict in list_info:
        #print(k)
        if str_filter in dict['path']:
            if dict['tag_slugs'] != [] and dict['tag_slugs'] in [['godet-vide'], ['pet-fonce'], ['pet-clair']]:
                labels.append(dict['tag_slugs'][0])
                im_path.append(dict['thumbnail_320x200_path'])
            else:
                #print('no label',dict['path'])
                k=k+1
        else:
            #print('wrong camera',dict['path'])
            k=k+1

    print('{} images were ignored'.format(k))
    print(len(im_path),len(labels))
    return im_path, labels

def splitData(X,y):
    output_dir = './dataset_split'
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    #Create folder for the test and training split
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
        os.mkdir(output_dir + '/test')
        os.mkdir(output_dir + '/train')
    else:
        print("Warning: output dir {} already exists".format(output_dir))

    #Copying files into test and train folder
    label_count_test = []
    label_count_train = []

    for file in X_test:
        #file = file.replace(':','/')
        os.system('cp ./dataset/'+ file + ' ' + output_dir + '/test/' + file)

    for file in X_train:
        #file = file.replace(':','/')
        os.system('cp ./dataset/'+ file + ' ' + output_dir + '/train/' + file)

    #print(Counter(y_train), Counter(y_test))
    return X_train, X_test, y_train, y_test

def splitValidation(X,y):
    X_train, X_vali, y_train, y_vali = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    return X_train, X_vali, y_train, y_vali

#files_all, labels_all = getData()
#print(type(files_all[3]),labels_all[3])
#X_try, X_test, y_try, y_test = splitData(files_all,labels_all)
#X_train, X_vali, y_train, y_vali = splitValidation(X_try, y_try)


In [5]:
# We build our (X, y) set. We ignore test set yet (laziness)
with open("./dataset/majurca-ecoclassifier-assets.json", "r") as source:
    assets = json.load(source)
    
# Populate our target variables. X is gonna be filename, y the class
X = []
y = []
target_labels = set(("pet-clair", "pet-fonce", "godet-vide", ))
for asset in assets:
    # Skip what's not from OUR camera
    if not "192-168-0-31" in asset['path']:
        continue
    intersection = target_labels.intersection(asset['tag_slugs'])
    if not intersection:
        continue
    label = intersection.pop()
    X.append("./dataset/{}".format(asset['thumbnail_320x200_path']))
    y.append(label)

In [27]:
n_train_samples = len(X) * 0.8
n_val_samples = len(X) * 0.2
batch_size = 32
datagen = keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2
)
dataset_info = {'filename': X, 'class': y}
dataframe = pd.DataFrame(dataset_info)
dataframe
train_generator = datagen.flow_from_dataframe(
    dataframe,
    subset="training",
    class_mode="categorical",
    classes=target_labels,
    target_size=(224, 224),
    batch_size=batch_size,
)
validation_generator = datagen.flow_from_dataframe(
    dataframe,
    subset="validation",
    class_mode="categorical",
    classes=target_labels,
    target_size=(224, 224),
    batch_size=batch_size,
)


Found 580 images belonging to 3 classes.
Found 145 images belonging to 3 classes.


In [28]:
train_generator.class_indices

{'godet-vide': 0, 'pet-clair': 1, 'pet-fonce': 2}

In [29]:
# Build a simple CONVNET
# Like in https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

if K.image_data_format() == 'channels_first':
    input_shape = (3, 224, 224)
else:
    input_shape = (224, 224, 3)

cnnmodel = Sequential()
cnnmodel.add(Conv2D(32, (3, 3), input_shape=input_shape))
cnnmodel.add(Activation('relu'))
cnnmodel.add(MaxPooling2D(pool_size=(2, 2)))

cnnmodel.add(Conv2D(32, (3, 3)))
cnnmodel.add(Activation('relu'))
cnnmodel.add(MaxPooling2D(pool_size=(2, 2)))

cnnmodel.add(Conv2D(64, (3, 3)))
cnnmodel.add(Activation('relu'))
cnnmodel.add(MaxPooling2D(pool_size=(2, 2)))

cnnmodel.add(Flatten())
cnnmodel.add(Dense(64))
cnnmodel.add(Activation('relu'))
cnnmodel.add(Dropout(0.5))
cnnmodel.add(Dense(3))
cnnmodel.add(Activation('softmax'))

cnnmodel.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Leeeet's traaaaaaaaiiiiin!!!!
cnnmodel.fit_generator(
        train_generator,
        steps_per_epoch=n_train_samples // batch_size,
        epochs=10,
        validation_data=validation_generator,
        validation_steps=n_val_samples // batch_size,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x12fc03f60>

In [30]:
cnnmodel.save('simplecnn.h5')
#Y_pred = cnnmodel.predict_generator(validation_generator, n_val_samples // batch_size+1)
#y_pred = np.argmax(Y_pred, axis=1)
#validation_generator.classes
#y_pred
n_correct = 0
#import pdb;pdb.set_trace()
for _val_xs, _val_ys in validation_generator:#n_validation_samples // batch_size):
    for idx in range(len(_val_ys)):
        img = _val_xs[idx]
        _y = np.argmax(_val_ys[idx], axis=0)
        pred = np.argmax(cnnmodel.predict(np.expand_dims(img, axis=0)), axis=1)
        print(_y, pred, pred == _y)
#        n_correct += pred == _y and 1 or 0
#print("Correct answers = %0.2f%%" % (n_correct / len(validation_generator.classes) * 100))

1 [1] [ True]
2 [1] [False]
1 [2] [False]
1 [1] [ True]
0 [0] [ True]
1 [1] [ True]
0 [0] [ True]
1 [1] [ True]
2 [2] [ True]
1 [1] [ True]
2 [2] [ True]
1 [1] [ True]
1 [1] [ True]
0 [0] [ True]
1 [1] [ True]
2 [2] [ True]
0 [0] [ True]
1 [1] [ True]
0 [0] [ True]
1 [1] [ True]
0 [0] [ True]
2 [2] [ True]
2 [2] [ True]
0 [0] [ True]
2 [2] [ True]
1 [1] [ True]
2 [2] [ True]
0 [0] [ True]
2 [2] [ True]
1 [2] [False]
2 [2] [ True]
1 [2] [False]
0 [0] [ True]
1 [1] [ True]
0 [0] [ True]
0 [0] [ True]
2 [2] [ True]
0 [1] [False]
0 [0] [ True]
1 [2] [False]
2 [2] [ True]
1 [1] [ True]
0 [1] [False]
1 [1] [ True]
1 [1] [ True]
2 [2] [ True]
0 [0] [ True]
0 [0] [ True]
1 [2] [False]
0 [0] [ True]
0 [0] [ True]
1 [1] [ True]
0 [0] [ True]
1 [1] [ True]
1 [1] [ True]
1 [1] [ True]
2 [2] [ True]
1 [1] [ True]
1 [1] [ True]
1 [1] [ True]
0 [0] [ True]
0 [0] [ True]
0 [0] [ True]
0 [0] [ True]
0 [0] [ True]
0 [0] [ True]
1 [1] [ True]
1 [1] [ True]
0 [0] [ True]
1 [1] [ True]
1 [1] [ True]
1 [1] 

KeyboardInterrupt: 

In [90]:
img_width = 224
img_height = 224
nb_filters1 = 32
nb_filters2 = 64
conv1_size = 3
conv2_size = 2
pool_size = 2

model = Sequential()
model.add(Conv2D(nb_filters1, conv1_size, conv1_size, border_mode ="same", input_shape=(img_width, img_height, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))

model.add(Conv2D(nb_filters2, conv2_size, conv2_size, border_mode ="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size), dim_ordering='th'))

model.add(Flatten())
model.add(Dense(256))
model.add(Activation("relu"))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.fit_generator(
        train_generator,
        steps_per_epoch=n_train_samples // batch_size,
        epochs=10,
        validation_data=validation_generator,
        validation_steps=n_val_samples // batch_size,
)


  # Remove the CWD from sys.path while we load stuff.
  
  app.launch_new_instance()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

KeyboardInterrupt: 

In [59]:
#from keras.models import load_model
#from sklearn.metrics import confusion_matrix
#import matplotlib
#import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes,
                          title='Confusion matrix',
                          ):
    """
    This function prints and plots the confusion matrix.
    """
    cmap = plt.cm.jet
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, '{:.2f}'.format(cm[i, j]), horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

#Confusion Matrix and Classification Report
from sklearn.metrics import classification_report, confusion_matrix
Y_pred = cnnmodel.predict_generator(validation_generator, n_val_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
print(classification_report(validation_generator.classes, y_pred, target_names=target_labels))

# Sample prediction
for idx in range(10):
    img = image.load_img(X[idx], target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    pred = cnnmodel.predict(x)
    print(X[idx], list(target_labels).index(y[idx]), np.argmax(pred), pred)
    
# Plot a pretty confusion matrix
#np.set_printoptions(precision=2)
#dev_feats = read_dataset('valid')
#predictions = emotion_classifier.predict_classes(dev_feats)
#te_labels = get_labels('valid')
#conf_mat = confusion_matrix(validation_generator.classes, y_pred)

#plt.figure()
#plot_confusion_matrix(conf_mat, classes=target_labels)
#plt.show()


Confusion Matrix
[[16 22  8]
 [15 27 16]
 [11 18 12]]
Classification Report
              precision    recall  f1-score   support

  godet-vide       0.38      0.35      0.36        46
   pet-fonce       0.40      0.47      0.43        58
   pet-clair       0.33      0.29      0.31        41

   micro avg       0.38      0.38      0.38       145
   macro avg       0.37      0.37      0.37       145
weighted avg       0.38      0.38      0.38       145

./dataset/wsEN4iv2SliFUuYNXIM-5Q:gfH6L7S3QdOvVp1oRcPtdg:320x200.png 0 0 [[0.76 0.22 0.09]]
./dataset/wsEN4iv2SliFUuYNXIM-5Q:mN01z2Y0RASRi1KBB0tY2Q:320x200.png 0 0 [[0.76 0.22 0.09]]
./dataset/wsEN4iv2SliFUuYNXIM-5Q:mquikLovRNGOnd9iAKaonw:320x200.png 0 0 [[0.62 0.   0.05]]
./dataset/wsEN4iv2SliFUuYNXIM-5Q:sXteG3h6S4SgTzzVskjsvQ:320x200.png 0 0 [[0.77 0.22 0.09]]
./dataset/wsEN4iv2SliFUuYNXIM-5Q:gC348iM2R-C9pUYEGrveqg:320x200.png 0 0 [[0.76 0.22 0.09]]
./dataset/wsEN4iv2SliFUuYNXIM-5Q:CGPTM_Q1STyzD6aGgEPazA:320x200.png 0 0 [[0.62 0.   0.05

In [50]:
y_pred

array([1, 1, 1, 1, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 2, 1, 1, 0, 2, 1, 1, 1,
       2, 0, 2, 2, 2, 1, 1, 1, 2, 0, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 2, 1,
       0, 1, 1, 2, 1, 1, 1, 2, 2, 0, 1, 1, 2, 0, 0, 2, 0, 2, 0, 0, 1, 1,
       2, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 0, 0, 2, 1, 0, 0, 1, 1,
       0, 1, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 2, 0, 0, 1, 2, 1, 0, 1, 1, 1, 2, 2, 2, 1, 0, 1, 2, 1, 0,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 2, 0, 0, 2])

In [20]:
# Leeeet's traaaaaaaaiiiiin!!!!
model.fit_generator(
        train_generator,
        steps_per_epoch=n_train_samples // batch_size,
        epochs=5,
        validation_data=validation_generator,
        validation_steps=n_val_samples // batch_size,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5

KeyboardInterrupt: 

In [46]:
TRAIN_DIR = "./dataset_split/train"
VALIDATION_DIR =  "./dataset_split/test"

print(os.listdir('.'))
assert os.path.isdir(TRAIN_DIR), TRAIN_DIR
assert os.path.isdir(VALIDATION_DIR), VALIDATION_DIR

HEIGHT = 224
WIDTH = 224
BATCH_SIZE = 32

CATEGORIES = ["godet-vide","pet-clair","pet-fonce"]


#DATA AUGMENTATION
train_datagen =  ImageDataGenerator(
    preprocessing_function=preprocess_input,
    featurewise_std_normalization= True,
    rotation_range=0,
    horizontal_flip=False,
    vertical_flip=False
)

validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


#DATA PREPARATION
# WITH FLOW FROM DATAFRAME
train_info = {'id':X_train,'label':y_train}
vali_info = {'id':X_vali,'label':y_vali}

traindf = pd.DataFrame(train_info)
validf = pd.DataFrame(vali_info)

train_generator=validation_datagen.flow_from_dataframe(
    dataframe=traindf,
    directory=TRAIN_DIR,
    x_col="id",
    y_col="label",
    subset="training",
    batch_size=BATCH_SIZE,
    #seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(HEIGHT,WIDTH)
)

valid_generator=train_datagen.flow_from_dataframe(
    dataframe=validf,
    directory=VALIDATION_DIR,
    x_col="id",
    y_col="label",
    subset="validation",
    batch_size=BATCH_SIZE,
    #seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(HEIGHT,WIDTH)
)


['Classifier-test.ipynb', 'dataset', 'inception.h5', 'dataset_split', '.ipynb_checkpoints', 'training.py']
Found 464 images belonging to 3 classes.
Found 0 images belonging to 0 classes.


In [2]:
# train the model on the new data for a few epochs
model.fit_generator(...)

# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.

# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
   print(i, layer.name)

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
   layer.trainable = False
for layer in model.layers[249:]:
   layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers
model.fit_generator(...)

Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


Exception: URL fetch failure on https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5: None -- [Errno 8] nodename nor servname provided, or not known