# State Farm

In [46]:
%matplotlib inline
from __future__ import division,print_function

import os, json
from shutil import copyfile
from glob import glob
import numpy as np
import pandas as pd
import re
from keras.utils.data_utils import get_file
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
from PIL import Image
import keras
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense, Dropout, Flatten, Input
from keras.preprocessing.image import ImageDataGenerator
from keras_tqdm import TQDMNotebookCallback
from utils import plots, get_batches, plot_confusion_matrix, get_data


## Load data

## Create validation set

Important that the validation set not only contains a random selection of each class but also for each person, since there is multiple images of each person

In [49]:
#path = "data/statefarm/"
path = "data/statefarm/sample/"

In [8]:
model_path = path + 'models/'
if not os.path.exists(model_path): os.mkdir(model_path)
batch_size=64

Read csv file and create set of person and classes and images

In [31]:
valid_path = path + 'valid/'
if not os.path.exists(valid_path): os.mkdir(valid_path) 
person_dict = {}
with open(path + 'driver_imgs_list.csv') as f:
    for line in f:
        words = line.rstrip().split(',')
        person_id = words[0]
        label = words[1]
        img = words[2]
        
        if person_id not in person_dict:
            person_dict[person_id] = {}
            person_dict[person_id][label] = [img]    
        elif label not in person_dict[person_id]:
            person_dict[person_id][label] = [img]
        else: 
            person_dict[person_id][label].append(img)
            


In [None]:
for p in person_dict.keys():
    for c in person_dict[p].keys():
        g = person_dict[p][c]
        random_order_images = np.random.permutation(g)
        if not os.path.exists(valid_path + c): os.mkdir(valid_path + c) 
        for i in range(int(len(random_order_images) * 0.1)):
            os.rename((path + "train/" + c + "/" + random_order_images[i]), (valid_path + c + "/" + random_order_images[i]))


Create a sample set with 50 picture of training images and 10 validation images

In [None]:
for p in person_dict.keys():
    for c in person_dict[p].keys():
        g = person_dict[p][c]
        random_order_images = np.random.permutation(g)
        if not os.path.exists(valid_path + c): os.mkdir(valid_path + c) 
        for i in range(int(len(random_order_images) * 0.1)):
            os.rename((path + "train/" + c + "/" + random_order_images[i]), (valid_path + c + "/" + random_order_images[i]))

In [44]:
sample_path = path + 'sample/'
sample_valid_path = sample_path + 'valid/'
sample_train_path = sample_path + 'train/'
if not os.path.exists(sample_path): os.mkdir(sample_path) 
if not os.path.exists(sample_valid_path): os.mkdir(sample_valid_path)
if not os.path.exists(sample_train_path): os.mkdir(sample_train_path) 
for p in person_dict.keys():
    for c in person_dict[p].keys():
        if not os.path.exists(sample_train_path + c): os.mkdir(sample_train_path + c)
        if not os.path.exists(sample_valid_path + c): os.mkdir(sample_valid_path + c) 
        try:
            train_files_in_class = os.listdir(path + "train/" + c) 
            valid_files_in_cass = os.listdir(valid_path + c)
            for i in range(30):
                copyfile((path + "train/" + c + "/" + train_files_in_class[i]), (sample_train_path + c + "/" + train_files_in_class[i]))
            for i in range(50, 60):
                copyfile((path + "valid/" + c + "/" + valid_files_in_cass[i]), (sample_valid_path + c + "/" + valid_files_in_cass[i]))
        except:
            print("Something went wrong")
    

Something went wrong


### Load data and fit to net

In [50]:
im_size=224

In [69]:
train_gen = ImageDataGenerator().flow_from_directory(path + "train",
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              target_size=(224, 224),
                                              shuffle=True);
val_gen = ImageDataGenerator().flow_from_directory(path + "valid",
                                          batch_size=batch_size,
                                          class_mode='categorical',
                                          target_size=(224, 224),
                                          shuffle=True)

Found 317 images belonging to 10 classes.
Found 100 images belonging to 10 classes.


### Creating a model

In [53]:
??VGG16

In [63]:
# Import our class, and instantiate

num_class=len(train_gen.class_indices)

#Input layer size of reshaped images
img_input = Input(shape=(3, im_size, im_size))
base_model = VGG16(weights=None, include_top=False, input_tensor=img_input)


### Modify model to fit the classes we have

In [64]:

# Add a new top layer
x = base_model.output
x = Flatten(name='flatten')(x)
x = Dense(4096, activation='relu', name='fc1')(x)
x = Dense(4096, activation='relu', name='fc2')(x)
x = Dense(num_class, activation='softmax', name='predictions')(x)

# This is the model we will train
model = Model(base_model.input, x)

# First: train only the new layers (which were randomly initialized)
for layer in base_model.layers:
    layer.trainable = False

model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_2 (InputLayer)             (None, 3, 224, 224)   0                                            
____________________________________________________________________________________________________
block1_conv1 (Convolution2D)     (None, 64, 224, 224)  0           input_2[0][0]                    
____________________________________________________________________________________________________
block1_conv2 (Convolution2D)     (None, 64, 224, 224)  0           block1_conv1[0][0]               
____________________________________________________________________________________________________
block1_pool (MaxPooling2D)       (None, 64, 112, 112)  0           block1_conv2[0][0]               
___________________________________________________________________________________________

### Load weights

In [67]:
latest_weights_filename = model_path + 'weights–ft-vgg16-dogs%d.h5'

model.load_weights(latest_weights_filename)

IOError: Unable to open file (Unable to open file: name = 'data/statefarm/models/weights–ft-vgg16-dogs%d.h5', errno = 2, error message = 'no such file or directory', flags = 0, o_flags = 0)

### Save weights

In [68]:
results_filename = model_path + 'weights–ft-vgg16-dogs%d-one-epoch.h5'
model.save_weights(results_filename)

### Train

In [66]:
nb_epoch=1
history = model.fit_generator(train_gen, samples_per_epoch=train_gen.N, nb_epoch=nb_epoch, 
                        validation_data=val_gen, nb_val_samples=val_gen.N, callbacks=[TQDMNotebookCallback()])

Epoch 1/1
