In [37]:
from keras.models import Model
from keras.applications import vgg16
from keras.preprocessing.image import load_img, img_to_array
import keras

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [38]:
# get image filenames for training and test sets
import glob

directories = glob.glob('fruits-360/Training/*')
train_files = []
train_labels = []
for d in directories:
    fruit_name = d.split('/')[-1]
    # get filenames
    files = glob.glob(d+'/*')
    for fn in files:
        train_files.append(fn)
        train_labels.append(fruit_name)

directories = glob.glob('fruits-360/Test/*')
test_files = []
test_labels = []
for d in directories:
    fruit_name = d.split('/')[-1]
    # get filenames
    files = glob.glob(d+'/*')
    for fn in files:
        test_files.append(fn)
        test_labels.append(fruit_name)

len(train_files), len(test_files)

(55839, 18739)

In [39]:
# load up the VGG_16 model
vgg = vgg16.VGG16(include_top=False, weights='imagenet', 
                                     input_shape=(100,100,3))

output = vgg.layers[-1].output
output = keras.layers.Flatten()(output)
vgg_model = Model(vgg.input, output)

vgg_model.trainable = False
for layer in vgg_model.layers:
    layer.trainable = False
    
pd.set_option('max_colwidth', -1)
layers = [(layer, layer.name, layer.trainable) for layer in vgg_model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])   

Unnamed: 0,Layer Type,Layer Name,Layer Trainable
0,<keras.engine.input_layer.InputLayer object at 0xb3e2f29b0>,input_3,False
1,<keras.layers.convolutional.Conv2D object at 0xb3e2f2908>,block1_conv1,False
2,<keras.layers.convolutional.Conv2D object at 0xb40429080>,block1_conv2,False
3,<keras.layers.pooling.MaxPooling2D object at 0xb40429eb8>,block1_pool,False
4,<keras.layers.convolutional.Conv2D object at 0xb3e90a780>,block2_conv1,False
5,<keras.layers.convolutional.Conv2D object at 0xb3f714198>,block2_conv2,False
6,<keras.layers.pooling.MaxPooling2D object at 0xb3f9db780>,block2_pool,False
7,<keras.layers.convolutional.Conv2D object at 0xb3f9db080>,block3_conv1,False
8,<keras.layers.convolutional.Conv2D object at 0xb3fa10400>,block3_conv2,False
9,<keras.layers.convolutional.Conv2D object at 0xb3fa29a58>,block3_conv3,False


In [40]:
# load training images and extract features

IMG_DIM = (100,100)
features, labels = [], []
for idx, img in enumerate(train_files):
    img_array = np.array([img_to_array(load_img(img,target_size=IMG_DIM))])
    # scale pixels between (0,1)
    img_array = img_array.astype('float32') / 255
    features.append( vgg_model.predict(img_array, verbose=0)[0] )
    labels.append( (train_files[idx], train_labels[idx]) )
    if idx % 1000 == 0: print(idx)
        
features_df = pd.DataFrame(features)
labels_df = pd.DataFrame(labels, columns=['filename','fruit'])


0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000


In [41]:
df = pd.concat([labels_df,features_df], axis=1)
df.to_csv('training_data.csv', index=False)
del df

In [42]:
# load testing images and extract features

IMG_DIM = (100,100)
features, labels = [], []
for idx, img in enumerate(test_files):
    img_array = np.array([img_to_array(load_img(img,target_size=IMG_DIM))])
    # scale pixels between (0,1)
    img_array = img_array.astype('float32') / 255
    features.append( vgg_model.predict(img_array, verbose=0)[0] )
    labels.append( (test_files[idx], test_labels[idx]) )
    if idx % 1000 == 0: print(idx)
        
features_df = pd.DataFrame(features)
labels_df = pd.DataFrame(labels, columns=['filename','fruit'])

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000


In [43]:
df = pd.concat([labels_df,features_df], axis=1)
df.to_csv('testing_data.csv', index=False)
del df

In [44]:
df = pd.read_csv('training_data.csv')

In [45]:
df.head()

Unnamed: 0,filename,fruit,0,1,2,3,4,5,6,7,...,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607
0,fruits-360/Training/Tomato 4/r_236_100.jpg,Tomato 4,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,1.313645,0.03652,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.033871,-0.0
1,fruits-360/Training/Tomato 4/247_100.jpg,Tomato 4,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,1.378478,0.220973,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
2,fruits-360/Training/Tomato 4/257_100.jpg,Tomato 4,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,1.274333,0.003696,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
3,fruits-360/Training/Tomato 4/r_78_100.jpg,Tomato 4,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,1.339923,-0.0,-0.0,-0.0,0.032979,-0.0,-0.0,-0.0,0.046481,-0.0
4,fruits-360/Training/Tomato 4/r_68_100.jpg,Tomato 4,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,1.45531,0.092919,-0.0,-0.0,0.117963,-0.0,-0.0,-0.0,0.074229,-0.0
