In [1]:
# filter warnings
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

# keras imports
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.vgg19 import VGG19, preprocess_input
from keras.applications.xception import Xception, preprocess_input
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing import image
from keras.models import Model
from keras.models import model_from_json
# Keras is running on the TensorFlow backend

# other imports
from sklearn.preprocessing import LabelEncoder
import numpy as np
import glob
import cv2
import h5py
import os
import json
import datetime
import time
import pandas as pd

Using TensorFlow backend.


In [2]:
# List the contents of the notebook's working directory (IPython automagic for `%ls`).
ls

[31mGBBR Anatomy of The Ring Completed 7-28-2017.xlsx[m[m*
[1m[36mPics[m[m/
TransferTraining.ipynb
config.json
[31meshop_items_full.csv[m[m*
gab_nn.ipynb
neighbors_model_1.json
test.csv


In [3]:
# Read the run settings from config.json (expected next to this notebook).
with open('config.json') as f:
    config = json.load(f)

# Unpack every setting the notebook uses into a top-level name, in one step.
(
    model_name,
    weights,
    include_top,
    train_path,
    features_path,
    labels_path,
    test_size,
    results,
    model_path,
) = (
    config[key]
    for key in (
        "model",
        "weights",
        "include_top",
        "train_path",
        "features_path",
        "labels_path",
        "test_size",
        "results",
        "model_path",
    )
)


In [6]:
# Display which model the configuration selected.
model_name

'xception'

In [8]:
# Feature extraction: run every image found in `train_path` through the pretrained
# network selected by `model_name`, collect one flattened feature vector per image,
# and pickle the resulting (name, feature) table to `results`.

# start time
print("[STATUS] start time - {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
start = time.time()

# Build the feature extractor for the configured architecture.
# The imports cell rebinds `preprocess_input` once per architecture, so only the last
# one imported (inception_v3) is left in scope; each branch therefore imports the
# preprocessing function that belongs to the model it builds.
# `inputs=` / `outputs=` are the Keras 2 keyword names of the functional Model API.
if model_name == "vgg16":
    from keras.applications.vgg16 import preprocess_input as model_preprocess
    base_model = VGG16(weights=weights)
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)
    image_size = (224, 224)
elif model_name == "vgg19":
    from keras.applications.vgg19 import preprocess_input as model_preprocess
    base_model = VGG19(weights=weights)
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)
    image_size = (224, 224)
elif model_name == "resnet50":
    from keras.applications.resnet50 import preprocess_input as model_preprocess
    base_model = ResNet50(weights=weights)
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('flatten').output)
    image_size = (224, 224)
elif model_name == "inceptionv3":
    from keras.applications.inception_v3 import preprocess_input as model_preprocess
    # No intermediate layer is cut out here: the include_top=False network is used as is.
    base_model = InceptionV3(weights=weights, include_top=False)
    model = base_model
    image_size = (299, 299)
elif model_name == "xception":
    from keras.applications.xception import preprocess_input as model_preprocess
    base_model = Xception(weights=weights)
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)
    image_size = (299, 299)
else:
    # Fail here with a clear message; otherwise `model` and `image_size` stay undefined
    # and the loop below dies with an unrelated NameError.
    raise ValueError(
        "unsupported model {!r} in config.json; expected one of: "
        "vgg16, vgg19, resnet50, inceptionv3, xception".format(model_name)
    )

print("[INFO] successfully loaded base model and model...")

# Image files to process. Hidden entries and sub-directories are skipped because
# load_img cannot open them; sorting makes the row order reproducible across runs.
train_labels = sorted(
    entry for entry in os.listdir(train_path)
    if not entry.startswith('.') and os.path.isfile(os.path.join(train_path, entry))
)

# Output table layout and destination (pickle file under the current directory).
extracted_cols = ['name', 'feature']
filename = os.path.join(os.getcwd(), results)

# Rows are collected in a plain list and turned into a DataFrame only when saving;
# growing a DataFrame one row at a time gets slower as the table grows.
records = []

for idx, image_path in enumerate(train_labels):
    img = image.load_img(os.path.join(train_path, image_path), target_size=image_size)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add the leading batch axis expected by predict()
    x = model_preprocess(x)
    feature = model.predict(x)

    records.append({'name': image_path, 'feature': feature.flatten()})

    # Checkpoint every 1000 images so a crash does not lose the whole run.
    if idx % 1000 == 0:
        print(idx)
        print('done')
        pd.DataFrame(records, columns=extracted_cols).to_pickle(filename)

# Final table: one row per image, indexed 0..n-1 in processing order.
extracted_df = pd.DataFrame(records, columns=extracted_cols)

# save the complete frame as a pickle
extracted_df.to_pickle(filename)

# end time
end = time.time()
print("[STATUS] end time - {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
print("[STATUS] elapsed - {:.1f} s".format(end - start))

[STATUS] start time - 2017-08-15 11:56




[INFO] successfully loaded base model and model...
0
done
1000
done
2000
done
3000
done
4000
done
5000
done
6000
done
7000
done
8000
done
9000
done
10000
done
11000
done
12000
done
13000
done
14000
done
15000
done
16000
done
17000
done
18000
done
19000
done
20000
done
21000
done
22000
done
23000
done
24000
done
25000
done
26000
done
27000
done
28000
done
29000
done
30000
done
31000
done
32000
done
33000
done
34000
done
35000
done
36000
done
37000
done
38000
done
39000
done
40000
done
41000
done
42000
done
43000
done
44000
done
45000
done
46000
done
47000
done
48000
done
49000
done
50000
done
51000
done
52000
done
53000
done
54000
done
[STATUS] end time - 2017-08-17 16:21


In [9]:
# Row count of the in-memory feature table.
extracted_df.shape[0]

54778

In [10]:
# Length of the feature vector stored in the row labelled 0.
len(extracted_df.loc[0, 'feature'])

2048

In [16]:
# Read back the pickle at `filename` to inspect what was persisted.
# NOTE(review): the recorded output of the following cell reports 2 rows, while the
# in-memory frame had 54778, and the execution counts jump from 10 to 16. Presumably
# `filename` or the file itself was changed by cells that are no longer in the
# notebook -- confirm by re-running from a fresh kernel.
test = pd.read_pickle(filename)

In [17]:
# Row count of the frame that was read back from disk.
test.shape[0]

2

In [20]:
# Spot-check one component (position 6) of the feature vector in the row labelled 0.
test.loc[0, 'feature'][6]

0.71897906