In [1]:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")

In [2]:
# import the necessary packages
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import img_to_array
from keras.utils import to_categorical
from pyimagesearch.lenet import LeNet
from imutils import paths

Using TensorFlow backend.


In [3]:
from keras.utils import np_utils

In [4]:
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import csv
import pandas as pd
import re
import cv2
import os

In [5]:
# This is code from the original tutorial for parsing the images
# construct the argument parse and parse the args
#ap = argparse.ArgumentParser()
#ap.add_argument("-d", "--dataset", required =True,
#               help="path to input dataset")
#ap.add_argument("-m", "--model", required=True,
#               help="path to output model")
#ap.add_argument("-p", "--plot", type=str, default="plot.png",
#               help="path to output accuracy/loss plot")
#args = vars(ap.parse_args())

In [6]:
# init the number of epochs to train for, init learning rate and batch size
EPOCHS = 25
INIT_LR = 1e-3
BS = 32

In [7]:
# init the image suffix, yset, and image list
print("[INFO] loading images...")
suffix = '.jpg'
img_list = []
yset = []

[INFO] loading images...


In [8]:
# create labels list and 2 dicts for 2 way mapping
labels = []
# key = label value = number
label_yval = dict()
# key = number value = label
yval_label = dict()

In [9]:
# use csv file to grab images/labels
df = pd.read_csv('zaslavsk_Cyclops_Cave_Ceramic_Petrography.csv')
nameCol = df['#img']
fabricCol = df['Fabric Code']

In [10]:
# add all fabric columns to the y set
for i in range (0,len(fabricCol)):
    labels.append(fabricCol[i])

In [11]:
# grab all unique labels
uni_labels = set(labels)
uni_labels = list(uni_labels)

In [12]:
# assign each label a dict key number
for i in range(0,len(uni_labels)):
    yval_label[i] = uni_labels[i]
    label_yval[uni_labels[i]] = i

In [13]:
label_yval

{'Cyclops Cave 1': 4,
 'Cyclops Cave 10': 6,
 'Cyclops Cave 11': 5,
 'Cyclops Cave 12': 9,
 'Cyclops Cave 2': 2,
 'Cyclops Cave 3': 7,
 'Cyclops Cave 4': 11,
 'Cyclops Cave 5': 0,
 'Cyclops Cave 6': 1,
 'Cyclops Cave 7': 10,
 'Cyclops Cave 8': 8,
 'Cyclops Cave 9': 3}

In [14]:
len(labels)

252

In [15]:
label_yval[labels[0]]

4

In [16]:
# create list of keys associated with their labels
for i in range (0, len(labels)):
    yset.append(label_yval[labels[i]])

In [17]:
len(yset)

252

In [18]:
# remove text and leave fabric cave number for labels and zero index
#for i in range (0,len(yset)):
#    yset[i] = int(re.sub("\D", "", yset[i]))
#    yset[i] = yset[i]-1

In [19]:
# gather images from path created from file names in csv file
for i in range (0,len(nameCol)):
    base_filename = nameCol[i]
    fileName = os.path.join("./Cyclops Cave/images/", base_filename + suffix)
    im = cv2.imread(fileName)
    im = cv2.resize(im, (28,28))
    im = img_to_array(im)
    img_list.append(im)

In [20]:
# split the test and training set 75:25
split = int(len(img_list)*(.75))
xtrain = img_list[:split]
xtest = img_list[split:]
ytrain = yset[:split]
ytest = yset[split:]

In [21]:
# transform to arrays
trainX = np.array(xtrain, dtype="float")/225.0
testX = np.array(xtest, dtype ="float")/225.0

ytrain = np.array(ytrain)
ytest = np.array(ytest)

In [22]:
trainX.shape

(189, 28, 28, 3)

In [23]:
# parsed Y data containers
trainY = []
testY = []

In [24]:
# convert labels from int to vectors
trainY = np_utils.to_categorical(ytrain,12)
testY = np_utils.to_categorical(ytest,12)

In [25]:
# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
                        height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
                        horizontal_flip=True, fill_mode="nearest")

In [26]:
# initialize the model
print("[INFO] compiling model...")
model = LeNet.build(width=28, height=28, depth=3, classes=12)
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt,
                metrics=["accuracy"])

[INFO] compiling model...


In [27]:
# train the network
print("[INFO] training network...")
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY), steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS, verbose=1)

[INFO] training network...
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [28]:
# plot the training loss and accuracy 
plt.style.use("ggplot")
plt.figure()
N = EPOCHS
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["acc"], label="train_acc")
plt.plot(np.arange(0, N), H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy on Label Prediction")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig('figure.png')

In [29]:
chart = cv2.imread('figure.png',1)
cv2.imshow('Results',chart)
cv2.waitKey(0)
cv2.destroyAllWindows()