In [100]:
# More imports
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import csv
import pandas as pd
import re
import cv2
import os

# import the necessary packages for SVM predictor
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import imutils

In [101]:
# Import widget functionality
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [102]:
def extract_color_histogram(image, bins=(8,8,8)):
    # extract a 3D color histogram from the HSV color space using
    # the supplied number of 'bins' per channel
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0,1,2], None, bins, [0,180,0,256,0,256])
    
    # handle normalizing the histogram if we are using OpenCV 2.4.X
    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    
    # otherwise, perform "in place" normaliation in OpenCV 3
    else:
        cv2.normalize(hist, hist)
    
    return hist.flatten()

In [103]:
def extract_blue_histogram(image):
    # extract blue histogram from the image
    hist = cv2.calcHist([image], [0], None, [265], [0,256])
    
    # handle normalizing the histogram if we are using OpenCV 2.4.X
    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    
    # otherwise, perform "in place" normaliation in OpenCV 3
    else:
        cv2.normalize(hist, hist)
    
    return hist.flatten()

In [104]:
def extract_green_histogram(image):
    # extract blue histogram from the image
    hist = cv2.calcHist([image], [1], None, [265], [0,256])
    
    # handle normalizing the histogram if we are using OpenCV 2.4.X
    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    
    # otherwise, perform "in place" normaliation in OpenCV 3
    else:
        cv2.normalize(hist, hist)
    
    return hist.flatten()

In [105]:
def extract_red_histogram(image):
    # extract blue histogram from the image
    hist = cv2.calcHist([image], [2], None, [265], [0,256])
    
    # handle normalizing the histogram if we are using OpenCV 2.4.X
    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    
    # otherwise, perform "in place" normaliation in OpenCV 3
    else:
        cv2.normalize(hist, hist)
    
    return hist.flatten()

In [107]:
# init the image suffix, yset, and image list
suffix = '.jpg'
img_list = []
yset = []
# create labels list and 2 dicts for 2 way mapping
labels = []

In [106]:
# Testing Cells
csv_file = 'zaslavsk_Cyclops_Cave_Ceramic_Petrography.csv'

In [108]:
# use csv file to grab images/labels
df = pd.read_csv(csv_file)

toImg = list(df.columns.values)

img_menu = {}
for i in range(0, len(toImg)):
    img_menu[toImg[i]] = toImg[i]

def f(images_menu):
    return images_menu

In [109]:
out = interact(f, images_menu=img_menu)

A Jupyter Widget

In [110]:
# Choose column of label for prediction
toPredict = list(df.columns.values)

pred_menu = {}
for i in range(0, len(toPredict)):
    pred_menu[toPredict[i]] = toPredict[i]

def f(predictions_menu):
    return predictions_menu

In [111]:
out2 = interact(f, predictions_menu=pred_menu)

A Jupyter Widget

In [112]:
# grab chosen column names
nameCol = df[out.widget.result]
predCol = df[out2.widget.result]

# add all fabric columns to the y set
for i in range (0,len(predCol)):
    labels.append(predCol[i])
    
print("Number of labels to learn from: " + str(len(labels)))

Number of labels to learn from: 252


In [113]:
# create new hist_list
hist_list = []

In [114]:
# gather images from path created from file names in csv file
for i in range (0,len(nameCol)):
    base_filename = nameCol[i]
    fileName = os.path.join("./Cyclops Cave/images/", base_filename + suffix)
    im = cv2.imread(fileName)
    hist = extract_color_histogram(im)
    hist_list.append(hist)

In [91]:
# gather images from path created from file names in csv file
for i in range (0,len(nameCol)):
    base_filename = nameCol[i]
    fileName = os.path.join("./Cyclops Cave/images/", base_filename + suffix)
    im = cv2.imread(fileName)
    hist = extract_blue_histogram(im)
    hist_list.append(hist)

In [115]:
# transform labels into numerical system
le = LabelEncoder()
labels = le.fit_transform(predCol)

In [122]:
# separate data into test/train sets for features/labels
(xtrain, xtest, ytrain, ytest) = train_test_split(np.array(hist_list),labels, test_size = 0.5)

In [119]:
# Train the linear regression classifier
model2 = LinearSVC()
model2.fit(xtrain, ytrain)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [120]:
# Calculate predictions on the data set
predictions2 = model2.predict(np.array(hist_list))
print(classification_report(labels, predictions2, target_names = le.classes_))

                 precision    recall  f1-score   support

 Cyclops Cave 1       0.75      0.97      0.84       144
Cyclops Cave 10       0.00      0.00      0.00         4
Cyclops Cave 11       0.00      0.00      0.00         4
Cyclops Cave 12       1.00      0.50      0.67         4
 Cyclops Cave 2       0.37      0.46      0.41        28
 Cyclops Cave 3       0.35      0.30      0.32        20
 Cyclops Cave 4       0.60      0.25      0.35        12
 Cyclops Cave 5       1.00      0.38      0.55         8
 Cyclops Cave 6       1.00      0.38      0.55         8
 Cyclops Cave 7       0.00      0.00      0.00         8
 Cyclops Cave 8       1.00      0.12      0.22         8
 Cyclops Cave 9       0.00      0.00      0.00         4

    avg / total       0.64      0.67      0.62       252



  'precision', 'predicted', average, warn_for)


In [98]:
len(labels)

252

In [121]:
count = 0
for i in range (0,len(labels)):
    if(predictions2[i] == labels[i]):
        count += 1

count/len(labels)

0.6746031746031746