# Imports

In [None]:
import numpy as np
from scipy import misc
import imageio
from skimage import data, io, filters
from matplotlib import pyplot as plt
from random import shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import sqlite3
from training import image_training as it
import os
import sys
import pickle

### Test Files

In [None]:
classified_true = it.get_true_images('guardrail')
print(len(classified_true))
print(classified_true[0])

classified_false = it.get_false_images('guardrail')
print(len(classified_false))
print(classified_false[0])

### Add Test List and Classified List together

In [3]:
lt = [classified_false[0:len(classified_true)-1], classified_true]

flattened_list = [i for s in lt for i in s]

#print (flattened_list)
print(len(flattened_list))

12127


### Shuffle Test Files

In [4]:
shuffle(flattened_list)
for i in flattened_list[0:100]:
    print (i)

('\\\\itdd01fsp01\\D1VideoLog\\2016\\105\\47\\105004742191.jpg', 0)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\113\\70\\113011032181.jpg', 1)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\112\\94\\112013414251.jpg', 1)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\100\\12\\100001203021.jpg', 0)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\107\\115\\107015542201.jpg', 1)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\105\\24\\105002436021.jpg', 0)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\106\\23\\106002301021.jpg', 0)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\100\\25\\100002516241.jpg', 0)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\108\\29\\108002951211.jpg', 0)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\101\\6\\101000657211.jpg', 0)
('\\\\itdd02fsp01\\VideoLog16\\203\\21\\203002138241.jpg', 1)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\112\\39\\112003954141.jpg', 1)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\108\\80\\108012038131.jpg', 1)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\100\\78\\100011822141.jpg', 1)
('\\\\itdd01fsp01\\D1VideoLog\\2016\\100\\9\\100000909

### Validate That The Filepaths Exist

In [5]:
for file in flattened_list:
    if not os.path.isfile(file[0]):
        raise RuntimeError('File: {} could not be located.'.format(file[0]))
print('STATUS: ALL FILES OKAY')

STATUS: ALL FILES OKAY


### Progress Bar Class

In [5]:
class ProgressBar(object):
    
    def __init__(self, total_count, step_count=50):
        """Initialze a progress bar
        
        Keyword Arguments:
        total_count -- The total count to track progress.
        step_count -- The number of steps in this total count.
        """
        self.total_count = total_count
        self.step_count = step_count
        self.step_size = int(self.total_count / self.step_count)
        self.step = 0
        self.value = 0
        
    def __str__(self):
        string = 'Progress |'
        for i in range(0, self.step_count):
            if i < self.step:
                string += '#'
            else:
                string += '-'
        string += '| {:.2f}%'.format((float(self.value) / self.total_count) * 100)
        return string
        
    def update(self, value):
        self.value = value
        self.step = int(self.value / self.step_size)

### Define Data Files And Open Them

In [6]:
datafile = 'data.pickled'
answersfile = 'answers.pickled'

datapickled = open(datafile, 'wb+')
answerspickled = open(answersfile, 'wb+')

### Pickle Data To Files And Close Files

In [7]:


progressbar = ProgressBar(len(flattened_list))
item_count = 0
print(progressbar, '\r')
for item in flattened_list:
    item_count += 1
    try:
        data_array = np.array(imageio.imread(item[0]))
        string = pickle.dumps(data_array)
        pickle.dump(string, datapickled)
        pickle.dump(item[1], answerspickled)
        
    except Exception as e:
        print(e)
        print(item)
        break
    
    progressbar.update(item_count)
    print(progressbar, end='\r')

datapickled.close()
answerspickled.close()

Progress |--------------------------------------------------| 0.00% 
Progress |##################################################| 100.00%

### Random Forest Classifier

In [8]:
clf = RandomForestClassifier(warm_start=True, n_estimators=100)

### Function To Train The Model

In [9]:
def train_classifier(start_index, end_index):
    X = []
    y = []
    
    with open(datafile, 'rb') as datapickled:
        for i in range(0, start_index):
            pickle.load(datapickled)
        for i in range(start_index, end_index):
            string = pickle.load(datapickled)
            X.append(pickle.loads(string))
    X = np.array(X)
    
    with open(answersfile, 'rb') as answerspickled:
        for i in range(0, start_index):
            pickle.load(answerspickled)
        for i in range(start_index, end_index):
            y.append(pickle.load(answerspickled))
    y = np.array(y)
    
    X = X.reshape(X.shape[1] * X.shape[2] * X.shape[3], X.shape[0]).T
    y = y.reshape(y.shape[0],)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
    clf.fit(X_train, y_train)
    clf.set_params(n_estimators=clf.get_params()['n_estimators'] + 100)
    
    return (X_train, X_test, y_train, y_test)

### Train The Model Incrementally

In [10]:
progressbar = ProgressBar(len(flattened_list))
print(progressbar, end='\r')
for i in range(100, len(flattened_list), 100):
    X_train, X_test, y_train, y_test = train_classifier(i-100, i)
    progressbar.update(i)
    print(progressbar, end='\r')

Progress |##################################################| 99.78%

### Test The Model Based On Prediction Set

In [11]:
preds = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test,preds))
print (X_test.shape)
print (y_train)

Accuracy: 0.7
(10, 7200000)
[1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 1 1 0 1 1 1 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 1
 1 1 0 1 1 0 0 1 0 1 0 0 1 1 0 1 1 0 0 1 1 1 0 1 1 0 0 1 0 0 1 1 1 0 0 1 0
 1 0 0 1 0 1 0 0 1 1 0 0 0 0 1 1]


### Attempt To Save The Machine To A File

In [12]:
machinefile = 'machine.pickled'
with open(machinefile, 'wb') as machinepickled:
    pickle.dump(clf, machinepickled)

### Test The Model

In [21]:
# '301004459215'
# '301004457041'
# '301004457092'
# '301004457251'

#  '301004400151' (yes example)
#  '301004405171' (no example)

yes = '301004400151.jpg'
no = '301004405171.jpg'

directory = '//itdd03fsp01/videolog/301/44/' + no

In [22]:
def transform_image(dir):
    data_array = np.array(imageio.imread(dir))
    reshape_array = data_array.reshape(data_array.shape[0] * data_array.shape[1] * data_array.shape[2], 1).T
    return reshape_array

In [23]:
test_image = transform_image(directory)
print (test_image)

[[179 179 187 ... 100  97  92]]


In [24]:
prediction = clf.predict(test_image)
if prediction == 1:
    print('Yes')
elif prediction == 0:
    print('No')
else:
    print('Error')

No
