# In[100]:
from keras_preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.python.keras.applications.densenet import decode_predictions
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
import matplotlib.pyplot as plt
import numpy as np
import random
import pickle
import cv2
import os
import glob


## ENV VARS
# User-tunable settings for the training pipeline.
IMG_SIZE = 32    # passed as the image edge length (target_size) further down
SAVE_DIR = ''    # location used by save_model() when writing artefacts
IMG_DIR = './scripts/output/*/*.jpg'    # wildcard pattern: <class folder>/<image>.jpg
BATCH_SIZE = 32  # samples per batch for fitting and for the generator
EPOCHS = 50      # number of passes over the training data

#STATIC VARS
# Module-level state shared by the functions in this file.
dataset, labels = [], []    # images and their labels, appended to by import_imgs()
lb = LabelBinarizer()       # fitted in one_hot_enc(); its classes_ size the output layer
# Empty-string placeholders for the train/test split arrays.
img_train = img_test = ''
labels_train = labels_test = ''
model = Sequential()        # layers are added in model_build()
classes = ''                # placeholder for the unique class names
# Augmentation pipeline used when training from a generator.
# NOTE(review): per the Keras docs a float zoom_range z samples the zoom from
# [1 - z, 1 + z]; 1.0 therefore allows a zoom factor of 0 -- confirm intended.
# NOTE(review): import_imgs() already divides pixels by 255, so `rescale`
# would scale such data a second time -- confirm which one should stay.
data_gen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=10.0,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.0,
    zoom_range=1.0,
    horizontal_flip=True,
    vertical_flip=True,
)

def preprocess(train_images=None, test_images=None):
    """Flatten image arrays into 2-D float32 matrices, one row per sample.

    Args:
        train_images: Array whose first axis indexes samples. When omitted,
            the module-level ``img_train`` is used.
        test_images: Array whose first axis indexes samples. When omitted,
            the module-level ``img_test`` is used.

    Returns:
        Tuple ``(train_data, test_data)`` of float32 arrays with shape
        ``(n_samples, product of the remaining axes)``.
    """
    if train_images is None:
        train_images = img_train
    if test_images is None:
        test_images = img_test

    def _flatten(images):
        # int() because np.prod of an empty shape tuple yields a float 1.0,
        # which reshape would reject.
        width = int(np.prod(images.shape[1:]))
        return images.reshape(images.shape[0], width).astype('float32')

    train_data = _flatten(train_images)
    # The test matrix is built from the test images (the previous code
    # reshaped the training images here by mistake).
    test_data = _flatten(test_images)
    return train_data, test_data
    
def generated_data(set, labels):
    """Return an augmenting batch iterator over in-memory images.

    Args:
        set: Image array handed to ``data_gen.flow`` as ``x``. (The name
            shadows the builtin but is kept for keyword-argument callers.)
        labels: Labels handed to ``data_gen.flow`` as ``y``.
            NOTE(review): they are passed through as given, so they
            presumably need to be one-hot encoded beforehand -- confirm.

    Returns:
        The iterator produced by ``data_gen.flow``, yielding shuffled
        batches of ``BATCH_SIZE`` samples.
    """
    # `target_size` and `class_mode` are flow_from_directory() options;
    # flow() does not accept them and raised TypeError, so they are dropped.
    return data_gen.flow(
        set,
        labels,
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

def import_imgs():
    """Load every image matching ``IMG_DIR`` and split it into train/test sets.

    Each readable image is resized to ``IMG_SIZE`` x ``IMG_SIZE`` and labelled
    with the name of its parent directory. Pixel values are scaled by 1/255.
    The module-level ``dataset`` and ``labels`` lists are refilled, and the
    module-level ``img_train``, ``img_test``, ``labels_train``,
    ``labels_test`` and ``classes`` are updated with the results.

    Returns:
        Tuple ``(img_train, img_test, labels_train, labels_test)``.

    Raises:
        FileNotFoundError: If no readable image matches ``IMG_DIR``.
    """
    global img_train, img_test, labels_train, labels_test, classes

    # IMG_DIR is a wildcard pattern, so it must be expanded with glob;
    # os.listdir() cannot take a pattern.
    img_paths = glob.glob(IMG_DIR)
    random.shuffle(img_paths)

    # Start from empty lists so a second call does not duplicate samples.
    del dataset[:]
    del labels[:]

    for img_path in img_paths:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            continue
        # A uniform size is required to stack the images into one array.
        dataset.append(cv2.resize(img, (IMG_SIZE, IMG_SIZE)))
        # The class name is the directory that contains the image.
        labels.append(os.path.basename(os.path.dirname(img_path)))

    if not dataset:
        raise FileNotFoundError(
            "No readable images matched {!r}".format(IMG_DIR))

    # NOTE(review): data_gen also applies rescale=1/255; confirm that images
    # scaled here are not scaled a second time when the generator is used.
    temp_dataset = np.array(dataset, dtype='float32') / 255.0
    temp_labels = np.array(labels)
    classes = np.unique(temp_labels)

    # train_test_split returns (X_train, X_test, y_train, y_test).
    img_train, img_test, labels_train, labels_test = train_test_split(
        temp_dataset, temp_labels, test_size=0.20)
    return img_train, img_test, labels_train, labels_test
    
def import_images(pattern=None):
    """Return the file paths that match a glob pattern.

    Args:
        pattern: Glob pattern to expand. When omitted, the module-level
            ``IMG_DIR`` pattern is used.

    Returns:
        List of matching paths as produced by ``glob.glob`` (empty when
        nothing matches).
    """
    if pattern is None:
        pattern = IMG_DIR
    return glob.glob(pattern)
    

def one_hot_enc(labels_train, labels_test):
    """Binarize both label sets with the shared ``lb`` encoder.

    The encoder is fitted on the training labels (which is where it learns
    the set of classes) and the fitted encoder is then applied to the test
    labels.

    Args:
        labels_train: Training labels; used to fit ``lb``.
        labels_test: Test labels; transformed with the fitted ``lb``.

    Returns:
        Tuple ``(encoded_train, encoded_test)``.
    """
    encoded_train = lb.fit_transform(labels_train)
    encoded_test = lb.transform(labels_test)
    return encoded_train, encoded_test
    
def model_build():
    """Add the dense layers to the module-level ``model`` and return it.

    The network is input -> 1024 (sigmoid) -> 512 (sigmoid) -> softmax with
    one output unit per class known to ``lb``, so ``lb`` must be fitted
    before this is called.

    NOTE(review): layers are appended to the shared ``model``; calling this
    twice stacks a second set of layers on top of the first.

    Returns:
        The module-level ``model``.
    """
    # Flattened image length; IMG_SIZE = 32 gives 32 * 32 * 3 = 3072.
    # The factor 3 assumes three colour channels -- TODO confirm.
    input_dim = IMG_SIZE * IMG_SIZE * 3
    model.add(Dense(1024, input_shape=(input_dim,), activation='sigmoid'))
    model.add(Dense(512, activation='sigmoid'))
    model.add(Dense(len(lb.classes_), activation='softmax'))
    return model
    
    
def print_results(model_fit):
    """Plot training/validation loss and accuracy per epoch.

    Args:
        model_fit: Object whose ``history`` mapping holds the per-epoch
            series ``"loss"``, ``"val_loss"``, ``"accuracy"`` and
            ``"val_accuracy"``.
    """
    history = model_fit.history
    # Derive the x-axis from the recorded history; a fixed length fails
    # whenever the number of epochs run differs from it.
    N = np.arange(len(history["loss"]))
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(N, history["loss"], label="train_loss")
    plt.plot(N, history["val_loss"], label="val_loss")
    plt.plot(N, history["accuracy"], label="train_accuracy")
    plt.plot(N, history["val_accuracy"], label="val_accuracy")
    plt.title("Training Loss and Accuracy (Simple NN)")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    
def save_model(model_path=None, labels_path=None):
    """Write the module-level ``model`` and label encoder ``lb`` to disk.

    Args:
        model_path: Destination of the HDF5 model file. Defaults to
            ``model.h5`` inside ``SAVE_DIR``.
        labels_path: Destination of the pickled ``lb``. Defaults to
            ``labels.pickle`` inside ``SAVE_DIR``.
    """
    if model_path is None:
        model_path = os.path.join(SAVE_DIR, 'model.h5')
    if labels_path is None:
        labels_path = os.path.join(SAVE_DIR, 'labels.pickle')

    # The first argument of save() is the target path, not the model itself.
    model.save(model_path, save_format='h5')
    # `with` closes the file even if pickling raises.
    with open(labels_path, 'wb') as f:
        pickle.dump(lb, f)

def train(with_gen: bool):
    """Fit the module-level ``model`` on the module-level training split.

    Args:
        with_gen: When true, train on augmented batches drawn from
            ``data_gen``; otherwise train directly on ``img_train``.

    Returns:
        The object returned by ``model.fit`` (the training history).
    """
    validation = (img_test, labels_test)

    if with_gen:
        # flow() takes no `target_size`/`class_mode`; those belong to
        # flow_from_directory() and raised TypeError here.
        # NOTE(review): the validation data does not pass through data_gen,
        # so its `rescale` is not applied to it -- confirm the scaling of
        # both sets matches.
        batches = data_gen.flow(
            img_train,
            labels_train,
            batch_size=BATCH_SIZE,
            shuffle=True,
        )
        return model.fit(batches,
                         epochs=EPOCHS,
                         validation_data=validation)

    return model.fit(img_train, labels_train,
                     epochs=EPOCHS,
                     batch_size=BATCH_SIZE,
                     validation_data=validation)
        
def print_errors(img_generator, predictions):
    """Report how many predictions disagree with the generator's labels.

    Args:
        img_generator: Object exposing ``filenames``, ``classes``,
            ``class_indices`` and ``samples``.
        predictions: 2-D score array; the arg-max along axis 1 is taken as
            the predicted class index of each row.

    Returns:
        Tuple ``(idx2label, errors, fnames)``: the keys of
        ``class_indices`` as a list, the positions where the predicted
        class differs from ``classes``, and the generator's file names.
    """
    idx2label = list(img_generator.class_indices)
    best_guess = np.argmax(predictions, axis=1)
    mismatch = best_guess != img_generator.classes
    errors = np.nonzero(mismatch)[0]
    n_errors = len(errors)
    print("Number of errors = {}/{}".format(n_errors, img_generator.samples))
    return idx2label, errors, img_generator.filenames

# In[ ]:
# Compile the shared model with SGD and categorical cross-entropy.
# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
# NOTE(review): decay is written as 0.01/25 while EPOCHS is 50 -- confirm the
# divisor. Recent Keras optimizers may reject `decay`; verify against the
# installed TensorFlow version.
sgd = SGD(learning_rate=0.01, momentum=0.7, decay=0.01/25)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])


# In[ ]:
# Evaluate the trained model on the held-out test split.
# predict() is called once on the images; feeding its output back into
# predict() passes class scores where the model expects image input.
predictions = model.predict(img_test, batch_size=BATCH_SIZE)

# Pair every class known to `lb` with its score, best first, per sample.
# decode_predictions() only understands ImageNet class indices, so it cannot
# be used with this model's own classes.
encoded_pred = [
    sorted(
        ((label, float(score)) for label, score in zip(lb.classes_, row)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for row in predictions
]

test_loss, test_acc = model.evaluate(img_test, labels_test)

# assumes labels_test is one-hot encoded (argmax over axis 1) -- TODO confirm
print(classification_report(labels_test.argmax(axis=1), predictions.argmax(axis=1), target_names=lb.classes_))

# Ranked (class, score) pairs for the first test sample.
for ranked in encoded_pred[0]:
    print(ranked)