In [1]:
import os
import io
import random
import numpy as np
import pandas as pd
from PIL import Image
import autokeras as ak
import tensorflow as tf
from numpy import asarray
from tensorflow import keras
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from tensorflow.keras.callbacks import CSVLogger
from sklearn.datasets import load_files
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold

# Create images array from knapsack images

In [None]:
#create images array
directory = "All-6Set/knapsack-images"
images = []
labels = []
import os
rootdir = 'All-6Set/knapsack-images'

for subdir, dirs, files in os.walk(rootdir):
    for file in files:        
        instance = os.path.join(subdir, file)
        if instance.endswith('.jpg'):
            im = Image.open(instance).convert('L')
            pix = im.load()
            width, height = im.size
            pixel_values = list(im.getdata())
          
            imgplot = plt.imshow(im,cmap='Greys_r')
            data = asarray(im)
            images.append(data)
          # summarize shape
images2 = np.asarray(images)
images2 = images2.reshape((images2.shape[0], 32, 32))
print(images2.shape)

In [None]:
#read the labels
npdf = pd.read_csv('All-6Set/leenaknapsack.csv')
source = npdf.Label
print(source.value_counts())

In [5]:
#normalise to values between 0 and 1
X = images2.astype('float32')
X /= 255
print('X_train shape:', X.shape)
y = source

X_train shape: (5000, 32, 32)


In [6]:
tf_callbacks = [
    tf.keras.callbacks.CSVLogger("TSPimagesTraining.csv", separator=",", append=False),
    tf.keras.callbacks.TensorBoard(log_dir='./logs',profile_batch = 100000000),
]

# Autkeras Model

In [None]:
# test Autokeras Model with few epochs
clf = ak.ImageClassifier(overwrite=False, max_trials=1)
# Supervised training of the model
clf.fit(X, y, epochs=3, callbacks=tf_callbacks)

In [9]:
model = clf.export_model()
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32)]          0         
_________________________________________________________________
cast_to_float32 (CastToFloat (None, 32, 32)            0         
_________________________________________________________________
expand_last_dim (ExpandLastD (None, 32, 32, 1)         0         
_________________________________________________________________
normalization (Normalization (None, 32, 32, 1)         3         
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0     

In [None]:
#train the model using 10 folds with 100 epoch per fold
from sklearn import metrics
from sklearn.metrics import accuracy_score,recall_score,precision_score

cv = KFold(n_splits=10, random_state=1, shuffle=True)
all_tests = []
all_predictions = []

for train_index, test_index in cv.split(X, y):
  #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf = ak.ImageClassifier(overwrite= False, max_trials=1)
  # Supervised training of the model
    print("Start training")
    clf.fit(X_train, y_train, epochs=100, callbacks=tf_callbacks)

    print("Prendictions on unseen data")
    predicted_y = clf.predict(X_test)
    all_tests.append(y_test)
    all_predictions.append(predicted_y)
    
    report = classification_report(y_test, predicted_y)
    print(report)

In [8]:
print(len(all_predictions[9]))
predictions = []
for sublist in all_predictions:
    for item in sublist:
        predictions.append(int(item))

tests = []
for sublist in all_tests:
    for item in sublist:
        tests.append(item)

500


In [None]:
report = classification_report(tests, predictions)
print(report)

In [10]:
from sklearn import metrics
from sklearn.metrics import accuracy_score,recall_score,precision_score
from sklearn.metrics import f1_score

print("Accuracy:",metrics.accuracy_score(predictions, tests))
precision = precision_score(predictions, tests)
recall = recall_score(predictions, tests)
f1_score = f1_score(predictions, tests)
print('Precision: %.3f' % precision)
print('Recall: %.3f' % recall)
print('F1-score: %.3f' % f1_score)

Accuracy: 0.8784
Precision: 0.960
Recall: 0.825
F1-score: 0.888
