In [None]:
!pip install autokeras

In [2]:
import os
import random
import time
import numpy as np
import pandas as pd
from PIL import Image
import autokeras as ak
import tensorflow as tf
from numpy import asarray
from tensorflow import keras
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from tensorflow.keras.callbacks import CSVLogger
from sklearn.datasets import load_files
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold



In [None]:
!mkdir datasets
!unzip "/All-6Set.zip" -d "datasets"


Each of the 4 cells below sets up a different experiment (run only the one you want to train on).

In [None]:
## ALL at once SETUP
# Train on the whole extracted dataset folder: `directory` is what the loader reads,
# `domain` is the tag used in output file and folder names further down.
directory, domain = "/content/datasets", "ALL"

In [10]:
## TSP SETUP
!mkdir CHRvsGREEDY
!mv datasets/CHR datasets/GREEDY/ CHRvsGREEDY

directory = "/content/CHRvsGREEDY"
domain = "TSP"

mkdir: cannot create directory ‘CHRvsGREEDY’: File exists
mv: cannot stat 'datasets/CHR': No such file or directory
mv: cannot stat 'datasets/GREEDY/': No such file or directory


In [4]:
## KP SETUP
!mkdir EXPKNAPvsCOMBO
!mv datasets/Expknap datasets/Combo EXPKNAPvsCOMBO

directory = "/content/EXPKNAPvsCOMBO"
domain = "KP"

In [22]:
## GC SETUP
!mkdir LFvsDSATUR
!mv datasets/LF datasets/DSATUR/ LFvsDSATUR

directory = "/content/LFvsDSATUR"
domain = "GC"

mkdir: cannot create directory ‘LFvsDSATUR’: File exists


In [5]:
# Load every file below `directory` (one sub-folder per class) and shuffle the samples while
# keeping file name, content and label aligned.
SHUFFLE_SEED = 1  # fixed seed so the sample order, and therefore the CV folds, are reproducible

data = load_files(directory)

mixer = list(zip(data.filenames, data.data, data.target))
if not mixer:
    # Without this guard the unpacking below fails with an unhelpful "not enough values" error.
    raise ValueError(f"No samples found under {directory!r}; check that the setup cell succeeded.")

# A dedicated Random instance gives a deterministic shuffle without touching the global RNG state.
random.Random(SHUFFLE_SEED).shuffle(mixer)

names, X, y = zip(*mixer)

X = np.array(X)
y = np.array(y)

In [6]:
## Reduce each path to "<folder name>/<instance name>". The folder name is kept as a
## precaution so that labels can be checked against the instance they belong to.
def _folder_and_file(path):
    """Return the last two '/'-separated components of `path`, joined by '/'."""
    return "/".join(path.split("/")[-2:])


names = list(map(_folder_and_file, names))


In [None]:
#df = pd.DataFrame(names,y)

In [7]:
# Array copy of the instance names so they can be selected with the fold index arrays.
npnames = np.asarray(names)

In [None]:
# Enable the TensorBoard notebook extension so `%tensorboard` can be used later on.
%load_ext tensorboard

In [8]:
# Callbacks handed to every `fit` call in this notebook.
# NOTE(review): with append=False the CSV file is rewritten by each new training run, so only
# the most recent run survives — confirm that is acceptable.
csv_logger = tf.keras.callbacks.CSVLogger("Text-Training.csv", separator=",", append=False)
# NOTE(review): the huge profile_batch value presumably keeps the profiler from ever
# triggering — confirm.
tensorboard_logger = tf.keras.callbacks.TensorBoard(log_dir='./logs', profile_batch=100000000)

tf_callbacks = [csv_logger, tensorboard_logger]

In [9]:
# AutoKeras search graph: raw text -> sequence-style text block -> classification head.
# `input_node` and `output_node` are the two ends handed to ak.AutoModel later on.
input_node = ak.TextInput()
text_features = ak.TextBlock(block_type="sequence")(input_node)
output_node = ak.ClassificationHead()(text_features)


In [10]:
# Folder that receives one exported model per cross-validation fold.
trained_models_path = f'TrainedModels{domain}Text'
# exist_ok=True makes the cell safe to re-run (a plain `mkdir` fails if the folder exists).
os.makedirs(trained_models_path, exist_ok=True)

In [None]:
# 10-fold cross-validation of the AutoKeras text pipeline.
#
# Left in the kernel for the cells below:
#   all_tests / all_predictions : one 1-D integer array per fold (true / predicted labels)
#   reports                     : one classification_report string per fold
#   results_df                  : per fold, three columns (instance name, true label, prediction)
#   clf, report, predicted_y, X_train, y_train, X_test, y_test : values of the LAST fold only
cv = KFold(n_splits=10, random_state=1, shuffle=True)
all_tests = []
all_predictions = []
reports = []
results_df = pd.DataFrame()
fold_frames = []
folds = 0
for train_index, test_index in cv.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]

  # NOTE(review): overwrite=False with no explicit project name/directory means each fold
  # reopens whatever project an earlier fold (or earlier run) left on disk — confirm intended.
  clf = ak.AutoModel(inputs=input_node, outputs=output_node, overwrite=False, max_trials=1)

  # Supervised training of the model
  print("Start training")
  clf.fit(X_train, y_train, epochs=100, callbacks=tf_callbacks)

  print("Prendictions on unseen data")
  predicted_y = clf.predict(X_test)
  # Convert once and reuse: 1-D integer labels for the metrics and the aggregated lists.
  predicted_labels = np.array(predicted_y, dtype=int).flatten()

  # Build this fold's columns as their own frame and join on the row index. Assigning the
  # arrays straight into results_df raises ValueError as soon as two folds differ in size,
  # which happens whenever the sample count is not a multiple of the number of splits.
  fold_prefix = 'fold' + str(folds)
  fold_frames.append(pd.DataFrame({
      fold_prefix: npnames[test_index],
      fold_prefix + '-Y': y_test,
      fold_prefix + '-Prediction': np.asarray(predicted_y).flatten(),
  }))
  # Re-joined every iteration so results_df is usable even if training is interrupted.
  results_df = pd.concat(fold_frames, axis=1)

  all_tests.append(y_test)
  all_predictions.append(predicted_labels)
  report = classification_report(y_test, predicted_labels)
  reports.append(report)
  print(report)

  # NOTE(review): fixed 13 s pause kept from the original; its purpose is not visible here.
  time.sleep(13)
  clf.export_model().save(trained_models_path+'/TrainedModel-Fold'+str(folds))
  folds += 1

In [None]:
# Bundle the per-fold exported models and the ./logs directory into archives.
# The first archive is given without an extension; the copy step further down expects it
# to exist as trained_models.zip.
!zip -r trained_models $trained_models_path
!zip -r ak-logs.zip logs

In [None]:
# Write the per-fold results table to CSV, then copy it together with the two archives
# into the export folder for this domain.
import shutil

results_path = f'{domain}TextResults.csv'
results_df.to_csv(results_path)

gdrive_link = f'/{domain}Text/'
os.makedirs(os.path.dirname(gdrive_link), exist_ok=True)
for artefact in (results_path, "trained_models.zip", "ak-logs.zip"):
    shutil.copy(artefact, gdrive_link)


In [None]:
# Flatten the per-fold arrays into two plain lists covering every test sample once.
# Each entry of all_predictions is already a 1-D integer array, so its items are scalars;
# indexing them with [0] (as the previous version did) raises IndexError.
predictions = [int(item) for sublist in all_predictions for item in sublist]

tests = [item for sublist in all_tests for item in sublist]

In [None]:

# `report` holds whatever the cross-validation loop assigned last, i.e. the final fold only;
# the reports of all folds are kept in `reports`.
print(report)

              precision    recall  f1-score   support

           0       0.72      0.35      0.47       818
           1       0.58      0.87      0.70       848

    accuracy                           0.62      1666
   macro avg       0.65      0.61      0.59      1666
weighted avg       0.65      0.62      0.59      1666



In [None]:
# Export the Keras model behind `clf`, which at this point is the AutoModel of the last fold.
exported_model = clf.export_model()

In [None]:
# Print the layer-by-layer architecture of the exported model.
exported_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None,)]                 0         
                                                                 
 expand_last_dim (ExpandLast  (None, 1)                0         
 Dim)                                                            
                                                                 
 text_vectorization (TextVec  (None, 64)               0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 64, 128)           640128    
                                                                 
 dropout (Dropout)           (None, 64, 128)           0         
                                                                 
 conv1d (Conv1D)             (None, 62, 32)            12320 

In [None]:
# Save the exported model under ./our_model2 so it can be reloaded in the next cell.
exported_model.save("our_model2")

In [None]:
# Reload the saved model; ak.CUSTOM_OBJECTS is passed so Keras can resolve the
# AutoKeras-specific layers (e.g. the ExpandLastDim layer shown in the summary).
loaded_model = tf.keras.models.load_model("our_model2", custom_objects=ak.CUSTOM_OBJECTS)

In [None]:
# Predict with the model that was just reloaded from disk. The previous version called
# clf.predict here, so the save/load round trip was never actually exercised.
# NOTE(review): a plain Keras model presumably returns raw scores rather than the decoded
# labels clf.predict gives — confirm before comparing the two.
loadedpredictions = loaded_model.predict(X_test)



In [None]:
# Continue training the reloaded model for 3 epochs on the last fold's training split.
# The shared tf_callbacks are reused, so this run also writes to Text-Training.csv and ./logs.
loaded_model.fit(X_train, y_train, epochs=3, callbacks=tf_callbacks)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f4a30681e10>

In [None]:
# Export the last fold's metrics as a LaTeX table.
# `report` is the plain-text form of classification_report, which pd.DataFrame cannot turn
# into a table, so the metrics are recomputed as a dict from the same fold's labels and
# predictions.
# NOTE(review): the output name is hard-coded to 'TSPimages.tex' regardless of `domain`.
report_dict = classification_report(
    y_test, np.array(predicted_y, dtype=int).flatten(), output_dict=True
)
df = pd.DataFrame(report_dict).transpose()
df.to_latex('TSPimages.tex')


In [None]:
# Open TensorBoard inline on ./logs, the directory the TensorBoard callback writes to.
%tensorboard --logdir logs

In [None]:
# Archive the ./logs directory as logs.zip (an earlier cell archives the same directory as ak-logs.zip).
!zip -r logs.zip logs

In [None]:
!pip install tensorboard-reducer


In [None]:
# Aggregate the TensorBoard logs with tb-reducer, requesting mean, std, min and max.
# NOTE(review): the `logs*` glob also matches logs.zip once the archive cell above has run
# in this directory — confirm tb-reducer tolerates a non-directory argument.
!tb-reducer logs* reduced_logs -r mean,std,min,max
