In [1]:
import autokeras
import pandas
import numpy
import os
from PIL import Image

In [2]:
def load_dataset(dirname, label):
    X = []
    Y = []
    filenames = os.listdir(dirname)
    for filename in filenames:
        img = Image.open("{}/{}".format(dirname, filename))
        arr = numpy.array(img)
        X.append(arr)
        Y.append(label)
    return X, Y

In [3]:
X_0, Y_0 = load_dataset("data/0", 0)
X_1, Y_1 = load_dataset("data/1", 1)

In [4]:
train_rate = 0.5
train_X_0 = X_0[0:int(len(X_0)*train_rate)]
train_X_1 = X_1[0:int(len(X_1)*train_rate)]
train_Y_0 = Y_0[0:int(len(Y_0)*train_rate)]
train_Y_1 = Y_1[0:int(len(Y_1)*train_rate)]

test_X_0 = X_0[int(len(X_0)*train_rate):]
test_X_1 = X_1[int(len(X_1)*train_rate):]
test_Y_0 = Y_0[int(len(Y_0)*train_rate):]
test_Y_1 = Y_1[int(len(Y_1)*train_rate):]

train_X = numpy.r_[train_X_0, train_X_1]
train_Y = numpy.r_[train_Y_0, train_Y_1]
test_X = numpy.r_[test_X_0, test_X_1]
test_Y = numpy.r_[test_Y_0, test_Y_1]

In [5]:
train_Y[0:2]

array([0, 0])

In [6]:
train_X[0:2]

array([[[3, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 2, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8)

In [7]:
model = autokeras.ImageClassifier()

In [8]:
print(len(train_X), len(train_Y))
print(train_X.shape)
print(train_Y.shape)

923 923
(923, 28, 28)
(923,)


In [9]:
model.fit(train_X, train_Y, time_limit=0.2*60*60)

In [10]:
model.cnn.searcher.history

[{'model_id': 0,
  'loss': 0.049363873898983,
  'metric_value': 0.9868421052631579},
 {'model_id': 1,
  'loss': 0.014610419981181622,
  'metric_value': 0.9868421052631579},
 {'model_id': 2,
  'loss': 0.026835805177688597,
  'metric_value': 0.9868421052631579},
 {'model_id': 3,
  'loss': 0.04497421458363533,
  'metric_value': 0.9868421052631579},
 {'model_id': 4,
  'loss': 0.04631352722644806,
  'metric_value': 0.9868421052631579},
 {'model_id': 5,
  'loss': 0.050224247574806216,
  'metric_value': 0.9868421052631579},
 {'model_id': 6,
  'loss': 0.051245345175266264,
  'metric_value': 0.9868421052631579},
 {'model_id': 7,
  'loss': 0.05921011939644814,
  'metric_value': 0.9868421052631579}]

In [11]:
model.cnn.best_model.produce_model()

TorchModel(
  (0): ReLU()
  (1): BatchNorm1d(28, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Conv1d(28, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): ReLU()
  (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (7): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (8): ReLU()
  (9): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (11): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (12): GlobalAvgPool1d()
  (13): Dropout(p=0.25)
  (14): Linear(in_features=64, out_features=64, bias=True)
  (15): ReLU()
  (16): Linear(in_features=64, out_features=2, bias=True)
)

In [12]:
model.final_fit(
    train_X,
    train_Y,
    train_X,
    train_Y,
    retrain=False)

In [13]:
model.predict(test_X[0:3])

array([0, 0, 0])

In [14]:
# save
path = "model.hdf5"
model.export_autokeras_model(path)

In [15]:
output_model = autokeras.utils.pickle_from_file(path)

In [16]:
output_model.graph.layer_list.pop(-1)

<autokeras.nn.layers.StubDense at 0x21240b3b160>

In [17]:
vec_train_X = output_model.predict(train_X)
vec_test_X = output_model.predict(test_X)

In [18]:
pandas.DataFrame(vec_train_X).head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.0,1.968101,0.0,2.118062,0.0,0.0,0.0,1.767442,0.0,0.0,...,0.0,0.0,0.016671,4.646978,3.238155,2.938803,0.0,0.0,0.757516,0.0
1,0.0,1.247167,0.0,1.373488,0.0,0.0,0.0,1.293689,0.0,0.0,...,0.0,0.0,0.0,3.191289,2.05538,1.990622,0.0,0.0,0.526455,0.0


In [19]:
from sklearn.linear_model import LogisticRegression

In [20]:
lr = LogisticRegression(solver="lbfgs")

In [21]:
lr.fit(vec_test_X, test_Y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [22]:
lr.score(vec_test_X, test_Y)

0.9978378378378379