## AutoML with AutoKeras
Source: adapted from [Jeff Heaton's AutoML notebook (t81_558_class_14_01_automl)](https://github.com/jeffheaton/t81_558_deep_learning/blob/master/t81_558_class_14_01_automl.ipynb)

In [1]:
import tensorflow as tf
import keras_preprocessing
import numpy as np
import pandas as pd
import os 
import glob
import tqdm
from PIL import Image

In [2]:
!pip install autokeras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting autokeras
  Downloading autokeras-1.0.19-py3-none-any.whl (162 kB)
[K     |████████████████████████████████| 162 kB 6.3 MB/s 
Collecting keras-tuner>=1.1.0
  Downloading keras_tuner-1.1.2-py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 44.8 MB/s 
Collecting kt-legacy
  Downloading kt_legacy-1.0.4-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner, autokeras
Successfully installed autokeras-1.0.19 keras-tuner-1.1.2 kt-legacy-1.0.4


In [3]:
import autokeras

#### Download and preprocess data

In [4]:
URL = 'https://github.com/jeffheaton/data-mirror/'
DOWNLOAD_SOURCE = URL + 'releases/download/v1/paperclips.zip'
DOWNLOAD_NAME = DOWNLOAD_SOURCE[DOWNLOAD_SOURCE.rfind('/')+1:]

COLAB = True

if COLAB:
    PATH = '/content'
else:
    PATH = '/data'
    
EXTRACT_TARGET = os.path.join(PATH, 'clips')
SOURCE = os.path.join(PATH, 'paperclips')

!wget -O {os.path.join(PATH,DOWNLOAD_NAME)} {DOWNLOAD_SOURCE}
!mkdir -p {SOURCE}
!mkdir -p {TARGET}
!mkdir -p {EXTRACT_TARGET}
!unzip -o -j -d {SOURCE} {os.path.join(PATH, DOWNLOAD_NAME)} >/dev/null

df_train = pd.read_csv(os.path.join(SOURCE, 'train.csv'))
df_train['filename'] = 'clips-' + df_train.id.astype(str) + '.jpg'

df_train = df_train[0:1000]

--2022-07-04 11:54:52--  https://github.com/jeffheaton/data-mirror/releases/download/v1/paperclips.zip
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/408419764/25830812-b9e6-4ddf-93b6-7932d9ef5982?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20220704%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20220704T115452Z&X-Amz-Expires=300&X-Amz-Signature=555fbc87b32bf0f20697c8c2abb9a1afaf705776bc11d743cf0cd0b44c9402d7&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=408419764&response-content-disposition=attachment%3B%20filename%3Dpaperclips.zip&response-content-type=application%2Foctet-stream [following]
--2022-07-04 11:54:52--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/408419764/25830812-b9e6-4ddf-93b6-7932d9ef5982?X-Amz-Al

In [5]:
IMG_SHAPE = (128, 128)

def load_images(files, img_shape):
    """Load image files into one array of pixel values scaled to [0, 1].

    Args:
        files: Sequence of image file paths (must support ``len``).
        img_shape: Two-element tuple giving the first and second spatial
            dimensions (rows, columns) of each output image.

    Returns:
        A float ``numpy`` array of shape ``(len(files),) + img_shape + (3,)``.
    """
    rows, cols = img_shape
    x = np.zeros((len(files), rows, cols, 3))
    for i, file in enumerate(tqdm.tqdm(files)):
        # The context manager releases the file handle once the pixels are read.
        with Image.open(file) as raw:
            # Force three channels so grayscale/RGBA/palette files fit the
            # buffer. PIL's resize expects (width, height), i.e. (cols, rows).
            resized = raw.convert('RGB').resize((cols, rows))
            x[i] = np.asarray(resized) / 255.
    return x

# Regression targets: the clip_count column of the training table.
y = df_train['clip_count'].values

# Full path of every training image, in the same row order as y.
images = [os.path.join(SOURCE, name) for name in df_train['filename']]

# Model inputs: all images loaded at IMG_SHAPE.
x = load_images(images, IMG_SHAPE)

100%|██████████| 1000/1000 [00:04<00:00, 223.65it/s]


#### Train with AutoKeras

In [6]:
# Tunable settings for the AutoKeras search and training run.
SEED = 42          # random seed
MAX_TRIALS = 2     # passed as max_trials to the AutoKeras search
VAL_SPLIT = 0.1    # passed as validation_split to fit
BATCH_SIZE = 32    # passed as batch_size to fit
EPOCHS = 500       # passed as epochs to fit

In [7]:
# Build the image-regression search. The seed now comes from the SEED constant
# in the config cell instead of a duplicated literal, so changing SEED takes effect.
auto_reg = autokeras.ImageRegressor(overwrite=True, max_trials=MAX_TRIALS, seed=SEED)

# Run the search/training with the configured split, batch size and epochs.
auto_reg.fit(x, y, validation_split=VAL_SPLIT, batch_size=BATCH_SIZE, epochs=EPOCHS)

# NOTE(review): this evaluates on the same x, y that were passed to fit, so the
# reported numbers are not a held-out estimate.
print(auto_reg.evaluate(x, y))

Trial 2 Complete [00h 04m 22s]
val_loss: 36.51014709472656

Best val_loss So Far: 36.51014709472656
Total elapsed time: 00h 39m 26s
INFO:tensorflow:Oracle triggered exit
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 6

In [8]:
# Pull the model out of the AutoKeras regressor and print its layer summary.
# `model` is reused by the save/reload cell below.
model = auto_reg.export_model()
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 cast_to_float32 (CastToFloa  (None, 128, 128, 3)      0         
 t32)                                                            
                                                                 
 resnet50 (Functional)       (None, None, None, 2048)  23587712  
                                                                 
 flatten (Flatten)           (None, 32768)             0         
                                                                 
 regression_head_1 (Dense)   (None, 1)                 32769     
                                                                 
Total params: 23,620,481
Trainable params: 32,769
Non-trainable params: 23,587,712
____________________________________________

#### Save and reload the model

In [10]:
# Save the exported model, preferring the TensorFlow SavedModel format and
# falling back to HDF5. The path actually written is tracked so the reload
# below reads the file that exists; previously the fallback wrote
# 'model_autokeras.h5' but the reload always read 'model_autokeras'.
model_path = 'model_autokeras'
try:
    model.save(model_path, save_format='tf')
except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit through
    model_path = 'model_autokeras.h5'
    model.save(model_path)

# AutoKeras-specific layers must be supplied via custom_objects when loading.
loaded_model = tf.keras.models.load_model(model_path, custom_objects=autokeras.CUSTOM_OBJECTS)

print(loaded_model.evaluate(x, y))

INFO:tensorflow:Assets written to: model_autokeras/assets
[42.931114196777344, 42.931114196777344]
