In [2]:
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MaxAbsScaler, StandardScaler
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import train_test_split, StratifiedKFold
import numpy as np
import pandas as pd
import keras_tuner as kt
from sklearn import metrics
import datetime
import re
import os
import seaborn as sns

In [3]:
# Read the training workbook first, then the test workbook; both paths are
# relative to the notebook's working directory.
train_df, test_df = (
    pd.read_excel(workbook)
    for workbook in ('fonts_training.xlsx', 'fonts_test.xlsx')
)

In [4]:
# The 26 upper-case letters 'A'..'Z', in alphabetical order, as a list of 1-char strings.
letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

In [5]:
# Display the column labels of the training frame.
train_df.columns

Index(['Cat 1', 'Cat 2', 'Cat 3', 'Cat 4', 'Cat 5', 'Cat 6', 'Cat 7', 'Cat 8',
       'Cat 9', 'Cat 10', 'Cat 11', 'Cat 12', 'Cat 13', 'Cat 14', 'A', 'B',
       'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
       'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'],
      dtype='object')

In [6]:
# Columns used as model inputs ('Cat 1' .. 'Cat 14').
feature_columns = [
    'Cat 1', 'Cat 2', 'Cat 3', 'Cat 4', 'Cat 5', 'Cat 6', 'Cat 7',
    'Cat 8', 'Cat 9', 'Cat 10', 'Cat 11', 'Cat 12', 'Cat 13', 'Cat 14',
]
# Columns used as targets: one column per letter 'A' .. 'Z'.
target_columns = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
]

# Pull both column groups out of the training frame as NumPy arrays.
x_train_full = train_df.loc[:, feature_columns].to_numpy()
y_train_full = train_df.loc[:, target_columns].to_numpy()

In [7]:
# Fit the standardiser on the full training features and scale them in one step;
# x_scaler stays available as the fitted scaler for later reuse.
# NOTE(review): the scaler is fitted before the train/validation split is made,
# so the validation rows contribute to the fitted statistics — confirm this is acceptable.
x_scaler = StandardScaler()
x_sc_train_full = x_scaler.fit_transform(x_train_full)

In [8]:
def custom_split(x, y):
    """Split the first 78 rows of ``x``/``y`` into training and validation arrays.

    Validation rows follow a repeating pattern over the row index: within every
    run of nine consecutive indices, offsets 0, 4 and 8 are held out
    (0, 4, 8, 9, 13, 17, 18, ...). Every other index below 78 goes to training.

    Args:
        x: Indexable collection of feature rows; indices 0-77 are read.
        y: Indexable collection of target rows aligned with ``x``.

    Returns:
        Tuple ``(x_train, x_valid, y_train, y_valid)`` of NumPy arrays.
    """
    n_rows = 78

    # Offsets 0, 4 and 8 of each 9-index cycle are exactly the indices visited
    # by a +4, +4, +1 stride starting at 0.
    held_out = {idx for idx in range(n_rows) if idx % 9 in (0, 4, 8)}
    valid_rows = sorted(held_out)
    train_rows = [idx for idx in range(n_rows) if idx not in held_out]

    return (
        np.array([x[idx] for idx in train_rows]),
        np.array([x[idx] for idx in valid_rows]),
        np.array([y[idx] for idx in train_rows]),
        np.array([y[idx] for idx in valid_rows]),
    )

In [9]:
# Split the scaled features and the targets into training and validation arrays
# using the fixed index pattern implemented in custom_split.
x_train, x_valid, y_train, y_valid = custom_split(x_sc_train_full, y_train_full)

In [10]:
def build_model(n_units, n_layers, learning_rate, dropout=False):
    """Build and compile a fully connected softmax classifier.

    The network takes 14 input features, stacks ``n_layers`` ReLU ``Dense``
    layers of ``n_units`` units each, and ends in a 26-way softmax layer.

    Args:
        n_units: Number of units in every hidden ``Dense`` layer.
        n_layers: Number of hidden ``Dense`` layers.
        learning_rate: Learning rate given to the Adam optimizer.
        dropout: When true, one ``Dropout(rate=0.5)`` layer is inserted between
            the last hidden layer and the output layer.

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(14,), name='input_layer'))

    for i in range(n_layers):
        model.add(keras.layers.Dense(units=n_units, activation='relu', name=f'hidden_layer_{i}'))

    if dropout:
        model.add(keras.layers.Dropout(rate=.5, name='dropout_layer'))

    model.add(keras.layers.Dense(units=26, activation='softmax', name='output_layer'))

    # The learning rate belongs to the optimizer, not to `compile`: passing it
    # as a `compile` keyword together with optimizer='adam' never configures
    # Adam, so the optimizer is constructed explicitly here.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=['accuracy']
    )

    return model

In [14]:
def tune_and_build_model(hp):
    """Hypermodel builder: sample hyperparameters from ``hp`` and build a model.

    Registers three hyperparameters, in this order:
      * ``'units'``: integer in [2, 20], step 1
      * ``'layers'``: integer in [1, 6], step 1
      * ``'learning_rate'``: float in [1e-4, 0.1], log-sampled

    Dropout is not part of the search space; ``build_model`` is called with its
    default ``dropout`` setting.

    Args:
        hp: Hyperparameter container exposing ``Int`` and ``Float``.

    Returns:
        Whatever ``build_model`` returns for the sampled values.
    """
    # Dict literals evaluate top to bottom, so the hyperparameters are
    # registered in the order listed.
    sampled = {
        'n_units': hp.Int('units', min_value=2, max_value=20, step=1),
        'n_layers': hp.Int('layers', min_value=1, max_value=6, step=1),
        'learning_rate': hp.Float('learning_rate', min_value=1e-4, max_value=.1, sampling='log'),
    }
    return build_model(**sampled)

In [15]:
# Early-stopping callback list: monitors validation accuracy with a patience of 5 epochs.
CALLBACKS = [
    keras.callbacks.EarlyStopping(patience=5, monitor='val_accuracy'),
]

# Random search over the space defined in tune_and_build_model, scored on
# validation accuracy: 50 trials, one execution per trial, results written
# under tuning/fr_random.
# Alternative kept for reference: kt.BayesianOptimization with the same
# hypermodel/objective/directory, max_trials=20, project_name='fr_bayesian'.
tuner = kt.RandomSearch(
    tune_and_build_model,
    objective='val_accuracy',
    max_trials=50,
    executions_per_trial=1,
    directory='tuning',
    project_name='fr_random',
    overwrite=True,
)

In [16]:
# Run the hyperparameter search: every trial is fitted with epochs=200 on the
# training split and evaluated on the validation split.
# NOTE(review): CALLBACKS (EarlyStopping) is defined earlier in this notebook but
# is not passed to this call — confirm whether omitting it here is intentional.
tuner.search(x_train, y_train, epochs=200, validation_data=(x_valid, y_valid))

Trial 50 Complete [00h 00m 10s]
val_accuracy: 0.807692289352417

Best val_accuracy So Far: 0.9230769276618958
Total elapsed time: 00h 08m 16s
INFO:tensorflow:Oracle triggered exit


In [21]:
# Hyperparameters of the best-scoring trial, kept for inspection or rebuilding.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]

# Take the trained model of the best trial from the tuner. Calling
# tune_and_build_model(best_hp) would only return a freshly initialised,
# never-fitted network with the right architecture.
model = tuner.get_best_models(num_models=1)[0]

In [22]:
# Write the model to a directory under ./models.
# NOTE(review): the directory name is hard-coded and looks like a timestamp —
# confirm it is updated (or generated) for each run so earlier saves are not overwritten.
model.save('./models/fr_22_02_16_17_16')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ./models/fr_22_02_16_17_16/assets


2022-02-23 17:16:34.214276: W tensorflow/python/util/util.cc:299] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


In [23]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_0 (Dense)       (None, 13)                195       
_________________________________________________________________
hidden_layer_1 (Dense)       (None, 13)                182       
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 13)                182       
_________________________________________________________________
hidden_layer_3 (Dense)       (None, 13)                182       
_________________________________________________________________
output_layer (Dense)         (None, 26)                364       
Total params: 1,105
Trainable params: 1,105
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Print the tuner's summary of its best trials (hyperparameters and scores).
tuner.results_summary()

Results summary
Results in tuning/fr_random
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
units: 13
layers: 4
learning_rate: 0.0014303730655063707
Score: 0.9230769276618958
Trial summary
Hyperparameters:
units: 15
layers: 4
learning_rate: 0.005026337341661121
Score: 0.8461538553237915
Trial summary
Hyperparameters:
units: 19
layers: 2
learning_rate: 0.0013425570956427056
Score: 0.8461538553237915
Trial summary
Hyperparameters:
units: 19
layers: 3
learning_rate: 0.0012078661593516366
Score: 0.8461538553237915
Trial summary
Hyperparameters:
units: 19
layers: 2
learning_rate: 0.004671621645754273
Score: 0.8461538553237915
Trial summary
Hyperparameters:
units: 19
layers: 4
learning_rate: 0.03779359776404598
Score: 0.807692289352417
Trial summary
Hyperparameters:
units: 18
layers: 4
learning_rate: 0.0002792819179347644
Score: 0.807692289352417
Trial summary
Hyperparameters:
units: 16
layers: 4
learning_rate: 0.0032123213571070538
Score