In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

In [None]:
# Labelled data: feature table and the matching font labels.
X = pd.read_csv('/kaggle/input/font-recognition/train_data.csv')
y = pd.read_csv('/kaggle/input/font-recognition/train_labels.csv')

# Unlabelled data that the final submission is predicted for.
ActualX = pd.read_csv('/kaggle/input/font-recognition/test_data.csv')

# Encode the font names in the 'Font' column as integer class ids.
vals_to_replace = {
    'ARIAL': 0,
    'TIMES': 1,
    'SERIF': 2,
    'CAMBRIA': 3,
    'CALIBRI': 4,
    'TAHOMA': 5,
}
y = y.assign(Font=y['Font'].map(vals_to_replace))

# Hold out 30% of the labelled rows for validation; the fixed random_state
# makes the shuffled split reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=100,
)

In [None]:
# Keep an independent copy of the hold-out labels for accuracy_score later on.
# A plain alias (aftery_test = y_test) would follow any in-place change to y_test.
aftery_test = y_test.copy()

# Segregate Data Types:
1. image data
2. categorical data (Boolean)
3. numerical data

In [None]:
def _split_feature_groups(frame):
    """Split a raw feature frame into its three column groups.

    Returns a tuple ``(pixels, categorical, numerical)``:

    * pixels      - every column from position 7 onwards, divided by 255
    * categorical - columns at positions 1 and 2, as an independent copy so
                    that later column assignments do not write into a view
                    of ``frame``
    * numerical   - columns at positions 0, 3, 4, 5 and 6
    """
    # Open-ended slice: the previous upper bound was len(frame), which is the
    # ROW count and only worked because there are more rows than columns.
    pixels = frame.iloc[:, 7:] / 255
    categorical = frame.iloc[:, 1:3].copy()
    numerical = frame.iloc[:, np.r_[0, 3:7]]
    return pixels, categorical, numerical


X_train_rc, X_train_cat, X_train_num = _split_feature_groups(X_train)
X_test_rc, X_test_cat, X_test_num = _split_feature_groups(X_test)
ActualX_rc, ActualX_cat, ActualX_num = _split_feature_groups(ActualX)

### 1. Preprocess Image Data

In [None]:
import skimage
import skimage.feature

# Zero-filled output buffers, one 20x20 slot per row of each pixel frame.
ed_X_train_rc = np.zeros(shape=(X_train_rc.shape[0], 20, 20))
ed_X_test_rc = np.zeros(shape=(X_test_rc.shape[0], 20, 20))
ed_ActualX_rc = np.zeros(shape=(ActualX_rc.shape[0], 20, 20))

def imgprep(df, newarr, sigma=0.15):
    """Fill ``newarr`` with the Canny edge map of every row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One flattened image per row.
    newarr : numpy.ndarray
        Pre-allocated output of shape ``(len(df), height, width)``. Each row
        of ``df`` is reshaped to ``newarr.shape[1:]`` (20x20 for the buffers
        used in this notebook) and its edge map is written to ``newarr[i]``.
    sigma : float, optional
        Passed straight through to ``skimage.feature.canny``.

    Returns
    -------
    None
        ``newarr`` is modified in place.
    """
    # Convert the frame once instead of doing a pandas .iloc lookup per row.
    pixels = df.to_numpy()
    img_shape = newarr.shape[1:]
    for i, flat_img in enumerate(pixels):
        edges = skimage.feature.canny(image=flat_img.reshape(img_shape), sigma=sigma)
        # Assignment casts the edge map to newarr's dtype.
        newarr[i] = edges

# Edge-detect each pixel frame into its matching buffer:
# training split, hold-out split, then the submission data.
for pixel_frame, edge_buffer in (
    (X_train_rc, ed_X_train_rc),
    (X_test_rc, ed_X_test_rc),
    (ActualX_rc, ed_ActualX_rc),
):
    imgprep(pixel_frame, edge_buffer)

In [None]:
# Add a trailing single-channel axis so the arrays match the CNN input shape
# (20, 20, 1). -1 lets NumPy infer the sample count instead of hard-coding
# 45500 / 19500 / 29221, which broke as soon as the data or the split changed.
img_X_train = ed_X_train_rc.reshape(-1, 20, 20, 1)
img_X_test = ed_X_test_rc.reshape(-1, 20, 20, 1)
img_ActualX = ed_ActualX_rc.reshape(-1, 20, 20, 1)

### 2. Preprocessing 2) Categorical + 3) Numerical Data
    * for X_train_cat["strength"] -> values [0.4, 0.7], with counts [25568, 19932]
    * for X_train_cat["italic"] -> values [0, 1], with counts [26305, 19195]
    
    
    
    

In [None]:
# Change 2) Categorical Data into [0,1] - i.e. only the 'strength' column.
# .assign builds a new frame instead of writing into a slice of the raw data
# (which triggers SettingWithCopyWarning). .replace leaves values that are not
# keys untouched, so re-running this cell keeps the 0/1 codes instead of
# turning them into NaN the way a second .map would.
str_bool_replace = {0.4: 0, 0.7: 1}
X_train_cat = X_train_cat.assign(strength=X_train_cat["strength"].replace(str_bool_replace))
X_test_cat = X_test_cat.assign(strength=X_test_cat["strength"].replace(str_bool_replace))
ActualX_cat = ActualX_cat.assign(strength=ActualX_cat["strength"].replace(str_bool_replace))

In [None]:
# Change 3) Numerical Data into the range [0,1].
# The scaler learns its min/max from the training split only and is then
# applied unchanged to all three numerical blocks.
from sklearn.preprocessing import MinMaxScaler
traincs = MinMaxScaler(feature_range=(0, 1))
traincs.fit(X_train_num)
X_train_num, X_test_num, ActualX_num = [
    traincs.transform(block) for block in (X_train_num, X_test_num, ActualX_num)
]

In [None]:
# Join the numerical and categorical blocks column-wise into one
# "core" feature matrix per data set.
X_train_core = np.concatenate([X_train_num, X_train_cat], axis=1)
X_test_core = np.concatenate([X_test_num, X_test_cat], axis=1)
ActualX_core = np.concatenate([ActualX_num, ActualX_cat], axis=1)

# Neural Network Model

## Multi-Layer Perceptron Layer for Categorical + Numerical Data

### Prep output layer for NN

In [None]:
# One-hot encode the integer class ids for the categorical_crossentropy loss.
# to_categorical is imported from keras.utils directly: the keras.utils.np_utils
# module this cell used before no longer exists in newer Keras releases.
from keras.utils import to_categorical
nny_train = to_categorical(y_train)
nny_test = to_categorical(y_test)

In [None]:
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, MaxPool2D, ZeroPadding2D, AveragePooling2D
from keras.layers import Conv2D, BatchNormalization, Input, concatenate, Add   

In [None]:
def mlp(core_inputs):
    """Build the tabular branch and return its output tensor.

    ``core_inputs`` is fed through a 64-unit ReLU layer followed by a
    6-unit softmax layer; the softmax output is returned.
    """
    hidden_layer = Dense(64, activation="relu")
    class_layer = Dense(6, activation="softmax")
    return class_layer(hidden_layer(core_inputs))

## CNN for Image Pixel Data

In [None]:
def cnn(img_inputs):
    """Build the image branch and return its 6-way softmax output tensor.

    Layer order: Conv2D(36, 5x5) -> BN -> Dropout, Conv2D(64, 5x5, 'same')
    -> BN -> Dropout, Flatten, Dense(72) -> BN -> Dropout, and finally an
    L2-regularised Dense(6, softmax).
    """
    def _norm_then_drop(tensor):
        # Batch normalisation followed by 10% dropout, used after every stage.
        tensor = BatchNormalization()(tensor)
        return Dropout(0.1)(tensor)

    features = Conv2D(36, kernel_size=5, activation='relu')(img_inputs)
    features = _norm_then_drop(features)

    features = Conv2D(
        64, kernel_size=5, strides=1, padding='same', activation='relu'
    )(features)
    features = _norm_then_drop(features)

    features = Flatten()(features)
    features = Dense(72, activation='relu')(features)
    features = _norm_then_drop(features)

    return Dense(6, activation='softmax', kernel_regularizer='l2')(features)

## Combine them together!

In [None]:
from keras.layers import Lambda
from keras.optimizers import Adam

# Tabular branch: 5 scaled numerical + 2 binary categorical columns = 7 features.
# The shape must be a tuple: (7) is just the integer 7, (7,) is a 1-tuple.
core_inputs = Input(shape=(7,))
mlp_NN = mlp(core_inputs)

# Image branch: 20x20 single-channel edge maps.
img_inputs = Input(shape=(20, 20, 1))
cnn_NN = cnn(img_inputs)

# Each branch ends in a 6-way softmax; concatenate both and let a small
# dense head produce the final class probabilities.
combineInput = concatenate([mlp_NN, cnn_NN])

final = Dense(units=256, activation="relu")(combineInput)
output_final = Dense(units=6, activation="softmax")(final)

model = Model(inputs=[core_inputs, img_inputs], outputs=[output_final])
# learning_rate is the current argument name; lr is a deprecated alias.
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Training the model!

In [None]:
from keras.callbacks import LearningRateScheduler
def _annealed_lr(epoch):
    """Learning rate for a given epoch: 0.01 multiplied by 0.95 once per epoch."""
    return 0.01 * 0.95 ** epoch


annealer = LearningRateScheduler(_annealed_lr)

# Train on the training split and validate on the hold-out split after each epoch.
model.fit(
    [X_train_core, img_X_train],
    nny_train,
    batch_size=32,
    epochs=50,
    validation_data=([X_test_core, img_X_test], nny_test),
    callbacks=[annealer],
)

# Predict Test Data - Test for Accuracy

In [None]:
# Predicted class id per hold-out sample: index of the highest of the six
# softmax scores. The zero array that used to be allocated and added to the
# predictions had no effect and is gone.
results = np.argmax(model.predict([X_test_core, img_X_test]), axis=1)

In [None]:
from sklearn.metrics import accuracy_score
# Fraction of hold-out samples whose predicted class id matches the true label.
accuracy_score(y_true=aftery_test, y_pred=results)

# Actual Submission Data

In [None]:
# Predicted class id per submission sample.
# The earlier version computed `29221 + model.predict(...)`: adding a large
# constant to float32 probabilities rounds them to steps of roughly 0.002, so
# two close scores can become equal and argmax then picks the lower index.
# Take the argmax of the raw scores instead; no row count is hard-coded.
actualresults = np.argmax(model.predict([ActualX_core, img_ActualX]), axis=1)

In [None]:
# Translate the predicted integer class ids back into font-name strings.
# Ids outside the table are skipped.
_font_by_id = {
    0: 'ARIAL',
    1: 'TIMES',
    2: 'SERIF',
    3: 'CAMBRIA',
    4: 'CALIBRI',
    5: 'TAHOMA',
}
stractualresults = [
    _font_by_id[class_id] for class_id in actualresults if class_id in _font_by_id
]

In [None]:
# Assemble the submission table: a 1-based ID column next to the predicted font.
pdresults = pd.Series(stractualresults, name="Font")
# The ID range follows the number of predictions instead of the hard-coded
# 29221, so the two columns always line up.
submission_ids = pd.Series(range(1, len(pdresults) + 1), name="ID")
submission = pd.concat([submission_ids, pdresults], axis=1)
submission.to_csv("mix_6.csv", index=False)
# Preview only the first rows rather than rendering the whole table.
submission.head()

# Attempted Use of Transfer Learning (ResNet50), which was never incorporated into the CNN

In [None]:
import cv2

# Stack each single-channel edge map three times along the channel axis so
# the images have the 3 channels the ResNet50 input below is declared with.
colored_img_x_train = [cv2.merge((plane, plane, plane)) for plane in img_X_train]
colored_img_x_test = [cv2.merge((plane, plane, plane)) for plane in img_X_test]

print(np.shape(colored_img_x_test))


import tensorflow.keras as K

# ImageNet-pretrained ResNet50 without its classification head, attached to a
# 20x20x3 input tensor.
input_img = K.Input(shape=(20, 20, 3))
res_model = K.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    input_tensor=input_img,
)

# Freeze the first 39 layers, and list every layer with its trainable flag.
for idx, layer in enumerate(res_model.layers):
    if idx < 39:
        layer.trainable = False
    print(idx, layer.name, "-", layer.trainable)
    
# The Lambda below calls tf.image.resize, so tf has to be bound before the
# layer can run. It was previously imported only further down the cell and
# worked purely because the lambda body is evaluated late.
import tensorflow as tf

# Target size for the resize layer.
# NOTE(review): the images built above already appear to be 20x20, so this
# resize looks like a no-op - confirm before removing.
to_res = (20, 20)

# NOTE: this rebinds `model`, replacing the combined MLP+CNN model built
# earlier in the notebook.
model = K.models.Sequential([
    K.layers.Lambda(lambda image: tf.image.resize(image, to_res)),
    res_model,
    K.layers.Flatten(),
    K.layers.BatchNormalization(),
    K.layers.Dense(256, activation='relu'),
    K.layers.Dropout(0.5),
    K.layers.BatchNormalization(),
    K.layers.Dense(6, activation='softmax'),
])

import tensorflow as tf 
from keras.optimizers import Adam
# `model` is built from tensorflow.keras (K) layers, so use the optimizer from
# the same package rather than the standalone-keras Adam. Mixing the two
# packages is rejected by some Keras/TensorFlow version combinations.
# learning_rate is the current argument name; lr is a deprecated alias.
model.compile(loss='categorical_crossentropy',
              optimizer=K.optimizers.Adam(learning_rate=0.01),
              metrics=['accuracy'])

# Train on the 3-channel edge images only and validate on the hold-out split.
model.fit(np.array(colored_img_x_train), nny_train,
          batch_size=32,
          epochs=30,
          validation_data=(np.array(colored_img_x_test), nny_test))