In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Location of the raw CSV, relative to the notebook's working directory.
DATA_PATH = "letter-recognition.data"

# Column names for the file, which is read without a header row. The first
# name is the target letter; the remaining sixteen are the feature columns.
_LABEL_COLUMN = "lettr"
_FEATURE_COLUMNS = (
    "x-box",
    "y-box",
    "width",
    "high",
    "onpix",
    "x-bar",
    "y-bar",
    "x2bar",
    "y2bar",
    "xybar",
    "x2ybr",
    "xy2br",
    "x-ege",
    "xegvy",
    "y-ege",
    "yegvx",
)
HEADERS = [_LABEL_COLUMN, *_FEATURE_COLUMNS]

In [3]:
# The file has no header row (header=None), so column names come from HEADERS.
data = pd.read_csv(
    DATA_PATH,
    names=HEADERS,
    header=None,
)

In [4]:
# Peek at the first five rows to confirm the columns were parsed as expected.
data.head(n=5)

Unnamed: 0,lettr,x-box,y-box,width,high,onpix,x-bar,y-bar,x2bar,y2bar,xybar,x2ybr,xy2br,x-ege,xegvy,y-ege,yegvx
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [5]:
# Encode the target letters as integer class ids: "A" -> 0, ..., "Z" -> 25.
# The dtype guard keeps this cell idempotent: once the column already holds
# integers, calling ord() on it again would raise TypeError, so a re-run of the
# cell is now a no-op instead of a crash.
if not pd.api.types.is_integer_dtype(data["lettr"]):
    data["lettr"] = data["lettr"].map(ord) - ord("A")

# Every label must be one of the 26 class ids; anything else means the file
# contains a value other than an uppercase letter in the target column.
assert data["lettr"].between(0, 25).all(), "unexpected label outside A-Z"

In [6]:
# Separate the sixteen feature columns from the target column, then hold out
# 20% of the rows for testing. The fixed random_state makes the split repeatable.
features = data[HEADERS[1:]]
labels = data[HEADERS[0]]
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.20,
    random_state=42,
)

In [7]:
# Rescale every sample (row, axis=1) with Keras' normalize helper (L2 norm by
# default) before fitting the models.
# np.asarray accepts both the DataFrames produced by train_test_split and the
# arrays left behind by a previous run of this cell, so re-running the cell no
# longer fails with "'numpy.ndarray' object has no attribute 'values'".
x_train = tf.keras.utils.normalize(np.asarray(x_train), axis=1)
x_test = tf.keras.utils.normalize(np.asarray(x_test), axis=1)

In [8]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling -- and therefore the metrics reported
# below -- are the same on every Restart & Run All.
tf.keras.utils.set_random_seed(42)

NUM_CLASSES = 26  # one output unit per letter; labels are encoded as 0..25

# Small fully connected classifier: two ReLU hidden layers followed by a
# softmax over the letter classes.
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(256, activation="relu"),
        tf.keras.layers.Dense(NUM_CLASSES, activation="softmax"),
    ]
)
# The sparse loss is used because y_train holds integer class ids rather than
# one-hot vectors.
model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
# Keep the History object so the per-epoch loss/accuracy can be inspected later.
history = model.fit(x_train, y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [9]:
# Score the network on the held-out split; with the compiled "accuracy" metric
# the result is displayed as [loss, accuracy].
model.evaluate(x=x_test, y=y_test)



[0.2650868892669678, 0.9207500219345093]

In [10]:
# Baseline k-nearest-neighbours classifier with scikit-learn's default
# settings, trained on the same normalised features as the neural network.
knn = KNeighborsClassifier()
knn.fit(X=x_train, y=y_train)

In [11]:
# Predicted class ids for the held-out samples, used by the report below.
y_pred = knn.predict(X=x_test)

In [12]:
# Per-class precision/recall/F1 for the k-NN baseline, followed by its overall
# accuracy on the held-out split.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)
overall_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(overall_accuracy)

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       149
           1       0.84      0.95      0.90       153
           2       0.95      0.92      0.93       137
           3       0.88      0.96      0.92       156
           4       0.92      0.93      0.93       141
           5       0.88      0.96      0.91       140
           6       0.92      0.94      0.93       160
           7       0.88      0.81      0.84       144
           8       0.96      0.93      0.94       146
           9       0.94      0.95      0.95       149
          10       0.90      0.86      0.88       130
          11       0.99      0.97      0.98       155
          12       0.98      0.97      0.98       168
          13       0.98      0.94      0.96       151
          14       0.94      0.96      0.95       145
          15       0.97      0.89      0.93       173
          16       0.96      0.95      0.96       166
          17       0.88    

In [13]:
# Cross-validated search over the odd neighbourhood sizes 3, 5, ..., 15.
# n_jobs=-1 asks scikit-learn to run the candidate fits in parallel.
param_grid = {"n_neighbors": list(range(3, 16, 2))}
grid = GridSearchCV(estimator=knn, param_grid=param_grid, n_jobs=-1)
grid.fit(X=x_train, y=y_train)

In [14]:
# Show the parameter combination that scored best in the grid search.
best_params = grid.best_params_
print(best_params)

{'n_neighbors': 3}
