In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
# Load the Kaggle handwritten-alphabet dataset from CSV into a DataFrame.
# Per the original author's note it contains 372037 samples, each a 28*28 image
# (NOTE(review): the row count is not verified in this notebook).
data = pd.read_csv("../input/handwritten_data_785.csv", encoding = 'utf8')

In the data frame, the first column holds the target values. Since we are predicting letters of the alphabet, it has 26 values ranging from 0 to 25. Each number corresponds to a letter: for example, 0 would be A, 1 would be B, and so on.

Each row corresponds to one image. Since each image is 28 x 28 pixels, we have 28*28 = 784 numbers, which is the number of pixel columns in each row.

Each image is 28*28 pixels because computation at this resolution is relatively cheap. Higher resolution means more computation time.

In [None]:
# Column 0 holds the class label of every sample. reshape(-1, 1) turns the
# 1-D label vector into a 2-D array with a single column (one row per sample).
target = data.iloc[:, 0].to_numpy().reshape(-1, 1)

# Every remaining column is kept, as a DataFrame, as the model's input features.
features = data.iloc[:, 1:]

In [None]:
target.shape  # labels: (n_samples, 1) after the reshape(-1, 1) above

In [None]:
features.shape  # images: one row per sample, one column per remaining CSV column (pixel values, per the text above)

In [None]:
# Visualizing the dataset: draw the sample stored in row 4 and put its letter in the title.
import string

# First column -> labels; remaining columns -> pixels, viewed as 28x28 images.
display_labels = data.to_numpy()[:, 0]
display_features = data.to_numpy()[:, 1:].reshape(-1, 28, 28)

# Map class index -> upper-case letter (0 -> 'A', 1 -> 'B', ..., 25 -> 'Z').
nr_to_letter = dict(enumerate(string.ascii_uppercase))

plt.title('Alphabet ' + nr_to_letter[display_labels[4]])
plt.imshow(display_features[4])

The model below is a simple feed-forward neural network: 3 hidden layers followed by a softmax (multinomial logistic regression) output layer.

In [None]:
# Using keras which is a neural network library for python
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.models import Model

In [None]:
# Splitting into training and testing: 30% of the samples are held out for
# testing, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.3, random_state=42
)

# Cast the features to float32 and divide every value by 255. This only rescales
# the values (to the 0-1 range, assuming the raw pixels are 0-255 -- TODO confirm);
# it does not remove or convert any colour channels.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Report the shape of each split, one per line.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')

# Number of input features per sample, used later to size the network's input.
inputs = X_train.shape[1]
print(inputs)

In [None]:
# Building the model
def logit_model(inputs, num_classes=26):
    """Build (but do not compile) a fully connected softmax classifier.

    Architecture: three ReLU hidden layers (512 -> 256 -> 128 units) followed by
    a softmax output layer with one unit per class.

    Parameters
    ----------
    inputs : int
        Number of input features per sample (784 for flattened 28x28 images).
        This value sizes the input of the first layer; previously the argument
        was ignored and 28*28 was hard-coded.
    num_classes : int, optional
        Number of output classes. Defaults to 26 (one per letter), which keeps
        existing one-argument calls unchanged.

    Returns
    -------
    keras.models.Sequential
        The uncompiled model; call ``compile`` and ``fit`` on it.
    """
    model = Sequential()
    # Only the first layer needs an explicit input size; every later layer
    # infers its input size from the output of the layer before it.
    model.add(Dense(512, activation='relu', input_dim=inputs))  # Hidden layer 1
    model.add(Dense(256, activation='relu'))  # Hidden layer 2
    model.add(Dense(128, activation='relu'))  # Hidden layer 3
    # Output layer: one node per class, softmax turns the scores into probabilities.
    model.add(Dense(num_classes, activation='softmax'))
    return model

**Input Layer:**
There is only one input layer. Its size is specified as a parameter (`input_dim`) to the first Dense object's constructor, which in our case is 28x28 = 784. If we had 100x100 images, input_dim would have been 10,000.

**Hidden Layer:**
The number of hidden layers and the number of nodes in each are not fixed. You start with a few and gradually increase both until the accuracy stops improving.

**Output Layer:**
As with the input, there is only one output layer, and the number of nodes in it depends on the problem; in our case it is 26, as we have 26 classes.

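For each example, the final Dense layer first computes a vector of raw scores (logits, i.e. log-odds: the logarithm of the odds of some event), one for each class. The softmax activation on that layer then converts these scores into probabilities that sum to 1 across the classes, so the model's output can be read directly as per-class probabilities.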

In [None]:
# Build the network; `inputs` is the per-sample feature count taken from X_train.shape[1] above.
log_reg = logit_model(inputs)

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy as the loss
# (the labels are integer class indices, not one-hot vectors), and accuracy
# reported as an additional metric.
log_reg.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

We use sparse categorical cross-entropy because there are many categories and the labels are integer class indices (0 to 25); similarly, for a binary problem we would use binary cross-entropy.

In [None]:
# Train on the training split for 5 epochs with mini-batches of 128 samples.
# NOTE(review): despite its name, `model` is not the network (that is `log_reg`);
# it holds the object returned by fit(), whose `.history` dict is read by the
# plotting cells below.
model = log_reg.fit(X_train, y_train, epochs = 5, batch_size = 128)

In [None]:
# Evaluate the trained network on the held-out test split (X_test, y_test).
# The result is indexed as [0] and [1] in the next cell; with the compile()
# settings used above these are expected to be the loss and the accuracy.
test_score = log_reg.evaluate(X_test, y_test)

In [None]:
# test_score comes from evaluate(): index 0 is the loss (cost) and index 1 is the
# accuracy metric requested in compile(). The second line used to be labelled
# "cost" as well, which misreported the accuracy value.
print('Test cost of unseen sample: ' + str(test_score[0]))
print('Test accuracy of unseen sample: ' + str(test_score[1]))

In [None]:
# Plot the training loss recorded for each epoch by fit().
# With a single sequence, plt.plot uses 0..N-1 as the x values (the epoch index).
plt.plot(model.history['loss'], label='Training cost')
plt.title('Training Cost')


In [None]:
# Plot the training accuracy recorded for each epoch by fit().
# Keras stores this metric under 'accuracy' in newer releases and under 'acc' in
# older ones, so pick whichever key is present instead of hard-coding 'acc'
# (which raises KeyError on newer versions).
acc_key = 'accuracy' if 'accuracy' in model.history else 'acc'
plt.plot(range(len(model.history[acc_key])), model.history[acc_key], 'r', label='Training Accuracy')
plt.title('Training Accuracy')