In [42]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import xgboost as xgb


In [43]:
data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
sample = pd.read_csv('/kaggle/input/digit-recognizer/sample_submission.csv')

In [44]:
m, n = data.shape

test = np.array(test)
data_test = test[:m].T
X_test = data_test
X_test = X_test / 255.


y = data.pop("label")
X = data
X = X/255.


In [45]:
X_train, X_val, y_train, y_val = train_test_split(X.values, y.values, test_size=0.2, random_state=21, stratify=y)
X_train = X_train.T
X_val = X_val.T

In [64]:
X_train.shape

In [46]:
def init_params():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

def ReLU(Z):
    return np.maximum(Z, 0)

def softmax(Z2, t):
    
    exp_values = np.exp((Z2 - np.max(Z2, axis=0, keepdims=True))/t)
    A = exp_values / np.sum(exp_values, axis=0, keepdims=True)
#     print(A.shape)
    return A

def forward_prop(W1, b1, W2, b2, X, t):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2, t)
    return Z1, A1, Z2, A2

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

def deriv_ReLU(Z):
    return Z > 0 

def back_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    m = Y.size
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * deriv_ReLU(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2
    

In [47]:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size


def gradient_descent(X, Y, iterations, alpha, t):
    W1, b1, W2, b2 = init_params()
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X, t)
        dW1, db1, dW2, db2 = back_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 100 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A2)
            print("Accuracy: ", get_accuracy(get_predictions(A2), Y))
    return W1, b1, W2, b2, Z2

In [48]:
def make_predictions(X, W1, b1, W2, b2):
    t = 1
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X, t)
    predictions = get_predictions(A2)
    return predictions

def test_prediction(index, W1, b1, W2, b2):
    current_image = X_train[:, index, None]
    prediction = make_predictions(X_train[:, index, None], W1, b1, W2, b2)
    label = y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)
    
    current_image = current_image.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(current_image, interpolation='nearest')
    plt.show()

In [49]:
W1, b1, W2, b2, Z2 = gradient_descent(X_train, y_train, 2000, 0.6, 30)

In [50]:
Z2.shape

In [51]:
Z2.T[0]

array([ 315.1284298 , -273.15235939,   83.11972464,  198.28001066,
       -140.68093094,  183.6227538 , -307.09620664,   75.82172375,
        -20.97230497,  -99.50242689])


In [59]:
from sklearn.manifold import TSNE
tsne = TSNE(random_state=1, n_iter=800, metric="cosine")

embs_01 = tsne.fit_transform(Z2.T)
# Add to dataframe for convenience


In [60]:
dfx = pd.DataFrame()
dfx['x'] = embs_01[:, 0]
dfx['y'] = embs_01[:, 1]
dfx['label'] = y_train

In [62]:
import seaborn as sns
sns.set(rc={'figure.figsize':(11.7,8.27)})

sns.scatterplot(data=dfx, x='x', y='y', alpha=0.5, hue='label', palette='tab10').set(title='t=30, lr=0.6, epoches=2000, tsne=800')
plt.savefig('img1.png')


In [55]:
dev_predictions = make_predictions(X_val, W1, b1, W2, b2)
get_accuracy(dev_predictions, y_val)


* t = 0.8 0.8013
* t = 0.9 0.8400
* t = 1 0.8133333333333334
* t = 1.1 0.835 0.8532142857142857


In [56]:
test = np.array(test)
test = test.T
test = test/255.

In [57]:
test_predictions = make_predictions(test, W1, b1, W2, b2)

df = pd.DataFrame(test_predictions)
sample['Label'] = test_predictions
sample.to_csv('submmd30.csv', index=False)

In [58]:
from sklearn.metrics import confusion_matrix, classification_report
print(classification_report(y_val, dev_predictions))