In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.activations import linear, relu, sigmoid
from sklearn.decomposition import PCA

In [2]:
data = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

In [3]:
data.head(5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
X = data.drop('label', axis=1).to_numpy()
y = data['label'].to_numpy()
test = test.to_numpy()

In [5]:
test.shape

(28000, 784)

In [6]:
print(test.shape)

(28000, 784)


In [7]:
X = tf.keras.utils.normalize(X, axis=1)
test = tf.keras.utils.normalize(test, axis=1)

In [8]:
tf.random.set_seed(1234)
model = Sequential([
    tf.keras.Input(shape=(784,)),
    Dense(128, activation='relu', name='L1'),
    Dense(128, activation='relu', name='L2'),
    Dense(10, activation='linear', name='L3')
],name="my_model")

In [9]:
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 L1 (Dense)                  (None, 128)               100480    
                                                                 
 L2 (Dense)                  (None, 128)               16512     
                                                                 
 L3 (Dense)                  (None, 10)                1290      
                                                                 
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [10]:
model.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

In [11]:
X_train, X_val, y_train, y_val = train_test_split(X,y,test_size=0.2, random_state=23)

In [12]:
model.fit(X_train, y_train, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x22992ed88b0>

In [13]:
loss, accuracy = model.evaluate(X_val, y_val)



In [15]:
pred = model.predict(X_train[120].reshape(1, 784))
prediction_p = tf.nn.softmax(pred)
        
predictions = np.argmax(prediction_p)
print(y_train[120])
print(predictions)

7
7


In [16]:
def predict(X):
    m, n = X.shape
    predictions = np.zeros((m,))
    
    for i in range(m):
        prediction = model.predict(X[i].reshape(1, 784))
        prediction_p = tf.nn.softmax(prediction)
        
        predictions[i] = np.argmax(prediction_p)
            
    return predictions.astype(int)

In [None]:
import time

start_time = time.time()
val_preds = predict(X_val)
end_time = time.time()
prediction_time = end_time - start_time
print("Prediction time:", prediction_time, "seconds")

In [24]:
accuracy = np.mean(val_preds == y_val)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 98.00%


In [25]:
test_preds = predict(test)

In [26]:
test_ids = np.arange(1, 28001)
df = pd.DataFrame({"ImageId":test_ids,
                  "Label": test_preds})

In [27]:
df.to_csv("submission.csv", index=False)

In [6]:
num_components = 90

pca = PCA(n_components=num_components)
X = pca.fit_transform(X)
test_pca = pca.fit_transform(test)
# print(data_pca.explained_variance_)
# print(data_pca.explained_variance_ratio_)
# print(data_pca.explained_variance_ratio_.cumsum())

In [9]:
print(X)

[[-661.59577936 -699.31132683  183.28203653 ...    4.31048534
     7.64124098   54.9130869 ]
 [1701.45168402 -360.55155611 -501.80558915 ...    8.85265816
   -87.4183375     2.15722396]
 [-886.89443475 -293.76578147   67.15531552 ...   74.26583902
    27.11741001  -12.51245987]
 ...
 [ 336.2699819   950.01527986 -168.60541208 ...   24.70113351
    77.53218158  -33.11230308]
 [ 327.39939107   62.01608895  697.5363954  ...  -80.53008738
   -65.51460495  -12.41099851]
 [-364.24920036  418.89302463   62.89982111 ... -138.68973137
    99.32192553   52.27302086]]


In [8]:
print(X.shape)
print(y.shape)

(42000, 90)
(42000,)


In [10]:
X_train, X_val, y_train, y_val = train_test_split(X,y,test_size=0.2, random_state=23)

In [11]:
tf.random.set_seed(1234)
model = Sequential([
        tf.keras.Input(shape=(784,)),
        Dense(25, activation='relu', name='L1'),
        Dense(15, activation='relu', name='L2'),
        Dense(10, activation='linear', name='L3')
    ],name="my_model"
)

In [12]:
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 L1 (Dense)                  (None, 25)                2275      
                                                                 
 L2 (Dense)                  (None, 15)                390       
                                                                 
 L3 (Dense)                  (None, 10)                160       
                                                                 
Total params: 2,825
Trainable params: 2,825
Non-trainable params: 0
_________________________________________________________________


In [13]:
[layer1, layer2, layer3] = model.layers

In [14]:
W1, b1 = layer1.get_weights()
W2, b2 = layer2.get_weights()
W3, b3 = layer3.get_weights()
print(f"W1 shape = {W1.shape}, b1 shape = {b1.shape}")
print(f"W2 shape = {W2.shape}, b2 shape = {b2.shape}")
print(f"W3 shape = {W3.shape}, b3 shape = {b3.shape}")

W1 shape = (90, 25), b1 shape = (25,)
W2 shape = (25, 15), b2 shape = (15,)
W3 shape = (15, 10), b3 shape = (10,)


In [15]:
model.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
)

history = model.fit(X_train, y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [21]:
def predict(X):
    m, n = X.shape
    predictions = np.zeros((m,))
    
    for i in range(m):
        prediction = model.predict(X[i].reshape(1,90))
        prediction_p = tf.nn.softmax(prediction)
        
        predictions[i] = np.argmax(prediction_p)
            
    return predictions.astype(int)

In [22]:
prediction = model.predict(X_train[2964].reshape(1,90))
prediction_p = tf.nn.softmax(prediction)
        
predictions = np.argmax(prediction_p)
print(predictions)

8


In [23]:
data[2960:2965]

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
2960,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2961,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2962,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2963,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2964,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
val_preds = predict(X_val)

In [36]:
#y_train is the actual value
accuracy = np.mean(val_preds == y_val)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 94.71%


In [None]:
test_preds = predict(test_pca)

In [33]:
test_ids = np.arange(1, 28001)
df = pd.DataFrame({"ImageId":test_ids,
                  "Label": test_preds})

In [34]:
print(df)

       ImageId  Label
0            1      2
1            2      0
2            3      3
3            4      2
4            5      4
...        ...    ...
27995    27996      3
27996    27997      3
27997    27998      9
27998    27999      3
27999    28000      2

[28000 rows x 2 columns]
