# Imports

In [42]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, make_scorer
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [43]:
from random import seed
import time

# Fixed seed: a wall-clock seed produced a different train/validation split
# (and therefore different scores) on every run, so results could not be
# reproduced. Change this constant deliberately to draw a different split.
RANDOM_SEED = 42

# Seed both the stdlib and the legacy NumPy global generators.
seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Load data

In [44]:
# Read the labelled training CSV, then separate the pixel columns from the
# 'label' target column. The last line previews the first five feature rows.
df_train = pd.read_csv('csv/train.csv')
pixels_train = df_train.drop(columns=['label'])
labels_train = df_train['label']
pixels_train.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [45]:
# Display the dimensions of the feature frame as (rows, columns).
pixels_train.shape

(42000, 784)

In [46]:
# Read the test CSV (used later for the submission) and preview its first five rows.
pixels_test = pd.read_csv('csv/test.csv')
pixels_test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [47]:
# Stratified 80/20 split of the row labels of the full training frame.
# The split is driven by `df_train` rather than `pixels_train`/`labels_train`
# so that this cell always partitions the complete data set, even if those
# names have since been rebound to a subset by a later cell.
train_indices, valid_indices = train_test_split(
    df_train.index,
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=df_train['label']
)
len(train_indices), len(valid_indices)

(33600, 8400)

In [48]:
# Materialise the training and validation subsets from the untouched
# `df_train` frame. Slicing from `df_train` (instead of from the names being
# assigned) keeps this cell idempotent: re-running it no longer fails with a
# KeyError caused by looking up validation rows in an already-reduced subset.
labels_train = df_train.loc[train_indices, 'label']
labels_valid = df_train.loc[valid_indices, 'label']
pixels_train = df_train.loc[train_indices].drop(columns=['label'])
pixels_valid = df_train.loc[valid_indices].drop(columns=['label'])

# Utils

In [49]:
def precision_macro(y_true, y_pred):
    """Return the macro-averaged precision of `y_pred` against `y_true`."""
    macro_precision = precision_score(y_true, y_pred, average='macro')
    return macro_precision

def recall_macro(y_true, y_pred):
    """Return the macro-averaged recall of `y_pred` against `y_true`."""
    macro_recall = recall_score(y_true, y_pred, average='macro')
    return macro_recall

def f1_macro(y_true, y_pred):
    """Return the macro-averaged F1 score of `y_pred` against `y_true`."""
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    return macro_f1


def precision_micro(y_true, y_pred):
    """Return the micro-averaged precision of `y_pred` against `y_true`."""
    micro_precision = precision_score(y_true, y_pred, average='micro')
    return micro_precision

def recall_micro(y_true, y_pred):
    """Return the micro-averaged recall of `y_pred` against `y_true`."""
    micro_recall = recall_score(y_true, y_pred, average='micro')
    return micro_recall

def f1_micro(y_true, y_pred):
    """Return the micro-averaged F1 score of `y_pred` against `y_true`."""
    micro_f1 = f1_score(y_true, y_pred, average='micro')
    return micro_f1


def get_scores(y_true, y_pred):
    """Evaluate every metric on one set of predictions.

    Parameters:
        y_true: ground-truth labels.
        y_pred: predicted labels.

    Returns:
        dict mapping a display name to the metric value, in the order
        accuracy, macro metrics, micro metrics. The first name of each
        macro/micro group begins with a newline character, which puts a
        blank line before that group when the names are printed.
    """
    named_scorers = (
        ('Accuracy', accuracy_score),

        ('\nPrecision (macro)', precision_macro),
        ('Recall (macro)', recall_macro),
        ('F1 (macro)', f1_macro),

        ('\nPrecision (micro)', precision_micro),
        ('Recall (micro)', recall_micro),
        ('F1 (micro)', f1_micro),
    )
    results = {}
    for label, metric_fn in named_scorers:
        results[label] = metric_fn(y_true, y_pred)
    return results

def score_model(model, X, y):
    """Print every metric from `get_scores` for `model`'s predictions on `X`.

    Parameters:
        model: object exposing a `predict(X)` method.
        X: features passed to `model.predict`.
        y: ground-truth labels the predictions are compared with.

    Each metric is printed on its own line as `name: value`, with the value
    formatted to four decimal places. Nothing is returned.
    """
    predictions = model.predict(X)
    report = get_scores(y, predictions)
    for metric_name in report:
        print(f'{metric_name}: {report[metric_name]:.4f}')


# Wrap f1_micro with make_scorer to obtain a scikit-learn scorer object.
# NOTE(review): not referenced by any cell visible in this notebook —
# presumably intended for a `scoring=` argument; confirm or remove.
f1_micro_scorer = make_scorer(f1_micro)

In [57]:
def make_submission(model, X, path='csv/submission.csv'):
    """Predict labels for `X` and write them to a submission CSV.

    The file has the header `ImageId,Label`: `ImageId` is the 1-based
    position of the row in `X`, and `Label` is the model's prediction for
    that row. (Previously the two column names were swapped and the ids
    started at 0.)

    Parameters:
        model: object exposing a `predict(X)` method.
        X: features passed to `model.predict`, in submission order.
        path: destination of the CSV file; defaults to the location the
            notebook has always written to.
    """
    predictions = pd.Series(model.predict(X), name='Label')
    # Image ids count from 1, not from pandas' default 0.
    predictions.index = pd.RangeIndex(1, len(predictions) + 1)
    predictions.to_csv(path, index_label='ImageId')

# Model

In [51]:
# Fit a support-vector classifier, constructed with no arguments, on the
# training split. The trailing semicolon keeps the cell output empty.
SVC_model = SVC()
SVC_model.fit(pixels_train, labels_train);

In [52]:
# Print the metrics on the same data the model was fitted on.
score_model(SVC_model, pixels_train, labels_train)

Accuracy: 0.9892

Precision (macro): 0.9892
Recall (macro): 0.9892
F1 (macro): 0.9892

Precision (micro): 0.9892
Recall (micro): 0.9892
F1 (micro): 0.9892


In [53]:
# Print the same metrics on the held-out validation split.
score_model(SVC_model, pixels_valid, labels_valid)

Accuracy: 0.9756

Precision (macro): 0.9755
Recall (macro): 0.9754
F1 (macro): 0.9755

Precision (micro): 0.9756
Recall (micro): 0.9756
F1 (micro): 0.9756


In [58]:
# Predict the test-set rows and write the result to csv/submission.csv.
make_submission(SVC_model, pixels_test)