In [1]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, Pool
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split

# pd.set_option('display.max_columns', None)

In [2]:
# loading data
train_dr = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test_dr = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
train_dr

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# defining a constant for random_state
RANDOM_STATE = 42

In [4]:
train_dr.info()
print('\n'f'Missing values: {train_dr.isna().sum().sum()}')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB

Missing values: 0


# CatBoost baseline

In [16]:
X = train_dr.drop(['label'], axis=1)
y = train_dr.label

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE, shuffle=True, stratify=y)

In [17]:
# create a sparse matrix
Xtr = csr_matrix(X_train.astype('float32') / 255.0)
Xvl = csr_matrix(X_val.astype('float32') / 255.0)
train_pool = Pool(Xtr, label=y_train)
val_pool = Pool(Xvl, label=y_val)
test_dr_pool = Pool(test_dr)

In [11]:
clf = CatBoostClassifier(iterations=2000,
                         loss_function='MultiClass',
                         eval_metric='Accuracy',
                         task_type='GPU',
                         random_state=RANDOM_STATE)

clf.fit(
    train_pool,
    eval_set=val_pool,
    verbose=50,
    use_best_model=True,
    early_stopping_rounds=150
)

Learning rate set to 0.110775
0:	learn: 0.5924405	test: 0.5846429	best: 0.5846429 (0)	total: 28.4ms	remaining: 56.8s
50:	learn: 0.9405655	test: 0.9321429	best: 0.9321429 (49)	total: 1.16s	remaining: 44.4s
100:	learn: 0.9605952	test: 0.9497619	best: 0.9498810 (99)	total: 2.14s	remaining: 40.3s
150:	learn: 0.9666369	test: 0.9550000	best: 0.9550000 (150)	total: 3s	remaining: 36.8s
200:	learn: 0.9703274	test: 0.9583333	best: 0.9583333 (200)	total: 3.83s	remaining: 34.3s
250:	learn: 0.9722619	test: 0.9601190	best: 0.9602381 (247)	total: 4.64s	remaining: 32.4s
300:	learn: 0.9739286	test: 0.9607143	best: 0.9610714 (293)	total: 5.46s	remaining: 30.8s
350:	learn: 0.9751488	test: 0.9622619	best: 0.9622619 (350)	total: 6.29s	remaining: 29.6s
400:	learn: 0.9768452	test: 0.9628571	best: 0.9630952 (369)	total: 7.12s	remaining: 28.4s
450:	learn: 0.9783036	test: 0.9633333	best: 0.9636905 (448)	total: 7.94s	remaining: 27.3s
500:	learn: 0.9798810	test: 0.9633333	best: 0.9639286 (478)	total: 8.78s	remain

<catboost.core.CatBoostClassifier at 0x7f776a79d310>

**score 0.96328**

# MLP

## Default Model

In [5]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [6]:
# train data preparation
X = train_dr.drop('label', axis=1).values.astype('float32') / 255.0
y = train_dr.label.values.astype('int64')

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE, shuffle=True, stratify=y)

X_train_tz = torch.from_numpy(X_train)
X_val_tz = torch.from_numpy(X_val)
y_train_tz = torch.from_numpy(y_train)
y_val_tz = torch.from_numpy(y_val)

train_dataset = TensorDataset(X_train_tz, y_train_tz)
val_dataset = TensorDataset(X_val_tz, y_val_tz)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [7]:
# test data preparation
X_test = test_dr.values.astype('float32') / 255.0
X_test_tz = torch.from_numpy(X_test)
test_loader = DataLoader(TensorDataset(X_test_tz), batch_size=256, shuffle=False)

In [10]:
# create model
class MLP_Default(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(784, 256), nn.ReLU(),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        return self.net(x)

In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MLP_Default().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_func = nn.CrossEntropyLoss()

In [17]:
best_accuracy = 0.0
best_state = None

for epoch in range(30):
    # train
    model.train()
    for X_tr, y_tr in train_loader:
        X_tr, y_tr = X_tr.to(device), y_tr.to(device)
        optimizer.zero_grad()
        loss = loss_func(model(X_tr), y_tr)
        loss.backward()
        optimizer.step()

    # eval
    model.eval()
    correct = total_obj = 0
    with torch.no_grad():
        for X_vl, y_vl in val_loader:
            X_vl, y_vl = X_vl.to(device), y_vl.to(device)
            pred = model(X_vl).argmax(1)
            correct += (pred == y_vl).sum().item()
            total_obj += y_vl.size(0)

    val_accuracy = correct / total_obj
    print(f"epoch {epoch+1}: val_accuracy={val_accuracy:.4f}")

    # save best parameters
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_state = model.state_dict()
        print(f"New best! Saving model with accuracy={best_accuracy:.4f}")

# load the best weights
model.load_state_dict(best_state)

epoch 1: val_accuracy=0.9787
New best! Saving model with acc=0.9787
epoch 2: val_accuracy=0.9787
epoch 3: val_accuracy=0.9793
New best! Saving model with acc=0.9793
epoch 4: val_accuracy=0.9790
epoch 5: val_accuracy=0.9788
epoch 6: val_accuracy=0.9783
epoch 7: val_accuracy=0.9708
epoch 8: val_accuracy=0.9751
epoch 9: val_accuracy=0.9780
epoch 10: val_accuracy=0.9756
epoch 11: val_accuracy=0.9743
epoch 12: val_accuracy=0.9755
epoch 13: val_accuracy=0.9770
epoch 14: val_accuracy=0.9737
epoch 15: val_accuracy=0.9765
epoch 16: val_accuracy=0.9783
epoch 17: val_accuracy=0.9771
epoch 18: val_accuracy=0.9719
epoch 19: val_accuracy=0.9779
epoch 20: val_accuracy=0.9754
epoch 21: val_accuracy=0.9787
epoch 22: val_accuracy=0.9752
epoch 23: val_accuracy=0.9752
epoch 24: val_accuracy=0.9770
epoch 25: val_accuracy=0.9751
epoch 26: val_accuracy=0.9775
epoch 27: val_accuracy=0.9752
epoch 28: val_accuracy=0.9742
epoch 29: val_accuracy=0.9754
epoch 30: val_accuracy=0.9773


<All keys matched successfully>

**score 0.97703**

In [19]:
# submit
preds = []
model.eval()
with torch.no_grad():
    for (X_test, ) in test_loader:
        X_test = X_test.to(device)
        preds.append(model(X_test).argmax(1).cpu().numpy())
preds = np.concatenate(preds)

submission = pd.DataFrame({
    'ImageId': np.arange(1, len(preds)+1),
    'Label': preds
})
submission.to_csv('submission.csv', index=False)