In [1]:
# Guard: this notebook needs Python 3.5 or newer.
# Only the (major, minor) pair matters for the check.
import sys
assert sys.version_info[:2] >= (3, 5)

# Scikit-Learn ≥0.20 is required.
# The version is compared as a numeric (major, minor) tuple: comparing the
# raw strings is lexicographic and would, for example, treat "0.3" as newer
# than "0.20".
import re
import sklearn
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None, "unrecognised scikit-learn version string"
assert tuple(int(part) for part in _sklearn_version.groups()) >= (0, 20)

# Shared imports (standard library first, then third-party)
import os
import numpy as np

# Seed NumPy's global random generator so repeated runs of the notebook
# produce the same random draws
np.random.seed(seed=42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Data Load

In [2]:
import numpy as np
import pandas as pd

# header=None makes pandas treat the first line of the file as data rather
# than as column names, so columns get integer labels.
emnist = pd.read_csv(filepath_or_buffer="emnist-digits-train.csv", header=None)

In [3]:
# Convert the loaded table into a plain NumPy array.
# np.asarray accepts a DataFrame as well as an ndarray, so this cell can be
# re-run safely. Calling .to_numpy() on the already-converted name would raise
# AttributeError, because ndarrays have no such method.
emnist = np.asarray(emnist)
emnist

array([[8, 0, 0, ..., 0, 0, 0],
       [9, 0, 0, ..., 0, 0, 0],
       [6, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [4]:
# Show the array's dimensions as a (rows, columns) tuple
np.shape(emnist)

(240000, 785)

In [5]:
# Feature matrix: every column except the first one
# (column 0 is split off separately as the training target).
X_train = emnist[..., 1:]
X_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [6]:
# Target vector: the first column of every row
y_train = emnist[..., 0]
y_train

array([8, 9, 6, ..., 1, 1, 0], dtype=int64)

# Train Model

In [7]:
from sklearn.ensemble import ExtraTreesClassifier

# Pin the estimator's own seed. With random_state left unset the forest draws
# from NumPy's global random generator, so its results would depend on which
# cells were executed before it and how many times.
ett_cf = ExtraTreesClassifier(random_state=42)

In [8]:
# Train on the full training set; fit() returns the estimator itself,
# which is what the cell displays.
ett_cf.fit(X=X_train, y=y_train)

ExtraTreesClassifier()

In [9]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validated accuracy on the training data.
# The per-fold scores are kept in a variable so they can be aggregated
# directly instead of being re-typed from the printed output.
cv_scores = cross_val_score(ett_cf, X_train, y_train, cv=10, scoring="accuracy")
cv_scores

array([0.98329167, 0.98295833, 0.9835    , 0.98275   , 0.98258333,
       0.98379167, 0.983875  , 0.98358333, 0.98204167, 0.982875  ])

In [10]:
# Average of ten accuracy values.
# NOTE(review): these literals look hand-copied from the cross-validation
# output shown above and will go stale if that cell is re-run; averaging the
# live result would be safer.
np.array([
    0.98329167, 0.98295833, 0.9835    , 0.98275   , 0.98258333,
    0.98379167, 0.983875  , 0.98358333, 0.98204167, 0.982875  ,
]).mean()

0.9831249999999999