### Sample program for classification prediction by Support Vector Machine  

#### Import libraries  

In [1]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import datasets

#### Load data and set parameters  

In [2]:
# Load the pixel/label table from CSV; the first row supplies the column names.
df = pd.read_csv('ai-end2-4.csv', delimiter=',', skiprows=0, header=0)
print(df.shape)
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
df.info()
display(df.head())

(1000, 65)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 65 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   pixel_0_0  1000 non-null   float64
 1   pixel_0_1  1000 non-null   float64
 2   pixel_0_2  1000 non-null   float64
 3   pixel_0_3  1000 non-null   float64
 4   pixel_0_4  1000 non-null   float64
 5   pixel_0_5  1000 non-null   float64
 6   pixel_0_6  1000 non-null   float64
 7   pixel_0_7  1000 non-null   float64
 8   pixel_1_0  1000 non-null   float64
 9   pixel_1_1  1000 non-null   float64
 10  pixel_1_2  1000 non-null   float64
 11  pixel_1_3  1000 non-null   float64
 12  pixel_1_4  1000 non-null   float64
 13  pixel_1_5  1000 non-null   float64
 14  pixel_1_6  1000 non-null   float64
 15  pixel_1_7  1000 non-null   float64
 16  pixel_2_0  1000 non-null   float64
 17  pixel_2_1  1000 non-null   float64
 18  pixel_2_2  1000 non-null   float64
 19  pixel_2_3  1000 non-null   float64
 20

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,label
0,0.0,0.0,0.0,2.0,15.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,14.0,1.0,0.0,0.0,4
1,0.0,0.0,1.0,10.0,13.0,12.0,5.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,9
2,0.0,0.0,12.0,16.0,16.0,13.0,1.0,0.0,0.0,4.0,...,0.0,0.0,0.0,12.0,16.0,16.0,9.0,0.0,0.0,3
3,0.0,0.0,9.0,16.0,6.0,0.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,12.0,14.0,3.0,0.0,0.0,0.0,8
4,0.0,1.0,9.0,15.0,13.0,4.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,16.0,14.0,8.0,0.0,0.0,9


In [3]:
# Feature matrix: every column from the first pixel to the last pixel
# (a label-based .loc slice, so both end columns are included).
first_pixel, last_pixel = 'pixel_0_0', 'pixel_7_7'
X = df.loc[:, first_pixel:last_pixel]

# Target vector: the class label of each row.
y = df['label']

# Report the shapes of both parts, then the number of samples per class.
for part in (X, y):
    print(part.shape)
print(y.value_counts())

(1000, 64)
(1000,)
9    109
1    108
4    107
3    103
6    101
2     99
8     98
5     94
7     92
0     89
Name: label, dtype: int64


In [4]:
# Hold out 20% of the rows for testing. stratify=y requests the same label
# proportions in both parts; the fixed random_state makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=20, stratify=y)
X_train, X_test, y_train, y_test = split_parts

#### Check the number of samples per label  

In [5]:
# Count the samples per label in the full set and in each split, so the effect
# of the stratified split can be verified by eye. np.bincount returns one count
# per integer value starting at 0, so position i is the count of label i.
for split_name, labels in (('all', y), ('train', y_train), ('test', y_test)):
    print(split_name, np.bincount(labels))

#### Scaling to [0, 1] (NOTE: fit the scaler on X_train only; X_test must not be used for fitting)  

In [6]:
# Min-max scaler mapping each feature to [0, 1]. It is fitted on the training
# split only, so no information from the test split reaches the scaling.
sc = MinMaxScaler(copy=True, feature_range=(0, 1))
# fit() returns the scaler itself, so fitting and transforming can be chained.
# NOTE: despite the "_std" suffix, these values are min-max scaled.
X_train_std = sc.fit(X_train).transform(X_train)

#### Train Support Vector Classifier  

In [7]:
# Hyperparameters of the support vector classifier, gathered in one place.
svc_params = {'kernel': 'rbf', 'C': 0.1, 'gamma': 'scale'}
svc = SVC(**svc_params)
# Train on the scaled training features; as the last expression of the cell,
# the fitted estimator is also what gets displayed.
svc.fit(X_train_std, y_train)

SVC(C=0.1)

#### Prediction using train_data  

In [8]:
# Predict labels for the same scaled samples the classifier was trained on.
y_train_pred = svc.predict(X=X_train_std)

#### Report accuracy and confusion matrix for train_data  

In [9]:
# Fraction of training samples whose predicted label equals the true label.
train_accuracy = accuracy_score(y_train, y_train_pred)
print('accuracy(Q10) for train data:', train_accuracy)

# Confusion matrix: rows are true labels, columns are predicted labels.
# Both axes are named explicitly; otherwise the column axis is displayed
# with the auto-generated header "col_0".
ct_pred = pd.crosstab(y_train, y_train_pred,
                      rownames=['actual'], colnames=['predicted'])
display(ct_pred)

accuracy(Q10) for train data: 0.94875


col_0,0,1,2,3,4,5,6,7,8,9
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,70,0,0,0,1,0,0,0,0,0
1,0,83,2,0,0,0,0,0,0,2
2,0,2,75,0,0,0,0,0,1,1
3,0,1,0,71,0,0,0,3,5,2
4,0,0,0,0,84,0,0,1,1,0
5,0,0,0,0,0,72,0,0,0,3
6,0,1,0,0,0,0,79,0,1,0
7,0,0,0,0,0,0,0,74,0,0
8,0,1,0,0,0,1,1,0,70,5
9,0,1,0,0,0,2,0,2,1,81


#### Scaling of X_test (using the per-feature min and max of X_train)  

In [10]:
# Apply the already-fitted scaler to the test split; it is not refitted here.
X_test_std = sc.transform(X=X_test)

#### Prediction using test_data  

In [11]:
# Predict labels for the scaled held-out test samples.
y_test_pred = svc.predict(X=X_test_std)

#### Report accuracy and confusion matrix for test_data  

In [12]:
# Fraction of test samples whose predicted label equals the true label.
test_accuracy = accuracy_score(y_test, y_test_pred)
print('accuracy(Q10) for test data:', test_accuracy)

# Confusion matrix: rows are true labels, columns are predicted labels.
# Both axes are named explicitly; otherwise the column axis is displayed
# with the auto-generated header "col_0".
ct_test = pd.crosstab(y_test, y_test_pred,
                      rownames=['actual'], colnames=['predicted'])
display(ct_test)

accuracy(Q10) for test data: 0.945


col_0,0,1,2,3,4,5,6,7,8,9
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,18,0,0,0,0,0,0,0,0,0
1,0,19,0,0,0,0,0,0,0,2
2,0,1,19,0,0,0,0,0,0,0
3,0,0,0,20,0,0,0,0,0,1
4,0,2,0,0,19,0,0,0,0,0
5,0,0,0,0,0,17,0,0,0,2
6,0,1,0,0,0,0,19,0,0,0
7,0,0,0,0,0,0,0,18,0,0
8,0,2,0,0,0,0,0,0,18,0
9,0,0,0,0,0,0,0,0,0,22
