### Sample program for classification prediction by Support Vector Machine  

#### Import libraries  

In [64]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import datasets

#### Load data and set parameters  

In [65]:
# Load the dataset; header=0 makes the first row of the CSV the column names.
df = pd.read_csv('ai-end1-2.csv', delimiter=',', skiprows=0, header=0)

# Structural overview: dimensions first, then per-column dtypes / non-null counts.
print(df.shape)
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly -- wrapping it in print() appends a stray "None" line.
df.info()

# Preview the first rows using the notebook's rich table rendering.
display(df.head())

(100, 14)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   target                        100 non-null    int64  
 1   alcohol                       100 non-null    float64
 2   malic_acid                    100 non-null    float64
 3   ash                           100 non-null    float64
 4   alcalinity_of_ash             100 non-null    float64
 5   magnesium                     100 non-null    float64
 6   total_phenols                 100 non-null    float64
 7   flavanoids                    100 non-null    float64
 8   nonflavanoid_phenols          100 non-null    float64
 9   proanthocyanins               100 non-null    float64
 10  color_intensity               100 non-null    float64
 11  hue                           100 non-null    float64
 12  od280/od315_of_diluted_wines  100 non-null    float64
 

Unnamed: 0,target,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,0,13.64,3.1,2.56,15.2,116.0,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845.0
1,1,11.87,4.31,2.39,21.0,82.0,2.86,3.03,0.21,2.91,2.8,0.75,3.64,380.0
2,0,14.2,1.76,2.45,15.2,112.0,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450.0
3,0,13.05,1.73,2.04,12.4,92.0,2.72,3.27,0.17,2.91,7.2,1.12,2.91,1150.0
4,1,11.81,2.12,2.74,21.5,134.0,1.6,0.99,0.14,1.56,2.5,0.95,2.26,625.0


In [66]:
# Separate the class label from the explanatory variables.
y = df['target']
X = df.loc[:, 'alcohol':]  # label slice: 'alcohol' through the last column

# Report the dimensions of the feature matrix, then of the label vector.
for part in (X, y):
    print(part.shape)

# Number of samples per class (shown as the cell output).
y.value_counts()

(100, 13)
(100,)


1    40
0    36
2    24
Name: target, dtype: int64

In [67]:
# Hold out 40% of the rows for testing; the split is stratified on y and
# seeded so that re-running the cell reproduces the same partition.
split_options = dict(test_size=0.4, random_state=8, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

#### Check number of labels  

In [68]:
# Optional sanity check, currently disabled: uncomment to print the per-class
# sample counts for the full label vector and for each side of the split.
# print(np.bincount(y))
# print(np.bincount(y_train))
# print(np.bincount(y_test))

#### Scaling to [0, 1] (NOTE: fit the scaler on X_train only; X_test must not be included in the fit)  

In [69]:
# Learn the per-feature scaling parameters from the training split only;
# fit() returns the fitted scaler, so construction and fitting are chained.
sc = MinMaxScaler(feature_range=(0, 1), copy=True).fit(X_train)

# Rescale the training features with the parameters learned above.
X_train_std = sc.transform(X_train)

#### Train Support Vector Classifier  

In [70]:
# Hand-picked hyperparameters for the RBF-kernel classifier.
svc_params = {'kernel': 'rbf', 'C': 0.3, 'gamma': 4}

# fit() returns the estimator itself, so build and train in one expression.
svc = SVC(**svc_params).fit(X_train_std, y_train)

# Echo the fitted estimator as the cell output.
svc

SVC(C=0.3, gamma=4)

#### Prediction using train_data  

In [71]:
# Predict labels for the same scaled samples the classifier was trained on.
y_train_pred = svc.predict(X=X_train_std)

#### Report accuracy and confusion matrix for train_data  

In [72]:
# Fraction of training samples whose predicted label matches the true one.
train_accuracy = accuracy_score(y_train, y_train_pred)
print('accuracy(Q3) for train data:', train_accuracy)

# Confusion table: rows are the true labels, columns the predicted labels.
ct_pred = pd.crosstab(index=y_train, columns=y_train_pred)
display(ct_pred)

accuracy(Q3) for train data: 0.9666666666666667


col_0,0,1,2
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,22,0,0
1,0,24,0
2,0,2,12


#### Scaling of X_test (using the per-feature min and max learned from X_train)  

In [73]:
# Apply the scaler that was fitted on X_train; it is not re-fitted here.
X_test_std = sc.transform(X=X_test)

#### Prediction using test_data  

In [74]:
# Predict labels for the held-out, scaled test samples.
y_test_pred = svc.predict(X=X_test_std)

#### Report accuracy and confusion matrix for test_data  

In [75]:
# Fraction of held-out samples whose predicted label matches the true one.
test_accuracy = accuracy_score(y_test, y_test_pred)
print('accuracy(Q3) for test data:', test_accuracy)

# Confusion table: rows are the true labels, columns the predicted labels.
ct_test = pd.crosstab(index=y_test, columns=y_test_pred)
display(ct_test)

accuracy(Q3) for test data: 0.85


col_0,0,1,2
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,13,1,0
1,0,16,0
2,0,5,5
