In [1]:
import numpy as np
import pandas as pd

In [2]:
from label_encoder import LabelEncode
from kfold import KFold, StratifiedKFold
from metrics import accuracy_score, classification_report, confusion_matrix, f1_score

In [3]:
# header=None tells pandas not to treat the first row as a header, so the
# columns are labelled with integers (0..10 in the preview below).
# Column 10 is used as the class label later on; column 0 looks like a
# 1-based row id (1, 2, 3, ...) -- TODO confirm against the dataset description.
data = pd.read_csv('GLASS.csv', header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [4]:
# Number of rows (samples) in the loaded frame.
print('Total no of examples ::', len(data))

Total no of examples :: 214


In [5]:
# True if at least one cell anywhere in the frame is null.
print('Is any data missing ::', data.isnull().values.any())

Is any data missing :: False


In [6]:
# Class distribution: number of samples per original label in column 10.
data[10].value_counts()

2    76
1    70
7    29
3    17
5    13
6     9
Name: 10, dtype: int64

### Only six classes are present (labels 1, 2, 3, 5, 6, 7); label 4 never occurs

In [7]:
# Re-encode the class labels in column 10 and overwrite the column in place.
# Comparing the value counts before and after this cell, the mapping is
# 1->0, 2->1, 3->2, 5->3, 6->4, 7->5. `lb` is the project's own LabelEncode.
# NOTE(review): after this cell the original labels are no longer in `data`.
lb = LabelEncode()
data[10] = lb.fit_transform(data[10])

In [8]:
# Preview again: column 10 now shows the encoded labels.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,0
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,0
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,0
3,4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,0
4,5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,0


In [9]:
# Class distribution after encoding: same counts, new label values.
data[10].value_counts()

1    76
0    70
5    29
2    17
3    13
4     9
Name: 10, dtype: int64

In [10]:
from sklearn.svm import SVC


numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88



In [55]:
# Feature matrix: every column except column 0 and the label column 10.
X = data.drop(columns=[0, 10]).values
# Target vector: the class labels stored in column 10.
y = data[10].values

In [57]:
def push_cv_scores(accuracies, f1_macro_scores, f1_weighted_scores, y_true, y_pred):
    """Record one fold's evaluation metrics by appending to the given lists.

    Appends, in this order: the macro-averaged F1 to ``f1_macro_scores``,
    the weighted-averaged F1 to ``f1_weighted_scores``, and the accuracy to
    ``accuracies``. The three containers are mutated in place and nothing
    is returned.

    ``f1_score`` is the project's own ``metrics.f1_score``; its result is
    unpacked as a 4-tuple and only the third element is kept as the F1
    value (the other three are presumably precision, recall and support --
    TODO confirm against metrics.py).
    """
    averaging_targets = (
        ('macro', f1_macro_scores),
        ('weighted', f1_weighted_scores),
    )
    for averaging, bucket in averaging_targets:
        # Only the third element of the returned 4-tuple is stored.
        _, _, fold_f1, _ = f1_score(y_true, y_pred, average=averaging)
        bucket.append(fold_f1)

    accuracies.append(accuracy_score(y_true, y_pred))

In [68]:
# Per-fold test-set metrics; push_cv_scores appends one entry per fold.
f1_macro_scores = []
f1_weighted_scores = []
accuracies = []

# Cross-validate a degree-4 polynomial-kernel SVM over 10 splits produced by
# the project's StratifiedKFold.
for train_index, test_index in StratifiedKFold(n_splits=10).split(X, y):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # A new model is built for every fold, so no fitted state is reused.
    model = SVC(C=0.1, kernel='poly', degree=4, gamma='auto')
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_train_pred = model.predict(X_train)

    # Train accuracy is only printed (for comparison with test); it is not stored.
    print('Train Accuracy :: ', accuracy_score(y_train, y_train_pred))
    print(classification_report(y_test, y_pred))

    push_cv_scores(accuracies, f1_macro_scores, f1_weighted_scores, y_test, y_pred)

    # Echo this fold's accuracy, macro F1 and weighted F1.
    print(accuracies[-1], f1_macro_scores[-1], f1_weighted_scores[-1])

SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.7916666666666666
              precision    recall  f1-score   support

           0       0.62      0.71      0.67         7
           1       0.55      0.75      0.63         8
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         1
           4       1.00      1.00      1.00         1
           5       1.00      0.67      0.80         3

    accuracy                           0.64        22
   macro avg       0.53      0.52      0.52        22
weighted avg       0.58      0.64      0.60        22

0.6363636363636364 0.5163742690058479 0.5963317384370016


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.8020833333333334
              precision    recall  f1-score   support

           0       0.50      0.57      0.53         7
           1       0.75      0.75      0.75         8
           2       0.00      0.00      0.00         2
           3       0.50      1.00      0.67         1
           4       1.00      1.00      1.00         1
           5       1.00      0.67      0.80         3

    accuracy                           0.64        22
   macro avg       0.62      0.66      0.62        22
weighted avg       0.64      0.64      0.63        22

0.6363636363636364 0.6249999999999999 0.6272727272727272


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.8229166666666666
              precision    recall  f1-score   support

           0       0.67      0.86      0.75         7
           1       0.83      0.62      0.71         8
           2       1.00      0.50      0.67         2
           3       0.50      1.00      0.67         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         3

    accuracy                           0.77        22
   macro avg       0.83      0.83      0.80        22
weighted avg       0.81      0.77      0.77        22

0.7727272727272727 0.7996031746031745 0.7711038961038961


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.78125
              precision    recall  f1-score   support

           0       0.71      0.71      0.71         7
           1       0.75      0.75      0.75         8
           2       0.00      0.00      0.00         1
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         3

    accuracy                           0.77        22
   macro avg       0.74      0.74      0.74        22
weighted avg       0.77      0.77      0.77        22

0.7727272727272727 0.744047619047619 0.7727272727272727


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.7927461139896373
              precision    recall  f1-score   support

           0       0.78      1.00      0.88         7
           1       0.88      0.88      0.88         8
           2       0.00      0.00      0.00         1
           3       0.00      0.00      0.00         2
           4       1.00      1.00      1.00         1
           5       0.67      1.00      0.80         2

    accuracy                           0.81        21
   macro avg       0.55      0.65      0.59        21
weighted avg       0.70      0.81      0.75        21

0.8095238095238095 0.5916666666666666 0.7488095238095238


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.8186528497409327
              precision    recall  f1-score   support

           0       0.83      0.71      0.77         7
           1       0.70      0.88      0.78         8
           2       0.00      0.00      0.00         1
           3       1.00      0.50      0.67         2
           5       0.75      1.00      0.86         3

    accuracy                           0.76        21
   macro avg       0.66      0.62      0.61        21
weighted avg       0.75      0.76      0.74        21

0.7619047619047619 0.6141636141636142 0.7386475957904529


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.8134715025906736
              precision    recall  f1-score   support

           0       0.50      0.43      0.46         7
           1       0.62      0.71      0.67         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         1
           4       0.50      1.00      0.67         1
           5       0.67      0.67      0.67         3

    accuracy                           0.52        21
   macro avg       0.38      0.47      0.41        21
weighted avg       0.49      0.52      0.50        21

0.5238095238095238 0.41025641025641024 0.503052503052503


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.8134715025906736
              precision    recall  f1-score   support

           0       0.50      0.57      0.53         7
           1       0.75      0.43      0.55         7
           2       0.33      0.50      0.40         2
           3       0.50      1.00      0.67         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         3

    accuracy                           0.62        21
   macro avg       0.68      0.75      0.69        21
weighted avg       0.66      0.62      0.62        21

0.6190476190476191 0.6909090909090908 0.6199134199134199


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.8031088082901554
              precision    recall  f1-score   support

           0       0.56      0.71      0.63         7
           1       0.86      0.86      0.86         7
           2       0.00      0.00      0.00         2
           3       0.50      1.00      0.67         1
           4       0.00      0.00      0.00         1
           5       1.00      0.67      0.80         3

    accuracy                           0.67        21
   macro avg       0.49      0.54      0.49        21
weighted avg       0.64      0.67      0.64        21

0.6666666666666666 0.491468253968254 0.6400793650793651


SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=4, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Train Accuracy ::  0.8031088082901554
              precision    recall  f1-score   support

           0       0.62      0.71      0.67         7
           1       0.71      0.71      0.71         7
           2       0.00      0.00      0.00         2
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       0.67      0.67      0.67         3

    accuracy                           0.67        21
   macro avg       0.67      0.68      0.67        21
weighted avg       0.64      0.67      0.65        21

0.6666666666666666 0.6746031746031746 0.6507936507936508


In [69]:
# Convert the per-fold metric lists to NumPy arrays so that .mean() and
# .std() can be called on them.
# NOTE(review): each name is rebound from a list to an ndarray here, so
# .append() no longer works on them until the CV cell re-creates the lists.
accuracies = np.asarray(accuracies)
f1_macro_scores = np.asarray(f1_macro_scores)
f1_weighted_scores = np.asarray(f1_weighted_scores)

In [70]:
# Mean and standard deviation of test accuracy across the CV folds.
# ndarray.std() uses ddof=0 by default, i.e. the population standard deviation.
accuracies.mean(), accuracies.std()

(0.6865800865800866, 0.08516043899125629)

In [71]:
# Mean and standard deviation of the macro-averaged F1 across the CV folds.
f1_macro_scores.mean(), f1_macro_scores.std()

(0.6158092273223852, 0.11295663269186255)

In [72]:
# Mean and standard deviation of the weighted-averaged F1 across the CV folds.
f1_weighted_scores.mean(), f1_weighted_scores.std()

(0.6668731692979813, 0.08396275733918727)