In [1]:
import numpy as np
import pandas as pd

In [2]:
from label_encoder import LabelEncode
from kfold import KFold, StratifiedKFold
from metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from scaler import MinMaxScaler, RobustScaler
from utility import train_test_split
from svc import SVC

In [3]:
# Load the glass dataset. header=None makes pandas number the columns 0..10
# instead of reading the first row as column names.
# Column 10 is used as the class label further down; column 0 looks like a
# running sample id -- TODO confirm.
data = pd.read_csv('GLASS.csv', header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [4]:
# Number of rows (samples) in the loaded frame.
print('Total no of examples ::', len(data))

Total no of examples :: 214


In [5]:
# True if at least one cell anywhere in the frame is null (NaN/None).
print('Is any data missing ::', data.isnull().values.any())

Is any data missing :: False


In [6]:
# Class distribution of the label column (10), most frequent class first.
data[10].value_counts()

2    76
1    70
7    29
3    17
5    13
6     9
Name: 10, dtype: int64

### Only six classes are present (original labels 1, 2, 3, 5, 6, 7 — there is no class 4)

In [7]:
# Replace the class labels in column 10 with the encoder's output, in place.
# NOTE(review): this overwrites the original labels, so re-running the cell
# encodes already-encoded values -- confirm LabelEncode tolerates that.
lb = LabelEncode()
data[10] = lb.fit_transform(data[10])

In [8]:
# Preview the frame again now that column 10 has been re-encoded.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,0
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,0
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,0
3,4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,0
4,5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,0


In [9]:
# Class distribution after encoding; the counts should match the pre-encoding ones.
data[10].value_counts()

1    76
0    70
5    29
2    17
3    13
4     9
Name: 10, dtype: int64

In [10]:
# Summary statistics of the feature columns only (column 0 and the label column 10 are dropped).
data.drop(columns=[0, 10]).describe()

Unnamed: 0,1,2,3,4,5,6,7,8,9
count,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0
mean,1.518365,13.40785,2.684533,1.444907,72.650935,0.497056,8.956963,0.175047,0.057009
std,0.003037,0.816604,1.442408,0.49927,0.774546,0.652192,1.423153,0.497219,0.097439
min,1.51115,10.73,0.0,0.29,69.81,0.0,5.43,0.0,0.0
25%,1.516523,12.9075,2.115,1.19,72.28,0.1225,8.24,0.0,0.0
50%,1.51768,13.3,3.48,1.36,72.79,0.555,8.6,0.0,0.0
75%,1.519157,13.825,3.6,1.63,73.0875,0.61,9.1725,0.0,0.1
max,1.53393,17.38,4.49,3.5,75.41,6.21,16.19,3.15,0.51


### Normalizing selected feature columns
- column 2 (MinMaxScaler)
- column 5 (MinMaxScaler)
- column 7 (RobustScaler)

In [11]:
# Rescale columns 2 and 5 in place, each with its own freshly fitted MinMaxScaler.
# NOTE(review): the scaler is fit on the full dataset, before the train/test
# split below, so statistics of the held-out rows leak into the features.
for col in [2, 5]:
    sc = MinMaxScaler()
    data[col] = sc.fit_transform(data[col])

In [12]:
# Rescale column 7 in place with RobustScaler. Unlike the MinMaxScaler cell,
# the column is passed as a 2-D (n_rows, 1) array here.
# NOTE(review): as with the other scalers, this is fit on the full dataset
# before the train/test split, so held-out statistics leak into the feature.
col = 7
sc = RobustScaler()
data[col] = sc.fit_transform(data[col].values.reshape(-1, 1))

In [13]:
# Summary statistics of the feature columns again, to check the effect of the scaling cells.
data.drop(columns=[0, 10]).describe()

Unnamed: 0,1,2,3,4,5,6,7,8,9
count,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0
mean,1.518365,0.402684,2.684533,1.444907,0.50731,0.497056,0.382802,0.175047,0.057009
std,0.003037,0.122798,1.442408,0.49927,0.138312,0.652192,1.52617,0.497219,0.097439
min,1.51115,0.0,0.0,0.29,0.0,0.0,-3.399464,0.0,0.0
25%,1.516523,0.327444,2.115,1.19,0.441071,0.1225,-0.386059,0.0,0.0
50%,1.51768,0.386466,3.48,1.36,0.532143,0.555,0.0,0.0,0.0
75%,1.519157,0.465414,3.6,1.63,0.585268,0.61,0.613941,0.0,0.1
max,1.53393,1.0,4.49,3.5,1.0,6.21,8.13941,3.15,0.51


In [14]:
# Feature matrix: every column except 0 and the label; target vector: column 10.
X, y = data.drop(columns = [0, 10]).values, data[10].values

In [15]:
# Hold out 10% of the rows as the final test set; X / y keep the remaining rows.
# The inputs are rebuilt from `data` instead of being read from the current
# X / y, so re-running this cell does not split an already-reduced training
# set a second time.
# The values are unpacked in the order train features, train labels,
# test features, test labels -- this is the project's own train_test_split.
# NOTE(review): shuffle=True with no visible seed -- the split (and every score
# below) may change between runs; pass a seed if the helper supports one.
X, y, X_test, y_test = train_test_split(
    data.drop(columns=[0, 10]).values,
    data[10].values,
    test_size=0.1,
    shuffle=True,
)

(214, 9) (214,)


In [16]:
def push_cv_scores(accuracies, f1_macro_scores, f1_weighted_scores, y_true, y_pred):
    """Record one fold's metrics in the running result lists.

    The macro-averaged F1, the weighted-averaged F1 and the accuracy of
    ``y_pred`` against ``y_true`` are appended (in that order) to
    ``f1_macro_scores``, ``f1_weighted_scores`` and ``accuracies``.
    The lists are mutated in place; nothing is returned.
    """
    per_average_targets = (
        ('macro', f1_macro_scores),
        ('weighted', f1_weighted_scores),
    )
    for averaging, fold_scores in per_average_targets:
        # f1_score yields four values here; only the third one is kept.
        _, _, fold_f1, _ = f1_score(y_true, y_pred, average=averaging)
        fold_scores.append(fold_f1)

    accuracies.append(accuracy_score(y_true, y_pred))

In [33]:
def get_cross_val_score(accuracies, f1_macro_scores, f1_weighted_scores):
    """Print a cross-validation summary, one line per metric.

    Each argument is a sequence of per-fold scores. For every metric the mean
    and the standard deviation across folds are printed, both multiplied by
    100. Nothing is returned.
    """
    metric_names = ('accuracy', 'macro avg f1-score', 'weighted avg f1-score')
    # Convert everything up front so a bad input fails before anything is printed.
    metric_values = [
        np.asarray(fold_scores)
        for fold_scores in (accuracies, f1_macro_scores, f1_weighted_scores)
    ]

    for name, values in zip(metric_names, metric_values):
        print("%s: %f (+/- %f)" % (name, 100*values.mean(), 100*values.std()))

In [34]:
# 10-fold stratified cross-validation of the polynomial-kernel SVC on the
# training portion (X, y). Per-fold scores are collected in the three lists
# and summarised once the loop has finished.
# NOTE(review): the rarest class has only 9 samples in the full dataset, fewer
# than n_splits=10, so not every fold can contain it -- consider fewer splits.
f1_macro_scores = []
f1_weighted_scores = []
accuracies = []

for train_index, val_index in StratifiedKFold(n_splits=10).split(X, y):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # A fresh model per fold so no fitted state carries over between folds.
    model = SVC(C=0.5, kernel='poly', coef0=1.0, degree=5, gamma='scale')
    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)
    y_train_pred = model.predict(X_train)
    # Train accuracy is printed next to the validation report to spot over/under-fitting.
    print('Train Accuracy :: ', accuracy_score(y_train, y_train_pred))
    print(classification_report(y_val, y_pred))

    push_cv_scores(accuracies, f1_macro_scores, f1_weighted_scores, y_val, y_pred)

get_cross_val_score(accuracies, f1_macro_scores, f1_weighted_scores)

Train Accuracy ::  0.7630057803468208
              precision    recall  f1-score   support

           0       0.57      0.67      0.62         6
           1       0.57      0.50      0.53         8
           2       0.00      0.00      0.00         2
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         2

    accuracy                           0.60        20
   macro avg       0.69      0.69      0.69        20
weighted avg       0.60      0.60      0.60        20

Train Accuracy ::  0.7803468208092486
              precision    recall  f1-score   support

           0       0.50      0.43      0.46         7
           1       0.45      0.71      0.56         7
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         1
           5       1.00      1.00      1.00         2

 

In [35]:
# Final model, using the hyper-parameters that were cross-validated above.
# gamma='scale' is passed explicitly so this model matches the validated
# configuration regardless of what SVC's default gamma is.
model = SVC(C=0.5, kernel='poly', degree=5, coef0=1.0, gamma='scale')

In [36]:
# Fit the final model on the whole training portion (the rows left after the hold-out split).
# The return value of fit() is kept in `classifiers` but is not used in the cells shown here.
classifiers = model.fit(X, y)

In [37]:
# Evaluate the final model: predictions on the held-out test rows and, for
# comparison, on the rows it was trained on.
# NOTE(review): the hold-out set is small, so a class can end up with zero
# support in the report; per-class scores there are not meaningful.
y_pred = model.predict(X_test)
y_train_pred = model.predict(X)
print('Train accuracy ::', accuracy_score(y, y_train_pred))
print(classification_report(y_test, y_pred))

Train accuracy :: 0.7668393782383419
              precision    recall  f1-score   support

           0       0.88      1.00      0.93         7
           1       0.50      0.20      0.29         5
           2       0.00      0.00      0.00         0
           3       0.50      1.00      0.67         2
           4       1.00      1.00      1.00         1
           5       1.00      0.83      0.91         6

    accuracy                           0.76        21
   macro avg       0.65      0.67      0.63        21
weighted avg       0.79      0.76      0.75        21

