# Random Forest and SVM Examples Using the Glass Dataset

In [4]:
# Import libraries.
import pandas as pd
from sklearn import ensemble
from sklearn import svm
from sklearn.model_selection import train_test_split
from error_metrics import *

# Read in data.
data = pd.read_csv('./data/glass.csv')
data.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [5]:
# Get features and response/target data.
features = list(data)
features.remove('Type')
data_x = data[features]
data_y = data['Type']

# Split into training and test sets.
x_train, x_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.3, random_state=4)

### 1. Use a Random Forest

In [6]:
# Build a sequence of Random Forest models for different n_est and depth values.
n_ests = [5, 10, 50, 100]
depths = [2, 4, 6, 8]
for n in n_ests:
    for dp in depths:
        mod = ensemble.RandomForestClassifier(n_estimators=n, max_depth=dp)
        mod.fit(x_train, y_train)
        y_hat = mod.predict(x_test)
        print('--------EVALUATING MODEL: n_estimators = '+str(n)+', max_depth = '+str(dp)+'--------')
        print_multiclass_classif_error_report(y_test, y_hat)

--------EVALUATING MODEL: n_estimators = 5, max_depth = 2--------
Accuracy: 0.7230769230769231
Avg. F1 (Micro): 0.723076923076923
Avg. F1 (Macro): 0.4827724686215252
Avg. F1 (Weighted): 0.7025100685478043
Confusion Matrix: 
[[16  4  0  0  0  0]
 [ 5 19  0  2  0  0]
 [ 1  0  0  0  0  0]
 [ 0  2  0  2  0  0]
 [ 2  1  0  0  0  0]
 [ 0  1  0  0  0 10]]
              precision    recall  f1-score   support

           1       0.80      0.67      0.73        24
           2       0.73      0.70      0.72        27
           3       0.00      0.00      0.00         0
           5       0.50      0.50      0.50         4
           6       0.00      0.00      0.00         0
           7       0.91      1.00      0.95        10

   micro avg       0.72      0.72      0.72        65
   macro avg       0.49      0.48      0.48        65
weighted avg       0.77      0.72      0.74        65

--------EVALUATING MODEL: n_estimators = 5, max_depth = 4--------
Accuracy: 0.7538461538461538
Avg. F1 (Mi

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


--------EVALUATING MODEL: n_estimators = 50, max_depth = 4--------
Accuracy: 0.7846153846153846
Avg. F1 (Micro): 0.7846153846153847
Avg. F1 (Macro): 0.6343733855398755
Avg. F1 (Weighted): 0.7774076188922424
Confusion Matrix: 
[[16  4  0  0  0  0]
 [ 4 20  0  2  0  0]
 [ 1  0  0  0  0  0]
 [ 0  1  0  2  0  1]
 [ 0  1  0  0  2  0]
 [ 0  0  0  0  0 11]]
              precision    recall  f1-score   support

           1       0.80      0.76      0.78        21
           2       0.77      0.77      0.77        26
           3       0.00      0.00      0.00         0
           5       0.50      0.50      0.50         4
           6       0.67      1.00      0.80         2
           7       1.00      0.92      0.96        12

   micro avg       0.78      0.78      0.78        65
   macro avg       0.62      0.66      0.63        65
weighted avg       0.80      0.78      0.79        65

--------EVALUATING MODEL: n_estimators = 50, max_depth = 6--------
Accuracy: 0.8307692307692308
Avg. F1 

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


--------EVALUATING MODEL: n_estimators = 100, max_depth = 4--------
Accuracy: 0.8153846153846154
Avg. F1 (Micro): 0.8153846153846154
Avg. F1 (Macro): 0.672118702553485
Avg. F1 (Weighted): 0.8089058767319636
Confusion Matrix: 
[[17  3  0  0  0  0]
 [ 4 20  0  2  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  3  0  1]
 [ 0  1  0  0  2  0]
 [ 0  0  0  0  0 11]]
              precision    recall  f1-score   support

           1       0.85      0.77      0.81        22
           2       0.77      0.83      0.80        24
           3       0.00      0.00      0.00         0
           5       0.75      0.60      0.67         5
           6       0.67      1.00      0.80         2
           7       1.00      0.92      0.96        12

   micro avg       0.82      0.82      0.82        65
   macro avg       0.67      0.69      0.67        65
weighted avg       0.83      0.82      0.82        65

--------EVALUATING MODEL: n_estimators = 100, max_depth = 6--------
Accuracy: 0.8461538461538461
Avg. F1

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


--------EVALUATING MODEL: n_estimators = 100, max_depth = 8--------
Accuracy: 0.8769230769230769
Avg. F1 (Micro): 0.8769230769230769
Avg. F1 (Macro): 0.729933110367893
Avg. F1 (Weighted): 0.8711431266615213
Confusion Matrix: 
[[17  3  0  0  0  0]
 [ 1 23  0  2  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  3  0  1]
 [ 0  0  0  0  3  0]
 [ 0  0  0  0  0 11]]
              precision    recall  f1-score   support

           1       0.85      0.89      0.87        19
           2       0.88      0.88      0.88        26
           3       0.00      0.00      0.00         0
           5       0.75      0.60      0.67         5
           6       1.00      1.00      1.00         3
           7       1.00      0.92      0.96        12

   micro avg       0.88      0.88      0.88        65
   macro avg       0.75      0.72      0.73        65
weighted avg       0.89      0.88      0.88        65



  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


### 2. Use a Support Vector Machine

In [10]:
# Make a sequence of SVM classifiers for different values of error term c. **Note: c=1.0 is default.
cs = [0.2, 0.5, 1.0, 2.0, 5.0, 6.0, 10.0]
for c in cs:
    # Create model and fit.
    mod = svm.SVC(C=c)
    mod.fit(x_train, y_train)
    
    # Make predictions.
    y_hat = mod.predict(x_test)
    print('----- EVALUATING MODEL: C = '+str(c)+'-----')
    print_multiclass_classif_error_report(y_test, y_hat)

----- EVALUATING MODEL: C = 0.2-----
Accuracy: 0.6615384615384615
Avg. F1 (Micro): 0.6615384615384615
Avg. F1 (Macro): 0.37663865546218495
Avg. F1 (Weighted): 0.6213833225597931
Confusion Matrix: 
[[17  3  0  0  0  0]
 [10 16  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  4  0  0  0  0]
 [ 2  1  0  0  0  0]
 [ 0  1  0  0  0 10]]
              precision    recall  f1-score   support

           1       0.85      0.57      0.68        30
           2       0.62      0.64      0.63        25
           3       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.91      1.00      0.95        10

   micro avg       0.66      0.66      0.66        65
   macro avg       0.40      0.37      0.38        65
weighted avg       0.77      0.66      0.70        65

----- EVALUATING MODEL: C = 0.5-----
Accuracy: 0.7538461538461538
Avg. F1 (Micro): 0.7538461538461538
Avg. F1 (Macro): 0.478082803908

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
