# Random Forest and SVM Examples Using the Glass Dataset

In [4]:
import pandas as pd
from sklearn import ensemble
from sklearn import svm
from sklearn.model_selection import train_test_split
from error_metrics import *

# Load the glass dataset from the project-relative CSV, then preview the
# first five rows as the cell's rich output.
# NOTE(review): the preview shows an 'Unnamed: 0' column that appears to be a
# row index written into the CSV -- confirm against the file.
glass_csv = './data/glass.csv'
data = pd.read_csv(glass_csv)
data.head(5)

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [5]:
# Get features and response/target data.
# 'Type' is the target. 'Unnamed: 0' is visible in the data preview holding
# 0, 1, 2, ... and appears to be a row index saved into the CSV rather than a
# glass measurement. Feeding it to the models would let them learn row order;
# if the file is sorted by Type (the first rows are all Type 1) that is a
# near-direct proxy for the label, so it is excluded from the predictors.
non_feature_cols = {'Type', 'Unnamed: 0'}
features = [col for col in data.columns if col not in non_feature_cols]
data_x = data[features]
data_y = data['Type']

# Split into training and test sets: 30% held out for testing, with a fixed
# random_state so the same rows land in each set on every run.
x_train, x_test, y_train, y_test = train_test_split(
    data_x, data_y, test_size=0.3, random_state=4)

### 1. Use a Random Forest

In [6]:
# Build a sequence of Random Forest models for different n_est and depth values.
# Every (n_estimators, max_depth) pair is fitted on the training split and
# scored on the test split.
n_ests = [5, 10, 50, 100]
depths = [2, 4, 6, 8]
for n in n_ests:
    for dp in depths:
        # random_state pins the forest's internal randomness; without it the
        # reported metrics change on every run even though the train/test
        # split itself is seeded, so results could not be reproduced.
        mod = ensemble.RandomForestClassifier(n_estimators=n, max_depth=dp, random_state=4)
        mod.fit(x_train, y_train)
        y_hat = mod.predict(x_test)
        print('------ EVALUATING MODEL: n_estimators = ' + str(n) + ', max_depth = ' + str(dp) + ' -----')
        print_multiclass_classif_error_report(y_test, y_hat)

------ EVALUATING MODEL: n_estimators = 5, max_depth = 2 -----
Accuracy: 0.784615384615
Avg. F1 (Micro): 0.784615384615
Avg. F1 (Macro): 0.626662497394
Avg. F1 (Weighted): 0.770012026747
             precision    recall  f1-score   support

          1       0.80      0.76      0.78        21
          2       0.77      0.83      0.80        24
          3       0.00      0.00      0.00         0
          5       0.25      0.50      0.33         2
          6       1.00      1.00      1.00         3
          7       1.00      0.73      0.85        15

avg / total       0.83      0.78      0.80        65

Confusion Matrix: 
[[16  3  0  0  0  1]
 [ 3 20  0  1  0  2]
 [ 1  0  0  0  0  0]
 [ 1  1  0  1  0  1]
 [ 0  0  0  0  3  0]
 [ 0  0  0  0  0 11]]
------ EVALUATING MODEL: n_estimators = 5, max_depth = 4 -----
Accuracy: 0.753846153846
Avg. F1 (Micro): 0.753846153846
Avg. F1 (Macro): 0.570454545455
Avg. F1 (Weighted): 0.743916083916
             precision    recall  f1-score   support


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


------ EVALUATING MODEL: n_estimators = 50, max_depth = 8 -----
Accuracy: 0.876923076923
Avg. F1 (Micro): 0.876923076923
Avg. F1 (Macro): 0.730974962701
Avg. F1 (Weighted): 0.871525851924
             precision    recall  f1-score   support

          1       0.85      0.94      0.89        18
          2       0.88      0.85      0.87        27
          3       0.00      0.00      0.00         0
          5       0.75      0.60      0.67         5
          6       1.00      1.00      1.00         3
          7       1.00      0.92      0.96        12

avg / total       0.89      0.88      0.88        65

Confusion Matrix: 
[[17  3  0  0  0  0]
 [ 1 23  0  2  0  0]
 [ 0  1  0  0  0  0]
 [ 0  0  0  3  0  1]
 [ 0  0  0  0  3  0]
 [ 0  0  0  0  0 11]]
------ EVALUATING MODEL: n_estimators = 100, max_depth = 2 -----
Accuracy: 0.753846153846
Avg. F1 (Micro): 0.753846153846
Avg. F1 (Macro): 0.416203235592
Avg. F1 (Weighted): 0.705164501828
             precision    recall  f1-score   suppo

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


------ EVALUATING MODEL: n_estimators = 100, max_depth = 4 -----
Accuracy: 0.815384615385
Avg. F1 (Micro): 0.815384615385
Avg. F1 (Macro): 0.671813452248
Avg. F1 (Weighted): 0.809052396878
             precision    recall  f1-score   support

          1       0.80      0.80      0.80        20
          2       0.81      0.81      0.81        26
          3       0.00      0.00      0.00         0
          5       0.75      0.60      0.67         5
          6       0.67      1.00      0.80         2
          7       1.00      0.92      0.96        12

avg / total       0.83      0.82      0.82        65

Confusion Matrix: 
[[16  4  0  0  0  0]
 [ 3 21  0  2  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  3  0  1]
 [ 0  1  0  0  2  0]
 [ 0  0  0  0  0 11]]


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


------ EVALUATING MODEL: n_estimators = 100, max_depth = 6 -----
Accuracy: 0.846153846154
Avg. F1 (Micro): 0.846153846154
Avg. F1 (Macro): 0.692112597547
Avg. F1 (Weighted): 0.838795986622
             precision    recall  f1-score   support

          1       0.85      0.85      0.85        20
          2       0.85      0.85      0.85        26
          3       0.00      0.00      0.00         0
          5       0.50      0.50      0.50         4
          6       1.00      1.00      1.00         3
          7       1.00      0.92      0.96        12

avg / total       0.86      0.85      0.85        65

Confusion Matrix: 
[[17  3  0  0  0  0]
 [ 2 22  0  2  0  0]
 [ 1  0  0  0  0  0]
 [ 0  1  0  2  0  1]
 [ 0  0  0  0  3  0]
 [ 0  0  0  0  0 11]]
------ EVALUATING MODEL: n_estimators = 100, max_depth = 8 -----
Accuracy: 0.876923076923
Avg. F1 (Micro): 0.876923076923
Avg. F1 (Macro): 0.729933110368
Avg. F1 (Weighted): 0.871143126662
             precision    recall  f1-score   supp

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


### 2. Use a Support Vector Machine

In [9]:
# Sweep the SVC parameter C over several values (c=1.0 is the default) and
# print the test-set error report for each setting.
cs = [0.2, 0.5, 1.0, 2.0, 5.0, 6.0, 10.0]
for c in cs:
    # fit() returns the estimator itself, so construction and training chain.
    mod = svm.SVC(C=c).fit(x_train, y_train)
    y_hat = mod.predict(x_test)

    # Header line identifying which C value the following report belongs to.
    banner = '--------- EVALUATING MODEL: C = ' + str(c) + ' ------------'
    print(banner)
    print_multiclass_classif_error_report(y_test, y_hat)

--------- EVALUATING MODEL: C = 0.2 ------------
Accuracy: 0.661538461538
Avg. F1 (Micro): 0.661538461538
Avg. F1 (Macro): 0.376638655462
Avg. F1 (Weighted): 0.62138332256
             precision    recall  f1-score   support

          1       0.85      0.57      0.68        30
          2       0.62      0.64      0.63        25
          3       0.00      0.00      0.00         0
          5       0.00      0.00      0.00         0
          6       0.00      0.00      0.00         0
          7       0.91      1.00      0.95        10

avg / total       0.77      0.66      0.70        65

Confusion Matrix: 
[[17  3  0  0  0  0]
 [10 16  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  4  0  0  0  0]
 [ 2  1  0  0  0  0]
 [ 0  1  0  0  0 10]]
--------- EVALUATING MODEL: C = 0.5 ------------
Accuracy: 0.753846153846
Avg. F1 (Micro): 0.753846153846
Avg. F1 (Macro): 0.478082803909
Avg. F1 (Weighted): 0.723539797616
             precision    recall  f1-score   support

          1       0.80      

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
