In [1]:
import sys
import os

# Make the parent of the current working directory importable (presumably where
# the `lusi` package imported below lives — confirm against the repo layout).
# Source: https://stackoverflow.com/questions/16780014/import-file-from-parent-directory
#
# `__file__` is not defined inside a notebook, and the previous
# `os.path.abspath(__name__)` only worked by accident: it resolved to
# "<cwd>/__main__", so two dirname() calls happened to yield the parent of the
# working directory. Compute that directory explicitly instead.
_parent_dir = os.path.dirname(os.getcwd())
if _parent_dir not in sys.path:  # re-running this cell must not stack duplicates
    sys.path.append(_parent_dir)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder

import category_encoders as ce

from lusi.ecoc import SVMRandomInvariantsECOC
from lusi.types import InvariantTypes

In [2]:
# Load the glass dataset. header=None makes pandas treat every row as data and
# label the columns 0..N-1; the last column is used as the class label by the
# cells below.
df = pd.read_csv("../data/glass.csv", header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [3]:
# Distinct values of the last (class) column, sorted by np.unique, and a lookup
# from each original value to its position 0..K-1 in that sorted order.
labels = np.unique(df.iloc[:, -1].values)
label_mapping = {orig_label: position for position, orig_label in enumerate(labels)}

In [4]:
# Replace the original class values in the last column by their contiguous
# indices from label_mapping.
#
# Guard: if the column already consists exactly of the mapping's target values
# (0..K-1), it has either been remapped by a previous run of this cell or the
# mapping is the identity, so there is nothing to do. Without the guard a
# second run would look up already-remapped values in label_mapping and could
# raise KeyError.
current_labels = df.iloc[:, -1]
if set(current_labels.unique()) != set(label_mapping.values()):
    df.iloc[:, -1] = current_labels.map(label_mapping)
np.unique(df.iloc[:, -1].values)

array([0, 1, 2, 3, 4, 5])

In [5]:
# Split the frame into the feature matrix (every column except the last) and
# the label vector (last column), both as NumPy arrays.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values
X

array([[ 1.52101, 13.64   ,  4.49   , ...,  8.75   ,  0.     ,  0.     ],
       [ 1.51761, 13.89   ,  3.6    , ...,  7.83   ,  0.     ,  0.     ],
       [ 1.51618, 13.53   ,  3.55   , ...,  7.78   ,  0.     ,  0.     ],
       ...,
       [ 1.52065, 14.36   ,  0.     , ...,  8.44   ,  1.64   ,  0.     ],
       [ 1.51651, 14.38   ,  0.     , ...,  8.48   ,  1.57   ,  0.     ],
       [ 1.51711, 14.23   ,  0.     , ...,  8.62   ,  1.67   ,  0.     ]])

In [6]:
# Dataset dimensions: number of feature columns and number of distinct labels.
# Later cells use num_classes to size the code matrix and num_dimensions to
# bound the num_invariants search range.
num_dimensions = X.shape[1]
num_classes = len(np.unique(y))

print("Num dimensions: ", num_dimensions)
print("Num classes: ", num_classes)

Num dimensions:  9
Num classes:  6


In [7]:
def run_single_experiment(X_train, X_test, y_train, y_test, clf, model_parameters,
                          cv=5, n_jobs=4):
    """Tune ``clf`` with a grid search on the training split and score it on the test split.

    Parameters
    ----------
    X_train, y_train
        Data the grid search is fitted on.
    X_test, y_test
        Held-out data used only to compute the returned accuracy.
    clf
        Estimator handed to ``GridSearchCV``.
    model_parameters
        Parameter grid, forwarded unchanged to ``GridSearchCV``.
    cv : optional
        Forwarded to ``GridSearchCV(cv=...)``. Defaults to 5, the value that
        was previously hard-coded.
    n_jobs : optional
        Forwarded to ``GridSearchCV(n_jobs=...)``. Defaults to 4, the value
        that was previously hard-coded.

    Returns
    -------
    The value of ``accuracy_score(y_test, predictions)`` for the fitted search.

    Side effects
    ------------
    Prints the best estimator found and the resulting test accuracy.
    """
    search = GridSearchCV(clf, model_parameters, cv=cv, scoring='accuracy', n_jobs=n_jobs)
    search.fit(X_train, y_train)

    print('Best estimator: ', search.best_estimator_)

    # Predictions come from the fitted search object itself, not from `clf`.
    y_hat = search.predict(X_test)
    accuracy = accuracy_score(y_test, y_hat)

    print('Accuracy: ', accuracy)

    return accuracy


def run_multiple_experiments(X, y, train_size, clf, model_parameters, seeds):
    """Repeat the split / tune / score cycle once per seed.

    For every seed the data is split with ``train_test_split`` (using the seed
    as ``random_state``), a ``'random_state': [seed]`` entry is added to the
    parameter grid, and ``run_single_experiment`` is called on the result.

    Parameters
    ----------
    X, y
        Full feature matrix and label vector to split.
    train_size
        Forwarded to ``train_test_split(train_size=...)``.
    clf
        Estimator forwarded to ``run_single_experiment``.
    model_parameters
        Base parameter grid (a dict). It is NOT modified: each run receives its
        own copy with the seed added, so the same dict can be reused across
        cells without carrying over state from a previous call.
    seeds
        Iterable of seeds; one experiment is run per element, in order.

    Returns
    -------
    list
        The accuracies returned by ``run_single_experiment``, one per seed, in
        the order of ``seeds`` (empty if ``seeds`` is empty).
    """
    accuracies = []

    for seed in seeds:
        print(f"Running experiment with random_state={seed} and train_size={train_size}")

        X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size, random_state=seed)

        # Per-seed grid built as a copy; the caller's dict is left untouched.
        seeded_parameters = {**model_parameters, 'random_state': [seed]}

        acc = run_single_experiment(X_train, X_test, y_train, y_test, clf, seeded_parameters)
        accuracies.append(acc)

    return accuracies

In [8]:
# Collected results: each later cell appends the list returned by one
# run_multiple_experiments call, so the order of entries follows cell order.
accuracies = []
# Seeds used both for the train/test split and for the estimator's random_state.
seeds = [42, 47, 1998, 451, 1981]

## Experiment 1: Training on 80% of the data (remaining 20% held out for testing)

In [9]:
train_size = 0.8

### Baseline model

In [10]:
# NOTE(review): this fixed split (train_size=0.8, random_state=47) does not appear
# to be referenced by any later cell visible here — run_multiple_experiments
# creates its own split for every seed. Confirm before relying on or removing it.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=47)

In [11]:
# Code matrix passed to SVMRandomInvariantsECOC: the num_classes x num_classes
# identity, i.e. row i contains a single 1 in column i.
# Presumably one row per class and one column per binary sub-problem —
# confirm against lusi.ecoc.
encoding = np.eye(num_classes)
encoding

array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]])

In [12]:
# Define hyperparameters that will be used in the Grid Search
# (1 kernel x 4 C x 4 gamma x 1 num_invariants = 16 candidates per search).
baseline_parameters = {
    'kernel': ['rbf'],
    'C': [0.01, 0.1, 1.0, 10.0],
    'gamma': [0.01, 0.1, 1.0, 'auto'],
    'num_invariants': [0],  # pinned to 0; presumably disables invariants for the baseline — confirm
}

In [13]:
# Baseline estimator: built from the identity code matrix only, every other
# constructor argument left at its default. The grid in baseline_parameters
# pins num_invariants to 0 for it.
svm_baseline = SVMRandomInvariantsECOC(encoding)

In [14]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        svm_baseline,
        baseline_parameters,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.8
Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=42)
Accuracy:  0.7906976744186046
Running experiment with random_state=47 and train_size=0.8
Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=47)
Accuracy:  0.5581395348837209
Running experiment with random_state=1998 and train_size=0.8
Best estimator:  SVMRandomInvariantsECOC(C=1.0,
           

### Random projections ECOC

In [15]:
# Search grid for the random-projection variant:
# 1 kernel x 5 C x 5 gamma x 4 delta x num_dimensions values of num_invariants.
# With num_dimensions = 9 (printed above) that is 900 candidates per search,
# each cross-validated, so these cells are by far the slowest in the notebook.
ecoc_proj_params = {
    'kernel': ['rbf'],
    'C': [0.001, 0.01, 0.1, 1.0, 10.0],
    'gamma': [0.001, 0.01, 0.1, 1.0, 'auto'],
    'delta': [0.001, 0.01, 0.1, 1.0],
    'num_invariants': np.arange(1, num_dimensions + 1),  # 1..num_dimensions inclusive
}

In [16]:
# Estimator for the random-projection experiments.
# The random_state given here is only a placeholder: run_multiple_experiments
# puts 'random_state': [seed] into the search grid, so every fitted candidate
# uses the per-run seed instead (visible in the printed best estimators).
# NOTE(review): the meaning of tolerance=20 is defined in lusi.ecoc — not visible here.
ecoc_projections = SVMRandomInvariantsECOC(
    encoding,
    invariant_type=InvariantTypes.PROJECTION,
    tolerance=20,
    random_state=47,
)

In [17]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        ecoc_projections,
        ecoc_proj_params,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.8
Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=4, random_state=42,
                        tolerance=20)
Accuracy:  0.7906976744186046
Running experiment with random_state=47 and train_size=0.8
Best estimator:  SVMRandomInvariantsECOC(C=1.0, delta=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, random_state=47, tolerance=20)
Accuracy:  0.5581395348837209
Running experiment with random_state=1998 and train_size=0.8
Best estimator

### Random hyperplanes ECOC

In [18]:
# Search grid for the random-hyperplane variant. The values are the same as in
# the random-projection grid:
# 1 kernel x 5 C x 5 gamma x 4 delta x num_dimensions values of num_invariants.
ecoc_hyper_params = {
    'kernel': ['rbf'],
    'C': [0.001, 0.01, 0.1, 1.0, 10.0],
    'gamma': [0.001, 0.01, 0.1, 1.0, 'auto'],
    'delta': [0.001, 0.01, 0.1, 1.0],
    'num_invariants': np.arange(1, num_dimensions + 1),  # 1..num_dimensions inclusive
}

In [19]:
# Estimator for the random-hyperplane experiments; identical to the projection
# estimator except for invariant_type.
# The random_state given here is only a placeholder: run_multiple_experiments
# puts 'random_state': [seed] into the search grid, so every fitted candidate
# uses the per-run seed instead (visible in the printed best estimators).
ecoc_hyperplanes = SVMRandomInvariantsECOC(
    encoding,
    invariant_type=InvariantTypes.HYPERPLANE,
    tolerance=20,
    random_state=47,
)

In [20]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        ecoc_hyperplanes,
        ecoc_hyper_params,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.8
Best estimator:  SVMRandomInvariantsECOC(C=0.1, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=42, tolerance=20)
Accuracy:  0.7906976744186046
Running experiment with random_state=47 and train_size=0.8
Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes

## Experiment 2: Training on 50% of the data (remaining 50% held out for testing)

In [21]:
train_size = 0.5

In [22]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        svm_baseline,
        baseline_parameters,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=42)
Accuracy:  0.6542056074766355
Running experiment with random_state=47 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=47)
Accuracy:  0.6728971962616822
Running experiment with random_state=1998 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=1998)
Accuracy:  0.6542056074766355
Running experiment with random_state=451 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=451)
Accuracy:  0.7009345794392523
Running experiment with random_state=1981 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01, num_invariants=0, random_state=1981)
Accuracy:  0.5794392523364486


In [23]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        ecoc_projections,
        ecoc_proj_params,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        num_invariants=1, random_state=42, tolerance=20)
Accuracy:  0.6728971962616822
Running experiment with random_state=47 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.1, delta=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=4, random_state=47,
                        tolerance=20)
Accuracy:  0.6822429906542056
Running experiment with random_state=1998 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=3, random_state=1998,
                        tolerance=20)
Accuracy:  0.6635514018691588
Running experiment with random_state=451 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=9, random_state=451,
                        tolerance=20)
Accuracy:  0.6822429906542056
Running experiment with random_state=1981 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.001, delta=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01, num_invariants=4, random_state=1981,
                        tolerance=20)
Accuracy:  0.6074766355140186


In [24]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        ecoc_hyperplanes,
        ecoc_hyper_params,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=10.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=9, random_state=42, tolerance=20)
Accuracy:  0.6261682242990654
Running experiment with random_state=47 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.1, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=47, tolerance=20)
Accuracy:  0.6728971962616822
Running experiment with random_state=1998 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=7, random_state=1998, tolerance=20)
Accuracy:  0.5514018691588785
Running experiment with random_state=451 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=1.0, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=4, random_state=451, tolerance=20)
Accuracy:  0.6542056074766355
Running experiment with random_state=1981 and train_size=0.5




Best estimator:  SVMRandomInvariantsECOC(C=0.1, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=1981, tolerance=20)
Accuracy:  0.6635514018691588


## Experiment 3: Training on 30% of the data (remaining 70% held out for testing)

In [25]:
train_size = 0.3

In [26]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        svm_baseline,
        baseline_parameters,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.3
Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        num_invariants=0, random_state=42)
Accuracy: 



 0.6533333333333333
Running experiment with random_state=47 and train_size=0.3
Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=0, random_state=47)
Accuracy:  0.5866666666666667
Running experiment with random_state=1998 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=0, random_state=1998)
Accuracy:  0.6933333333333334
Running experiment with random_state=451 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        num_invariants=0, random_state=451)
Accuracy:  0.6066666666666667
Running experiment with random_state=1981 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=1981)
Accuracy:  0.6266666666666667


In [27]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        ecoc_projections,
        ecoc_proj_params,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.001,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01, num_invariants=1, random_state=42,
                        tolerance=20)
Accuracy:  0.6533333333333333
Running experiment with random_state=47 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=9, random_state=47,
                        tolerance=20)
Accuracy:  0.5866666666666667
Running experiment with random_state=1998 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=9, random_state=1998,
                        tolerance=20)
Accuracy:  0.68
Running experiment with random_state=451 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.001,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        num_invariants=7, random_state=451, tolerance=20)
Accuracy:  0.62
Running experiment with random_state=1981 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=10.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=4, random_state=1981,
                        tolerance=20)
Accuracy:  0.5533333333333333


In [28]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        ecoc_hyperplanes,
        ecoc_hyper_params,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=7, random_state=42, tolerance=20)
Accuracy:  0.6466666666666666
Running experiment with random_state=47 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=47, tolerance=20)
Accuracy:  0.5266666666666666
Running experiment with random_state=1998 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=9, random_state=1998, tolerance=20)
Accuracy:  0.58
Running experiment with random_state=451 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.001, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=451, tolerance=20)
Accuracy:  0.6066666666666667
Running experiment with random_state=1981 and train_size=0.3




Best estimator:  SVMRandomInvariantsECOC(C=0.01, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=4, random_state=1981, tolerance=20)
Accuracy:  0.6533333333333333


## Experiment 4: Training on 20% of the data (remaining 80% held out for testing)

In [29]:
train_size = 0.2

In [30]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        svm_baseline,
        baseline_parameters,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=42)
Accuracy:  0.6046511627906976
Running experiment with random_state=47 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        num_invariants=0, random_state=47)
Accuracy:  0.5988372093023255
Running experiment with random_state=1998 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=0, random_state=1998)
Accuracy:  0.6627906976744186
Running experiment with random_state=451 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=0, random_state=451)
Accuracy:  0.5523255813953488
Running experiment with random_state=1981 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=1981)
Accuracy:  0.5988372093023255


In [31]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        ecoc_projections,
        ecoc_proj_params,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.001, delta=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=1, random_state=42,
                        tolerance=20)
Accuracy:  0.5930232558139535
Running experiment with random_state=47 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.1, delta=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        num_invariants=6, random_state=47, tolerance=20)
Accuracy:  0.6104651162790697
Running experiment with random_state=1998 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=10.0, delta=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=7, random_state=1998,
                        tolerance=20)
Accuracy:  0.627906976744186
Running experiment with random_state=451 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.001,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, random_state=451, tolerance=20)
Accuracy:  0.5406976744186046
Running experiment with random_state=1981 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01, num_invariants=2, random_state=1981,
                        tolerance=20)
Accuracy:  0.45930232558139533


In [32]:
accuracies.append(
    run_multiple_experiments(
        X,
        y,
        train_size,
        ecoc_hyperplanes,
        ecoc_hyper_params,
        seeds,
    )
)

Running experiment with random_state=42 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.001, delta=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=42, tolerance=20)
Accuracy:  0.5930232558139535
Running experiment with random_state=47 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.1, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=2, random_state=47, tolerance=20)
Accuracy:  0.5755813953488372
Running experiment with random_state=1998 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=10.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        random_state=1998, tolerance=20)
Accuracy:  0.5116279069767442
Running experiment with random_state=451 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=0.001, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=451, tolerance=20)
Accuracy:  0.5697674418604651
Running experiment with random_state=1981 and train_size=0.2




Best estimator:  SVMRandomInvariantsECOC(C=10.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        random_state=1981, tolerance=20)
Accuracy:  0.47674418604651164


## Experiment 5: Using 10% of the data

In [33]:
# Experiment 5 setting (10% per the section heading): this value is passed as
# the train-size argument to each run_multiple_experiments call in the cells below.
train_size = 0.1

In [34]:
# Baseline configuration at the current train_size: run it over every seed,
# then record the result as the next entry of `accuracies`.
baseline_run_accuracies = run_multiple_experiments(
    X, y, train_size, svm_baseline, baseline_parameters, seeds
)
accuracies.append(baseline_run_accuracies)

Running experiment with random_state=42 and train_size=0.1
Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01, num_invariants=0, random_state=42)
Accuracy:  0.43523316062176165
Running experiment with random_state=47 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=0, random_state=47)
Accuracy:  0.6010362694300518
Running experiment with random_state=1998 and train_size=0.1
Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01, num_invariants=0, random_state=1998)
Accuracy:  0.44559585492227977
Running experiment with random_state=451 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=0, random_state=451)
Accuracy:  0.44559585492227977
Running experiment with random_state=1981 and train_size=0.1
Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01, num_invariants=0, random_state=1981)
Accuracy:  0.42487046632124353




In [35]:
# Random-projection configuration at the current train_size: run it over every
# seed, then record the result as the next entry of `accuracies`.
projection_run_accuracies = run_multiple_experiments(
    X, y, train_size, ecoc_projections, ecoc_proj_params, seeds
)
accuracies.append(projection_run_accuracies)

Running experiment with random_state=42 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.001, num_invariants=2, random_state=42,
                        tolerance=20)
Accuracy:  0.44559585492227977
Running experiment with random_state=47 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1, num_invariants=9, random_state=47,
                        tolerance=20)
Accuracy:  0.5906735751295337
Running experiment with random_state=1998 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.001, num_invariants=4, random_state=1998,
                        tolerance=20)
Accuracy:  0.44559585492227977
Running experiment with random_state=451 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.01, delta=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0, num_invariants=1, random_state=451,
                        tolerance=20)
Accuracy:  0.44559585492227977
Running experiment with random_state=1981 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.1, delta=0.01,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        random_state=1981, tolerance=20)
Accuracy:  0.5595854922279793


In [36]:
# Random-hyperplane configuration at the current train_size: run it over every
# seed, then record the result as the next entry of `accuracies`.
hyperplane_run_accuracies = run_multiple_experiments(
    X, y, train_size, ecoc_hyperplanes, ecoc_hyper_params, seeds
)
accuracies.append(hyperplane_run_accuracies)

Running experiment with random_state=42 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=7, random_state=42, tolerance=20)
Accuracy:  0.36787564766839376
Running experiment with random_state=47 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=10.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.1,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=8, random_state=47, tolerance=20)
Accuracy:  0.6062176165803109
Running experiment with random_state=1998 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.1, delta=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=1998, tolerance=20)
Accuracy:  0.43005181347150256
Running experiment with random_state=451 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=0.01, delta=0.1,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=1.0,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        num_invariants=1, random_state=451, tolerance=20)
Accuracy:  0.44559585492227977
Running experiment with random_state=1981 and train_size=0.1




Best estimator:  SVMRandomInvariantsECOC(C=1.0,
                        encoding=array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]]),
                        gamma=0.01,
                        invariant_type=<InvariantTypes.HYPERPLANE: 'HYPERPLANE'>,
                        random_state=1981, tolerance=20)
Accuracy:  0.37305699481865284


In [37]:
# Build the summary table: one row per (train size, model) configuration and
# one accuracy column per repeated run.
#
# NOTE(review): the labels below assume `accuracies` holds one entry per
# experiment call, ordered by train size (80% -> 10%) and, within each size,
# baseline / random projections / random hyperplanes -- confirm against the
# experiment cells above.
size_labels = ['80%', '50%', '30%', '20%', '10%']
model_labels = ['Baseline', 'Random Projections', 'Random Hyperplanes']

# Each size label is repeated once per model; the model labels cycle per size.
train_sizes = [size for size in size_labels for _ in model_labels]
models = model_labels * len(size_labels)
problem = ['Glass'] * len(models)

# Bind the array to a new name instead of overwriting `accuracies`: the
# experiment cells call `accuracies.append(...)`, which would fail on an
# ndarray if any of them were re-run after this cell.
accuracy_matrix = np.asarray(accuracies)

# Fail early with a readable message if the collected results do not line up
# with the row labels (e.g. a skipped or doubly-executed experiment cell).
if accuracy_matrix.ndim != 2 or accuracy_matrix.shape[0] != len(models):
    raise ValueError(
        f"Expected a 2-D accuracy table with {len(models)} rows, "
        f"got an array of shape {accuracy_matrix.shape}"
    )

out_df = pd.DataFrame({
    'problem': problem,
    'size': train_sizes,
    'model': models,
    # One column per run: accuracy_1, accuracy_2, ... (as many as were collected).
    **{
        f'accuracy_{run + 1}': accuracy_matrix[:, run]
        for run in range(accuracy_matrix.shape[1])
    },
})

In [38]:
# Save the summary table as CSV; the path is relative, so the file lands in the
# current working directory. index=False leaves the row index out of the file.
out_df.to_csv('glass_multiple_fits.csv', index=False)