In [250]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [3]:
DATA_HOME = '/content/drive/MyDrive/data/cba'

In [209]:
def create_df(name):
  name_path = os.path.join(DATA_HOME, name + '.names')
  data_path = os.path.join(DATA_HOME, name + '.data')
  df_name = pd.read_csv(name_path)
  df_data = pd.read_csv(data_path, names=df_name.columns)
  for col in df_data.columns:
    if df_data[col].dtype == 'O':
      encoder = LabelEncoder()
      encoder.fit(df_data[col])
      df_data[col] = encoder.transform(df_data[col])
  return df_data

In [241]:
australian = create_df('australian')
german = create_df('german')
iris = create_df('iris')
tic_tac_toe = create_df('tic-tac-toe')
zoo = create_df('zoo')
monks = create_df('monks')
seeds = create_df('seeds')
messidor_features = create_df('messidor_features')

In [212]:
australian['class'].unique()

array([0, 1])

In [214]:
german['class'].unique()

array([1, 2])

In [215]:
iris['class'].unique()

array([0, 1, 2])

In [242]:
tic_tac_toe['class'].unique()

array([1, 0])

In [217]:
zoo['class'].unique()

array([1, 4, 2, 7, 6, 5, 3])

In [218]:
monks['class'].unique()

array([1, 0])

In [219]:
seeds['class'].unique()

array([1, 2, 3])

In [220]:
messidor_features['class'].unique()

array([0, 1])

In [247]:
def svm_model(df, test_split, kernel, seed):
  X = df.drop('class', axis=1)
  y = df['class']
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed)
  clf = svm.SVC(kernel=kernel)
  clf.fit(X_train, y_train)
  y_pred = clf.predict(X_test)
  return accuracy_score(y_test, y_pred), f1_score(y_test, y_pred, average='weighted')

In [261]:
def multi_svm_model(df, test_split, kernel, seed):
  clfs = []
  y_pred = []
  y_pred_to_class = []
  X = df.drop('class', axis = 1)
  y = df[['class']]
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_split, random_state=seed)
  for i in df['class'].unique():
    y_train_new = [1 if t==i else 0 for t in y_train['class']]
    clf = svm.SVC(kernel=kernel)
    clf.fit(X_train, y_train_new)
    clfs.append(clf)
  for clf in clfs:
    y_pred.append(clf.predict(X_test))
  y_pred_enc = np.transpose(np.array(y_pred)) # y_pred_enc is now an one-hot encoded class
  encoder = OneHotEncoder()
  encoder.fit(y)
  y_test_enc = encoder.transform(y_test).toarray()
  return accuracy_score(y_test_enc, y_pred_enc), f1_score(y_test_enc, y_pred_enc, average='weighted')

In [262]:
australian = create_df('australian')
german = create_df('german')
iris = create_df('iris')
tic_tac_toc = create_df('tic-tac-toe')
zoo = create_df('zoo')
monks = create_df('monks')
seeds = create_df('seeds')
messidor_features = create_df('messidor_features')

In [263]:
multi_svm_model(iris, 0.3, 'linear', 42)

(0.7333333333333333, 0.8266666666666668)

In [264]:
multi_svm_model(iris, 0.3, 'poly', 42)

(0.9777777777777777, 0.9893004115226337)

In [265]:
multi_svm_model(iris, 0.3, 'rbf', 42)

(0.9777777777777777, 0.9893004115226337)

In [266]:
multi_svm_model(iris, 0.3, 'sigmoid', 42)

(0.0, 0.0)

In [267]:
multi_svm_model(zoo, 0.3, 'linear', 42)

(0.4838709677419355, 0.4838709677419355)

In [268]:
multi_svm_model(zoo, 0.3, 'poly', 42)

(0.45161290322580644, 0.46718576195773087)

In [269]:
multi_svm_model(zoo, 0.3, 'rbf', 42)

(0.4838709677419355, 0.4838709677419355)

In [270]:
multi_svm_model(zoo, 0.3, 'sigmoid', 42)

(0.4838709677419355, 0.37220843672456577)

In [271]:
multi_svm_model(seeds, 0.3, 'linear', 42)

(0.8888888888888888, 0.9298404095152063)

In [272]:
multi_svm_model(seeds, 0.3, 'poly', 42)

(0.7936507936507936, 0.8694253750317822)

In [273]:
multi_svm_model(seeds, 0.3, 'rbf', 42)

(0.7936507936507936, 0.8558097833460152)

In [274]:
multi_svm_model(seeds, 0.3, 'sigmoid', 42)

(0.0, 0.0)

In [275]:
svm_model(australian, 0.3, 'linear', 42)

(0.8695652173913043, 0.8687570512495865)

In [276]:
svm_model(australian, 0.3, 'poly', 42)

(0.6376811594202898, 0.5230595711318653)

In [277]:
svm_model(australian, 0.3, 'rbf', 42)

(0.6859903381642513, 0.6369313463286405)

In [278]:
svm_model(australian, 0.3, 'sigmoid', 42)

(0.6859903381642513, 0.676137275206744)

In [279]:
svm_model(german, 0.3, 'linear', 42)

(0.7633333333333333, 0.7511683992082548)

In [280]:
svm_model(german, 0.3, 'poly', 42)

(0.7, 0.5798356727148237)

In [281]:
svm_model(german, 0.3, 'rbf', 42)

(0.7133333333333334, 0.6093882275132274)

In [282]:
svm_model(german, 0.3, 'sigmoid', 42)

(0.58, 0.5812729933110368)

In [283]:
svm_model(tic_tac_toe, 0.3, 'linear', 42)

(0.6701388888888888, 0.5377829752829753)

In [284]:
svm_model(tic_tac_toe, 0.3, 'poly', 42)

(0.9479166666666666, 0.9473843838206872)

In [285]:
svm_model(tic_tac_toe, 0.3, 'rbf', 42)

(0.875, 0.8671264611471141)

In [286]:
svm_model(tic_tac_toe, 0.3, 'sigmoid', 42)

(0.6354166666666666, 0.5506710518965421)

In [287]:
svm_model(monks, 0.3, 'linear', 42)

(0.7153846153846154, 0.7152330360922585)

In [288]:
svm_model(monks, 0.3, 'poly', 42)

(0.9307692307692308, 0.9307321847544217)

In [289]:
svm_model(monks, 0.3, 'rbf', 42)

(0.9384615384615385, 0.9384615384615385)

In [290]:
svm_model(monks, 0.3, 'sigmoid', 42)

(0.46923076923076923, 0.29971808296415625)

In [291]:
svm_model(messidor_features, 0.3, 'linear', 42)

(0.7485549132947977, 0.7481242523196652)

In [292]:
svm_model(messidor_features, 0.3, 'poly', 42)

(0.6705202312138728, 0.6568107134373267)

In [293]:
svm_model(messidor_features, 0.3, 'rbf', 42)

(0.6589595375722543, 0.6538209330867456)

In [294]:
svm_model(messidor_features, 0.3, 'sigmoid', 42)

(0.41329479768786126, 0.41488158945477177)