# Multi-class and Multi-Label Classification Using Support Vector Machine

## import package

In [95]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from sklearn import svm
from sklearn.metrics import hamming_loss
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from imblearn.over_sampling import SMOTE

## Split traning and test set

In [None]:
df=pd.read_csv('Frogs_MFCCs.csv')
df_train=df.sample(frac=0.7, replace=False, random_state=1)
df_test=df.drop(df_train.index)
x_train=df_train.iloc[:,:-4]
x_test=df_test.iloc[:,:-4]

## train a classifier for each label for multi-calss classification

### research exact match and hamming score/loss for evaluating multi-label classification

#### Family

In [96]:
# Family
y_train_Family = df_train[["Family"]]
y_test_Family=df_test[["Family"]]

svc = svm.SVC(gamma='auto')
model_Family = svc.fit(x_train, y_train_Family)
y_predict_Family = model_Family.predict(x_test)

hamming_loss_Family=hamming_loss(y_test_Family, y_predict_Family)
exact_match_score_Family=accuracy_score(y_test_Family, y_predict_Family)
print('Family hamming loss:',hamming_loss_Family)
print('Family exact match score:',exact_match_score_Family)

Family hamming loss: 0.07503473830477073
Family exact match score: 0.9249652616952293


#### Genus

In [97]:
# Genus
y_train_Genus = df_train[["Genus"]]
y_test_Genus=df_test[["Genus"]]

svc = svm.SVC(gamma='auto')
model_Genus = svc.fit(x_train, y_train_Genus)
y_predict_Genus = model_Genus.predict(x_test)

hamming_loss_Genus=hamming_loss(y_test_Genus, y_predict_Genus)
exact_match_score_Genus=accuracy_score(y_test_Genus, y_predict_Genus)
print('Genus hamming loss:',hamming_loss_Genus)
print('Genus exact match score:',exact_match_score_Genus)

Genus hamming loss: 0.09448818897637795
Genus exact match score: 0.905511811023622


#### Species

In [98]:
# Species
y_train_Species = df_train[["Species"]]
y_test_Species=df_test[["Species"]]

svc = svm.SVC(gamma='auto')
model_Species = svc.fit(x_train, y_train_Species)
y_predict_Species = model_Species.predict(x_test)

hamming_loss_Species=hamming_loss(y_test_Species, y_predict_Species)
exact_match_score_Species=accuracy_score(y_test_Species, y_predict_Species)
print('Species hamming loss:',hamming_loss_Species)
print('Species exact match score:',exact_match_score_Species)

Species hamming loss: 0.08337193144974525
Species exact match score: 0.9166280685502547


### Train SVM for each of labels, using Gaussian kernels and one versus all classifiers(standarduzed and raw attributes)

In [99]:
# standarlize features
scaler = StandardScaler()
model_train=scaler.fit(x_train)
train=model_train.transform(x_train)
std_x_train=pd.DataFrame(train)

model_test=scaler.fit(x_test)
test=model_test.transform(x_test)
std_x_test=pd.DataFrame(test)

#### Family

In [128]:
# Family (raw attributes)
parameters={'kernel':['rbf'],'C':np.logspace(-3,6,10),'gamma':np.linspace(0.1,2,20)}
clf_Family = GridSearchCV(svm.SVC(), param_grid=parameters, cv=10)
model_clf_Family=clf_Family.fit(x_train, y_train_Family)
best_params_Family=model_clf_Family.best_params_
print('Family best paramters:',best_params_Family)

Family best paramters: {'C': 10.0, 'gamma': 2.0, 'kernel': 'rbf'}


In [129]:
# Family (standardized attributes)
model_std_clf_Family=clf_Family.fit(std_x_train, y_train_Family)
std_best_params_Family=model_std_clf_Family.best_params_
print('Family std best paramters:',std_best_params_Family)

Family std best paramters: {'C': 10.0, 'gamma': 0.1, 'kernel': 'rbf'}


#### Genus

In [130]:
# Genus (raw attributes)
parameters={'kernel':['rbf'],'C':np.logspace(-3,6,10),'gamma':np.linspace(0.1,2,20)}
clf_Genus = GridSearchCV(svm.SVC(), param_grid=parameters, cv=10)
model_clf_Genus=clf_Genus.fit(x_train, y_train_Genus)
best_params_Genus=model_clf_Genus.best_params_
print('Genus best paramters:',best_params_Genus)

Genus best paramters: {'C': 10.0, 'gamma': 1.0999999999999999, 'kernel': 'rbf'}


In [131]:
# Genus (standardized attributes)
model_std_clf_Genus=clf_Genus.fit(std_x_train, y_train_Genus)
std_best_params_Genus=model_std_clf_Genus.best_params_
print('Genus std best paramters:',std_best_params_Genus)

Genus std best paramters: {'C': 10.0, 'gamma': 0.1, 'kernel': 'rbf'}


#### Species

In [132]:
# Species (raw attributes)
parameters={'kernel':['rbf'],'C':np.logspace(-3,6,10),'gamma':np.linspace(0.1,2,20)}
clf_Species = GridSearchCV(svm.SVC(), param_grid=parameters, cv=10)
model_clf_Species=clf_Genus.fit(x_train, y_train_Species)
best_params_Species=model_clf_Species.best_params_
print('Species best paramters:',best_params_Species)

Species best paramters: {'C': 10.0, 'gamma': 1.0999999999999999, 'kernel': 'rbf'}


In [133]:
# Species (standardized attributes)
model_std_clf_Species=clf_Genus.fit(std_x_train, y_train_Genus)
std_best_params_Species=model_std_clf_Species.best_params_
print('Species std best paramters:',std_best_params_Species)

Species std best paramters: {'C': 10.0, 'gamma': 0.1, 'kernel': 'rbf'}


### Train L1-penalized SVMs for each of labels, using Gaussian kernels and one versus all classifiers(standarduzed and raw attributes)

#### Family

In [114]:
# Family (raw attributes)
parameters={'C':np.logspace(-3,6,10)}
clf_Family = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
model_clf_Family=clf_Family.fit(x_train, y_train_Family)
best_params_Family=model_clf_Family.best_params_
print('Family best paramters:',best_params_Family)

Family best paramters: {'C': 10.0}


In [115]:
# Family (standardized attributes)
model_std_clf_Family=clf_Family.fit(std_x_train, y_train_Family)
std_best_params_Family=model_std_clf_Family.best_params_
print('Family std best paramters:',std_best_params_Family)

Family std best paramters: {'C': 1.0}


#### Genus

In [116]:
# Genus (raw attributes)
parameters={'C':np.logspace(-3,6,10)}
clf_Genus = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
model_clf_Genus=clf_Genus.fit(x_train, y_train_Genus)
best_params_Genus=model_clf_Genus.best_params_
print('Genus best paramters:',best_params_Genus)

Genus best paramters: {'C': 1000.0}


In [117]:
# Genus (standardized attributes)
model_std_clf_Genus=clf_Genus.fit(std_x_train, y_train_Genus)
std_best_params_Genus=model_std_clf_Genus.best_params_
print('Genus std best paramters:',std_best_params_Genus)

Genus std best paramters: {'C': 100.0}


#### Specises

In [118]:
# Species (raw attributes)
parameters={'C':np.logspace(-3,6,10)}
clf_Species = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
model_clf_Species=clf_Genus.fit(x_train, y_train_Species)
best_params_Species=model_clf_Species.best_params_
print('Species best paramters:',best_params_Species)

Species best paramters: {'C': 10.0}


In [119]:
# Species (standardized attributes)
model_std_clf_Species=clf_Genus.fit(std_x_train, y_train_Genus)
std_best_params_Species=model_std_clf_Species.best_params_
print('Species std best paramters:',std_best_params_Species)

Species std best paramters: {'C': 100.0}


### Train L1-penalized SVMs for each of labels, using Gaussian kernels and one versus all classifiers(standarduzed and raw attributes),using SMOTE to remedy class imbalance

#### Family

In [120]:
smt = SMOTE()
# Family (raw attributes)
x_train_Family_new, y_train_Family_new = smt.fit_sample(x_train, y_train_Family)
parameters={'C':np.logspace(-3,6,10)}
clf_Family = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
smote_model_clf_Family=clf_Family.fit(x_train_Family_new, y_train_Family_new)
smote_best_params_Family=smote_model_clf_Family.best_params_
print('SMOTE Family best paramters:',smote_best_params_Family)

SMOTE Family best paramters: {'C': 100.0}


In [121]:
# Family (standardized attributes)
std_x_train_Family_new, std_y_train_Family_new = smt.fit_sample(std_x_train, y_train_Family)
parameters={'C':np.logspace(-3,6,10)}
clf_Family = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
std_smote_model_clf_Family=clf_Family.fit(std_x_train_Family_new, std_y_train_Family_new)
std_smote_best_params_Family=std_smote_model_clf_Family.best_params_
print('std SMOTE Family best paramters:',std_smote_best_params_Family)

std SMOTE Family best paramters: {'C': 100.0}


#### Genus

In [122]:
# Genus (raw attributes)
x_train_Genus_new, y_train_Genus_new = smt.fit_sample(x_train, y_train_Genus)
parameters={'C':np.logspace(-3,6,10)}
clf_Genus = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
smote_model_clf_Genus=clf_Genus.fit(x_train_Genus_new, y_train_Genus_new)
smote_best_params_Genus=smote_model_clf_Genus.best_params_
print('SMOTE Genus best paramters:',smote_best_params_Genus)

SMOTE Genus best paramters: {'C': 100000.0}


In [123]:
# Genus (standardized attributes)
std_x_train_Genus_new, std_y_train_Genus_new = smt.fit_sample(std_x_train, y_train_Genus)
parameters={'C':np.logspace(-3,6,10)}
clf_Genus = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
std_smote_model_clf_Genus=clf_Genus.fit(std_x_train_Genus_new, std_y_train_Genus_new)
std_smote_best_params_Genus=std_smote_model_clf_Genus.best_params_
print('std SMOTE Genus best paramters:',std_smote_best_params_Genus)

std SMOTE Genus best paramters: {'C': 100000.0}


#### Species

In [124]:
# Species (raw attributes)
x_train_Species_new, y_train_Species_new = smt.fit_sample(x_train, y_train_Species)
parameters={'C':np.logspace(-3,6,10)}
clf_Genus = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
smote_model_clf_Species=clf_Species.fit(x_train_Species_new, y_train_Species_new)
smote_best_params_Species=smote_model_clf_Species.best_params_
print('SMOTE Species best paramters:',smote_best_params_Species)

SMOTE Species best paramters: {'C': 100.0}


In [125]:
# Species (standardized attributes)
std_x_train_Species_new, std_y_train_Species_new = smt.fit_sample(std_x_train, y_train_Species)
parameters={'C':np.logspace(-3,6,10)}
clf_Species = GridSearchCV(LinearSVC(penalty='l1', dual=False), param_grid=parameters, cv=10)
std_smote_model_clf_Species=clf_Species.fit(std_x_train_Species_new, std_y_train_Species_new)
std_smote_best_params_Species=std_smote_model_clf_Species.best_params_
print('std SMOTE Species best paramters:',std_smote_best_params_Species)

std SMOTE Species best paramters: {'C': 10.0}
