**Datasets** You are provided with training and testing datasets (see _train.txt_ and _test.txt_), containing 120 training samples and 30 testing samples, respectively. They cover 3 classes: setosa, versicolor, and virginica. They are derived from the Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris), which contains 3 classes of 50 instances each, where each class refers to a type of iris plant. Your task is to classify each iris plant as one of the three possible types.

# 1

In [1]:
# Author: Yohandi (120040025)

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import load_iris

# Load the tab-separated splits; the first column is taken as the label and
# the remaining columns as features.
# NOTE(review): read_csv consumes the first line as a header by default --
# confirm train.txt / test.txt really start with a header row.
df_train = pd.read_csv("train.txt", sep="\t")
X_train = df_train.iloc[:, 1:].values
y_train = df_train.iloc[:, :1].values.ravel()

df_test = pd.read_csv("test.txt", sep="\t")
X_test = df_test.iloc[:, 1:].values
y_test = df_test.iloc[:, :1].values.ravel()

# Linear-kernel SVM; the very large C heavily penalizes margin violations.
clf = SVC(C=10 ** 5, kernel="linear")
clf.fit(X_train, y_train)

# The handle stays open on purpose: the statements further down in this cell
# keep writing to it and close it.
f = open("SVM_linear.txt", "w")

# Error rate (1 - accuracy) on each split, written with 16 decimal places.
print(format(1 - clf.score(X_train, y_train), ".16f"), file=f)
print(format(1 - clf.score(X_test, y_test), ".16f"), file=f)

def vectorToCommaSeparated(vect):
    """Flatten an array-like into a single comma-separated string.

    Each item of *vect* that reports ``size > 1`` is flattened recursively;
    every other item is converted with ``str``.

    Args:
        vect: An iterable whose items expose a ``size`` attribute (for
            example a NumPy array), or any other object.

    Returns:
        The comma-joined string, or *vect* itself, unchanged, when it cannot
        be iterated or its items do not expose ``size`` (for example a NumPy
        scalar).
    """
    try:
        parts = [
            vectorToCommaSeparated(item) if item.size > 1 else str(item)
            for item in vect
        ]
        return ",".join(parts)
    except (TypeError, AttributeError):
        # Only "this is not a flattenable vector" failures fall back to the
        # raw value; a bare ``except`` would also swallow KeyboardInterrupt,
        # SystemExit and genuine bugs.
        return vect

# Everything below writes to the report file opened earlier in this cell; the
# try/finally guarantees that handle is closed even if a lookup or write fails.
try:
    # Map the class names to their indices in scikit-learn's Iris metadata.
    data = load_iris()
    target_names_list = list(data.target_names)
    # `name` (not `str`) so the loop variable does not shadow the builtin.
    types = [target_names_list.index(name) for name in ["setosa", "versicolor", "virginica"]]

    # The slicing below assumes clf.support_ is grouped class by class, with
    # clf.n_support_[c] entries for class c; idx marks the start of each slice.
    idx = 0
    for class_type in types:
        # NOTE(review): for a multi-class SVC, coef_ / intercept_ rows belong to
        # pairwise (one-vs-one) classifiers rather than to single classes --
        # confirm that indexing them by class index is what the report expects.
        print(vectorToCommaSeparated(clf.coef_[class_type]), file=f)
        print(vectorToCommaSeparated(clf.intercept_[class_type]), file=f)
        print(vectorToCommaSeparated(clf.support_[idx:idx + clf.n_support_[class_type]]), file=f)
        idx += clf.n_support_[class_type]
finally:
    f.close()

# 2

In [24]:
# Author: Yohandi (120040025)

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import load_iris

# Load the tab-separated splits; the first column is taken as the label and
# the remaining columns as features.
# NOTE(review): read_csv consumes the first line as a header by default --
# confirm train.txt / test.txt really start with a header row.
df_train = pd.read_csv("train.txt", sep="\t")
X_train = df_train.iloc[:, 1:].values
y_train = df_train.iloc[:, :1].values.ravel()

df_test = pd.read_csv("test.txt", sep="\t")
X_test = df_test.iloc[:, 1:].values
y_test = df_test.iloc[:, :1].values.ravel()


def vectorToCommaSeparated(vect):
    """Flatten an array-like into a single comma-separated string.

    Each item of *vect* that reports ``size > 1`` is flattened recursively;
    every other item is converted with ``str``.

    Args:
        vect: An iterable whose items expose a ``size`` attribute (for
            example a NumPy array), or any other object.

    Returns:
        The comma-joined string, or *vect* itself, unchanged, when it cannot
        be iterated or its items do not expose ``size`` (for example a NumPy
        scalar).
    """
    try:
        parts = [
            vectorToCommaSeparated(item) if item.size > 1 else str(item)
            for item in vect
        ]
        return ",".join(parts)
    except (TypeError, AttributeError):
        # Only "this is not a flattenable vector" failures fall back to the
        # raw value; a bare ``except`` would also swallow KeyboardInterrupt,
        # SystemExit and genuine bugs.
        return vect

def findSlack(class_type):
    """Return the largest per-sample deviation score over train and test data.

    For every row produced by ``clf.decision_function`` on ``X_train`` and
    then on ``X_test``, the row's score is the smallest absolute difference
    between any of its first three entries and *class_type*. The maximum of
    those scores is returned (``0`` when no score exceeds zero).

    Reads the module-level ``clf``, ``X_train`` and ``X_test``.

    NOTE(review): this is not the textbook hinge-loss slack
    ``max(0, 1 - y * f(x))`` -- confirm this is the intended measure.
    """
    worst = 0
    for samples in (X_train, X_test):
        for row in clf.decision_function(samples):
            nearest = min(abs(row[k] - class_type) for k in range(3))
            if nearest > worst:
                worst = nearest
    return worst
    
# The class indices do not depend on C, so resolve them once instead of
# reloading the Iris metadata on every iteration.
data = load_iris()
target_names_list = list(data.target_names)
# `name` (not `str`) so the loop variable does not shadow the builtin.
types = [target_names_list.index(name) for name in ["setosa", "versicolor", "virginica"]]

# The context manager closes the report even if a fit or a write fails.
with open("SVM_slack.txt", "w") as f:
    # Sweep C over 0.1, 0.2, ..., 1.0.
    for t in range(1, 11):
        if t != 1:
            # Blank line separating consecutive C blocks.
            print("", file=f)

        # clf must remain a module-level name: findSlack reads it as a global.
        clf = SVC(kernel="linear", C=0.1 * t)
        clf.fit(X_train, y_train)

        # Error rate (1 - accuracy) on each split, 16 decimal places.
        print("{:.16f}".format(1 - clf.score(X_train, y_train)), file=f)
        print("{:.16f}".format(1 - clf.score(X_test, y_test)), file=f)

        # The slicing below assumes clf.support_ is grouped class by class,
        # with clf.n_support_[c] entries for class c.
        idx = 0
        for class_type in types:
            # NOTE(review): for a multi-class SVC, coef_ / intercept_ rows
            # belong to pairwise (one-vs-one) classifiers rather than to
            # single classes -- confirm indexing by class index is intended.
            print(vectorToCommaSeparated(clf.coef_[class_type]), file=f)
            print(vectorToCommaSeparated(clf.intercept_[class_type]), file=f)
            print(vectorToCommaSeparated(clf.support_[idx:idx + clf.n_support_[class_type]]), file=f)
            idx += clf.n_support_[class_type]
            print(findSlack(class_type), file=f)

# 3

In [3]:
# Author: Yohandi (120040025)
# Kindly run this part first

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import load_iris

def vectorToCommaSeparated(vect):
    """Flatten an array-like into a single comma-separated string.

    Each item of *vect* that reports ``size > 1`` is flattened recursively;
    every other item is converted with ``str``.

    Args:
        vect: An iterable whose items expose a ``size`` attribute (for
            example a NumPy array), or any other object.

    Returns:
        The comma-joined string, or *vect* itself, unchanged, when it cannot
        be iterated or its items do not expose ``size`` (for example a NumPy
        scalar).
    """
    try:
        parts = [
            vectorToCommaSeparated(item) if item.size > 1 else str(item)
            for item in vect
        ]
        return ",".join(parts)
    except (TypeError, AttributeError):
        # Only "this is not a flattenable vector" failures fall back to the
        # raw value; a bare ``except`` would also swallow KeyboardInterrupt,
        # SystemExit and genuine bugs.
        return vect

In [4]:
# Author: Yohandi (120040025)

# Load the tab-separated splits; the first column is taken as the label and
# the remaining columns as features.
df_train = pd.read_csv("train.txt", sep="\t")
X_train = df_train.iloc[:, 1:].values
y_train = np.ravel(df_train.iloc[:, :1].values)

df_test = pd.read_csv("test.txt", sep="\t")
X_test = df_test.iloc[:, 1:].values
y_test = np.ravel(df_test.iloc[:, :1].values)

# Fit before opening the report so a failed fit does not leave a truncated
# output file behind.
clf = SVC(kernel="poly", degree=2, C=1)
clf.fit(X_train, y_train)

# Map the class names to their indices in scikit-learn's Iris metadata.
data = load_iris()
target_names_list = list(data.target_names)
# `name` (not `str`) so the loop variable does not shadow the builtin.
types = [target_names_list.index(name) for name in ["setosa", "versicolor", "virginica"]]

# The context manager closes the report even if a write fails.
with open("SVM_poly2.txt", "w") as f:
    # Error rate (1 - accuracy) on each split, 16 decimal places.
    print("{:.16f}".format(1 - clf.score(X_train, y_train)), file=f)
    print("{:.16f}".format(1 - clf.score(X_test, y_test)), file=f)

    # The slicing below assumes clf.support_ is grouped class by class, with
    # clf.n_support_[c] entries for class c.
    idx = 0
    for class_type in types:
        # NOTE(review): for a multi-class SVC, intercept_ entries belong to
        # pairwise (one-vs-one) classifiers -- confirm indexing by class index.
        print(vectorToCommaSeparated(clf.intercept_[class_type]), file=f)
        print(vectorToCommaSeparated(clf.support_[idx:idx + clf.n_support_[class_type]]), file=f)
        idx += clf.n_support_[class_type]

In [5]:
# Author: Yohandi (120040025)

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import load_iris

# Load the tab-separated splits; the first column is taken as the label and
# the remaining columns as features.
df_train = pd.read_csv("train.txt", sep="\t")
X_train = df_train.iloc[:, 1:].values
y_train = np.ravel(df_train.iloc[:, :1].values)

df_test = pd.read_csv("test.txt", sep="\t")
X_test = df_test.iloc[:, 1:].values
y_test = np.ravel(df_test.iloc[:, :1].values)

# Fit before opening the report so a failed fit does not leave a truncated
# output file behind.
clf = SVC(kernel="poly", degree=3, C=1)
clf.fit(X_train, y_train)

# Map the class names to their indices in scikit-learn's Iris metadata.
data = load_iris()
target_names_list = list(data.target_names)
# `name` (not `str`) so the loop variable does not shadow the builtin.
types = [target_names_list.index(name) for name in ["setosa", "versicolor", "virginica"]]

# The context manager closes the report even if a write fails.
with open("SVM_poly3.txt", "w") as f:
    # Error rate (1 - accuracy) on each split, 16 decimal places.
    print("{:.16f}".format(1 - clf.score(X_train, y_train)), file=f)
    print("{:.16f}".format(1 - clf.score(X_test, y_test)), file=f)

    # The slicing below assumes clf.support_ is grouped class by class, with
    # clf.n_support_[c] entries for class c.
    idx = 0
    for class_type in types:
        # NOTE(review): for a multi-class SVC, intercept_ entries belong to
        # pairwise (one-vs-one) classifiers -- confirm indexing by class index.
        print(vectorToCommaSeparated(clf.intercept_[class_type]), file=f)
        print(vectorToCommaSeparated(clf.support_[idx:idx + clf.n_support_[class_type]]), file=f)
        idx += clf.n_support_[class_type]

In [6]:
# Author: Yohandi (120040025)

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import load_iris

# Load the tab-separated splits; the first column is taken as the label and
# the remaining columns as features.
df_train = pd.read_csv("train.txt", sep="\t")
X_train = df_train.iloc[:, 1:].values
y_train = np.ravel(df_train.iloc[:, :1].values)

df_test = pd.read_csv("test.txt", sep="\t")
X_test = df_test.iloc[:, 1:].values
y_test = np.ravel(df_test.iloc[:, :1].values)

# Fit before opening the report so a failed fit does not leave a truncated
# output file behind.
clf = SVC(kernel="rbf", gamma=0.5, C=1)
clf.fit(X_train, y_train)

# Map the class names to their indices in scikit-learn's Iris metadata.
data = load_iris()
target_names_list = list(data.target_names)
# `name` (not `str`) so the loop variable does not shadow the builtin.
types = [target_names_list.index(name) for name in ["setosa", "versicolor", "virginica"]]

# The context manager closes the report even if a write fails.
with open("SVM_rbf.txt", "w") as f:
    # Error rate (1 - accuracy) on each split, 16 decimal places.
    print("{:.16f}".format(1 - clf.score(X_train, y_train)), file=f)
    print("{:.16f}".format(1 - clf.score(X_test, y_test)), file=f)

    # The slicing below assumes clf.support_ is grouped class by class, with
    # clf.n_support_[c] entries for class c.
    idx = 0
    for class_type in types:
        # NOTE(review): for a multi-class SVC, intercept_ entries belong to
        # pairwise (one-vs-one) classifiers -- confirm indexing by class index.
        print(vectorToCommaSeparated(clf.intercept_[class_type]), file=f)
        print(vectorToCommaSeparated(clf.support_[idx:idx + clf.n_support_[class_type]]), file=f)
        idx += clf.n_support_[class_type]

In [7]:
# Author: Yohandi (120040025)

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import load_iris

# Load the tab-separated splits; the first column is taken as the label and
# the remaining columns as features.
df_train = pd.read_csv("train.txt", sep="\t")
X_train = df_train.iloc[:, 1:].values
y_train = np.ravel(df_train.iloc[:, :1].values)

df_test = pd.read_csv("test.txt", sep="\t")
X_test = df_test.iloc[:, 1:].values
y_test = np.ravel(df_test.iloc[:, :1].values)

# Fit before opening the report so a failed fit does not leave a truncated
# output file behind.
clf = SVC(kernel="sigmoid", gamma=0.25, C=1)
clf.fit(X_train, y_train)

# Map the class names to their indices in scikit-learn's Iris metadata.
data = load_iris()
target_names_list = list(data.target_names)
# `name` (not `str`) so the loop variable does not shadow the builtin.
types = [target_names_list.index(name) for name in ["setosa", "versicolor", "virginica"]]

# The context manager closes the report even if a write fails.
with open("SVM_sigmoid.txt", "w") as f:
    # Error rate (1 - accuracy) on each split, 16 decimal places.
    print("{:.16f}".format(1 - clf.score(X_train, y_train)), file=f)
    print("{:.16f}".format(1 - clf.score(X_test, y_test)), file=f)

    # The slicing below assumes clf.support_ is grouped class by class, with
    # clf.n_support_[c] entries for class c.
    idx = 0
    for class_type in types:
        # NOTE(review): for a multi-class SVC, intercept_ entries belong to
        # pairwise (one-vs-one) classifiers -- confirm indexing by class index.
        print(vectorToCommaSeparated(clf.intercept_[class_type]), file=f)
        print(vectorToCommaSeparated(clf.support_[idx:idx + clf.n_support_[class_type]]), file=f)
        idx += clf.n_support_[class_type]