# Support Vector Machine (SVM) Classification

## Importing the libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import svm
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier


## Importing the dataset

In [5]:
# Load the Iris classification workbook; every later cell reads from `dataset`.
dataset = pd.read_excel('Classification iris(2).xlsx')

# Fail fast with a readable message if the sheet lacks a column that the
# following cells access by name (ids, labels and the four features).
_required_columns = ['instance_id', 'class',
                     'sepal length', 'sepal width', 'petal length', 'petal width']
_missing_columns = [col for col in _required_columns if col not in dataset.columns]
if _missing_columns:
    raise KeyError(f"Dataset is missing expected column(s): {_missing_columns}")

## 2.2.1 Splitting the dataset

In [6]:
# train_test_split(X, y, test_size=0.30, random_state=0) is not used here:
# the split has to be made per class and has to keep the original order of
# the data, so it is done by hand on the instance ids.
training_id = []
test_id = []
train_ratio = 0.7
test_ratio = 0.3

# The two ratios describe one partition, so they must add up to 1.
if abs(train_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError("train_ratio and test_ratio must sum to 1")

classes = dataset['class'].unique()
for cls in classes:
    cls_dataset = dataset[dataset['class'] == cls]
    cls_id = cls_dataset['instance_id'].tolist()

    # round() before int() guards against float error: e.g. 0.7 * 90 evaluates
    # to 62.99999999999999, which a bare int() would truncate to 62.
    num_train = int(round(train_ratio * len(cls_id), 9))  # 35 for 50 rows
    num_test = len(cls_id) - num_train                    # 15 for 50 rows

    # First part of each class goes to training, the remainder to test.
    training_id.extend(cls_id[:num_train])
    test_id.extend(cls_id[num_train:])

# Sort the ids so both lists follow ascending instance id
# (assumes instance_id increases with the original row order — TODO confirm).
training_id_sorted = sorted(training_id)
test_id_sorted = sorted(test_id)

print("Q2.2.1 Split training set and test set:")
print(f"Training set: {training_id_sorted}")
print(f"Test set: {test_id_sorted}")


Q2.2.1 Split training set and test set:
Training set: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135]
Test set: [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150]


## Feature Scaling and Label Encoder

In [8]:
# Feature scaling is not strictly required for an SVM, but it is strongly
# recommended: the SVM looks for the maximum-margin hyperplane, so features on
# very different scales distort the margin and slow down convergence.
FEATURE_COLUMNS = ['sepal length', 'sepal width', 'petal length', 'petal width']

# Select rows by instance id in the exact order of the id lists, so that row i
# of X_train is guaranteed to be instance training_id_sorted[i]. Later cells
# translate support-vector row positions back to ids through that list, which
# a boolean isin() mask only satisfies when the sheet is already sorted by id.
_by_instance = dataset.set_index('instance_id')
_train_rows = _by_instance.loc[training_id_sorted]
_test_rows = _by_instance.loc[test_id_sorted]

X_train = _train_rows[FEATURE_COLUMNS].values
y_train = _train_rows['class'].values

X_test = _test_rows[FEATURE_COLUMNS].values
y_test = _test_rows['class'].values

# Row counts must match the id lists for the position -> id mapping to hold.
assert X_train.shape[0] == len(training_id_sorted)
assert X_test.shape[0] == len(test_id_sorted)

# Fit the scaler on the training rows only; the test rows reuse those
# statistics so no information from the test set leaks into training.
sc = StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

# Encode the class names as integers; class_names[k] is the name of label k.
encode = LabelEncoder()
y_train_encoded = encode.fit_transform(y_train)
y_test_encoded = encode.transform(y_test)
class_names = encode.classes_


## 2.2.2 Calculation using Standard SVM Model (Linear Kernel)

In [9]:
class_results = {}
linearly_separable_classes = []

# A very large C makes the soft-margin SVC behave like a hard-margin SVM.
svm_model = SVC(kernel='linear', C=1e5, random_state=0)

# One-vs-rest: one binary linear SVM per class.
ovr = OneVsRestClassifier(svm_model)
ovr.fit(X_train_scaled, y_train_encoded)

y_train_pred = ovr.predict(X_train_scaled)
y_test_pred = ovr.predict(X_test_scaled)

# Overall error = fraction of samples whose multi-class prediction is wrong.
total_train_errors = np.sum(y_train_pred != y_train_encoded) / len(y_train_encoded)
total_test_errors = np.sum(y_test_pred != y_test_encoded) / len(y_test_encoded)

for idx, cls in enumerate(class_names):
    estimator = ovr.estimators_[idx]

    w = estimator.coef_[0].tolist()
    b = estimator.intercept_[0]

    # support_ holds row positions in X_train_scaled; map them to instance ids.
    support_vectors_indices = estimator.support_
    support_vectors_instance_ids = [training_id_sorted[i] for i in support_vectors_indices]

    # 1 for samples of this class, 0 for the rest.
    y_train_binary = (y_train_encoded == idx).astype(int)
    # Not needed for this cell's own numbers; kept defined at cell scope in
    # case later cells reference it.
    y_test_binary = (y_test_encoded == idx).astype(int)

    # Share of ALL samples that belong to this class and were misclassified by
    # the multi-class prediction. Count first, divide once: summing many 1/N
    # terms drifts in the last float digits and disagrees with the totals.
    train_error = np.sum((y_train_pred != y_train_encoded) & (y_train_encoded == idx)) / len(y_train_encoded)
    test_error = np.sum((y_test_pred != y_test_encoded) & (y_test_encoded == idx)) / len(y_test_encoded)

    # The class counts as linearly separable when its own binary SVM makes no
    # mistake on the training set (no false negatives, no false positives).
    y_train_pred_binary = estimator.predict(X_train_scaled)
    fn = np.sum(y_train_pred_binary[y_train_binary == 1] == 0)
    fp = np.sum(y_train_pred_binary[y_train_binary == 0] == 1)

    if fn == 0 and fp == 0:
        linearly_separable_classes.append(cls)

    class_results[cls] = {
        'training_error': train_error,
        'testing_error': test_error,
        'w': w,
        'b': b,
        'support_vectors': support_vectors_instance_ids
    }

print("\nQ2.2.2 Calculation using Standard SVM Model, my name is filbert hamijoyo:")

print(f"total training error: {total_train_errors}, total testing error: {total_test_errors}")
print(" ")

for cls in class_names:
    result = class_results[cls]
    print(f"class {cls}:")
    print(f"training error: {result['training_error']}, testing error: {result['testing_error']}")
    print(f"w: {result['w']}, b: {result['b']}")
    print(f"support vector indices: {result['support_vectors']}")
    print(" ")

# When the data is not linearly separable, a linear SVM still returns the best
# hyperplane it can find, but that hyperplane cannot separate the classes perfectly.
print(f"Linear separable classes: {linearly_separable_classes}")


Q2.2.2 Calculation using Standard SVM Model:
total training error: 0.05714285714285714, total testing error: 0.08888888888888889
 
class Iris-setosa:
training error: 0.0, testing error: 0.022222222222222223
w: [-0.22149841131541403, 0.5197614640017161, -0.6204881374367106, -0.7018619843079239], b: -0.860042567927232
support vector indices: [58, 65, 24, 26]
 
class Iris-versicolor:
training error: 0.04761904761904762, testing error: 0.06666666666666667
w: [1.2403741375019308, -1.5570912008406594, -1.2096160736109596, -0.13571915771171916], b: -0.7952621398225709
support vector indices: [2, 4, 9, 10, 13, 14, 26, 31, 32, 35, 102, 103, 104, 106, 107, 108, 109, 112, 113, 114, 117, 119, 120, 123, 124, 126, 127, 129, 130, 131, 133, 134, 135, 51, 52, 53, 55, 56, 57, 58, 59, 60, 62, 64, 65, 66, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85]
 
class Iris-virginica:
training error: 0.009523809523809525, testing error: 0.0
w: [-0.887004583972157, -2.6477127808175283, 9.59

## 2.2.3 Calculation using SVM with Slack Variables (Linear Kernel)

In [10]:
C_values = [0.25 * t for t in range(1, 5)]  # C = 0.25, 0.5, 0.75, 1.0

print("\nQ2.2.3 Calculation using SVM with Slack Variables (C = 0.25 × t, where t = 1,...,4):")

for CC in C_values:
    class_results = {}

    print("-------------------------------------------")
    print(f"C={CC},")

    svm_model = SVC(kernel='linear', C=CC, random_state=0)

    # One-vs-rest: one binary soft-margin linear SVM per class.
    ovr_classifier = OneVsRestClassifier(svm_model)
    ovr_classifier.fit(X_train_scaled, y_train_encoded)

    y_train_pred = ovr_classifier.predict(X_train_scaled)
    y_test_pred = ovr_classifier.predict(X_test_scaled)

    total_train_errors = np.sum(y_train_pred != y_train_encoded) / len(y_train_encoded)
    total_test_errors = np.sum(y_test_pred != y_test_encoded) / len(y_test_encoded)

    for idx, cls in enumerate(class_names):
        estimator = ovr_classifier.estimators_[idx]

        w = estimator.coef_[0].tolist()
        b = estimator.intercept_[0]

        # support_ holds row positions in X_train_scaled; map them to instance ids.
        support_vectors_indices = estimator.support_
        support_vectors_instance_ids = [training_id_sorted[i] for i in support_vectors_indices]

        # Binary targets for this class, remapped from {0, 1} to {-1, +1}.
        y_train_binary = (y_train_encoded == idx).astype(int)
        y_train_mapped = y_train_binary * 2 - 1

        # Slack of each support vector: xi = max(0, 1 - y * f(x)).
        decision_values_sv = estimator.decision_function(X_train_scaled[support_vectors_indices])
        y_train_mapped_sv = y_train_mapped[support_vectors_indices]
        slack_vars_sv = np.maximum(0, 1 - y_train_mapped_sv * decision_values_sv).tolist()

        # Share of ALL samples that belong to this class and were misclassified.
        # Denominators come from the label arrays themselves, so this cell does
        # not depend on variables left behind by an earlier cell, and the count
        # is divided once to avoid float drift from summing many 1/N terms.
        train_error = np.sum((y_train_pred != y_train_encoded) & (y_train_encoded == idx)) / len(y_train_encoded)
        test_error = np.sum((y_test_pred != y_test_encoded) & (y_test_encoded == idx)) / len(y_test_encoded)

        class_results[cls] = {
            'training_error': train_error,
            'testing_error': test_error,
            'w': w,
            'b': b,
            'support_vectors': support_vectors_instance_ids,
            'slack_variables': slack_vars_sv
        }

    print(f"total training error: {total_train_errors}, total testing error: {total_test_errors}")
    for cls in class_names:
        res = class_results[cls]
        print(f"\nclass {cls}:")
        print(f"training error: {res['training_error']}, testing error: {res['testing_error']}")
        print(f"w: {np.round(res['w'], 4)}, b: {np.round(res['b'], 4)}")
        print(f"support vector indices: {res['support_vectors']}")
        print(f"slack variables: {res['slack_variables']}")


Q2.2.3 Calculation using SVM with Slack Variables (C = 0.25 × t, where t = 1,...,4):
-------------------------------------------
C=0.25,
total training error: 0.08571428571428572, total testing error: 0.08888888888888889

class Iris-setosa:
training error: 0.0, testing error: 0.022222222222222223
w: [-0.2066  0.5757 -0.566  -0.595 ], b: -0.7129
support vector indices: [58, 65, 80, 9, 24, 26]
slack variables: [0.006352642943328601, 0.13320812782976255, 0.0, 0.0001248379583259629, 0.0, 0.0840452868089453]

class Iris-versicolor:
training error: 0.0761904761904762, testing error: 0.06666666666666667
w: [ 0.3367 -0.9205 -0.1189 -0.3382], b: -0.8506
support vector indices: [2, 3, 4, 9, 10, 13, 14, 26, 31, 35, 102, 103, 104, 106, 107, 108, 109, 112, 113, 114, 115, 117, 119, 120, 122, 123, 124, 127, 128, 129, 130, 131, 133, 134, 135, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85]
slack variables: [0.4778

## 2.2.4 Calculation using SVM with Kernel Functions

In [11]:
print("Q2.2.4 Calculation using SVM with Kernel Functions:")
print("-------------------------------------------")
print("(a) 2nd-order Polynomial Kernel")

# scikit-learn's polynomial kernel is (gamma * <x, x'> + coef0) ** degree.
# NOTE(review): coef0=0 makes it homogeneous — confirm this matches the kernel
# definition the exercise asks for.
svm_poly2 = SVC(kernel='poly', degree=2, gamma='scale', coef0=0, C=1.0, random_state=0)

# One-vs-rest: one binary kernel SVM per class.
ovr_poly2 = OneVsRestClassifier(svm_poly2)
ovr_poly2.fit(X_train_scaled, y_train_encoded)

y_train_pred = ovr_poly2.predict(X_train_scaled)
y_test_pred = ovr_poly2.predict(X_test_scaled)

total_train_errors = np.sum(y_train_pred != y_train_encoded) / len(y_train_encoded)
total_test_errors = np.sum(y_test_pred != y_test_encoded) / len(y_test_encoded)
print(f"total training error: {total_train_errors}, total testing error: {total_test_errors}")

class_results = {}

for idx, cls in enumerate(class_names):
    estimator = ovr_poly2.estimators_[idx]

    b = estimator.intercept_[0]

    # support_ holds row positions in X_train_scaled; map them to instance ids.
    support_vectors_indices = estimator.support_
    support_vectors_instance_ids = [training_id_sorted[i] for i in support_vectors_indices]

    # Share of ALL samples that belong to this class and were misclassified.
    # Denominators come from the label arrays themselves (no reliance on
    # variables left behind by earlier cells); the count is divided once so the
    # value agrees with the totals instead of drifting in the last digits.
    train_error = np.sum((y_train_pred != y_train_encoded) & (y_train_encoded == idx)) / len(y_train_encoded)
    test_error = np.sum((y_test_pred != y_test_encoded) & (y_test_encoded == idx)) / len(y_test_encoded)

    class_results[cls] = {
        'training_error': train_error,
        'testing_error': test_error,
        'b': b,
        'support_vectors': support_vectors_instance_ids
    }

# No explicit weight vector is reported for kernel SVMs, hence the empty "w: ".
for cls in class_names:
    res = class_results[cls]
    print(f"\nclass {cls}:")
    print(f"training error: {res['training_error']}, testing error: {res['testing_error']}")
    print(f"w: , b: {np.round(res['b'], 4)}")
    print(f"support vector indices: {res['support_vectors']}")





print("-------------------------------------------")
print("\n(b) 3rd-order Polynomial Kernel")

# scikit-learn's polynomial kernel is (gamma * <x, x'> + coef0) ** degree.
# NOTE(review): coef0=0 makes it homogeneous — confirm this matches the kernel
# definition the exercise asks for.
svm_poly3 = SVC(kernel='poly', degree=3, gamma='scale', coef0=0, C=1.0, random_state=0)

# One-vs-rest: one binary kernel SVM per class.
ovr_poly3 = OneVsRestClassifier(svm_poly3)
ovr_poly3.fit(X_train_scaled, y_train_encoded)

y_train_pred = ovr_poly3.predict(X_train_scaled)
y_test_pred = ovr_poly3.predict(X_test_scaled)

total_train_errors = np.sum(y_train_pred != y_train_encoded) / len(y_train_encoded)
total_test_errors = np.sum(y_test_pred != y_test_encoded) / len(y_test_encoded)
print(f"total training error: {total_train_errors}, total testing error: {total_test_errors}")

class_results = {}

for idx, cls in enumerate(class_names):
    estimator = ovr_poly3.estimators_[idx]

    b = estimator.intercept_[0]

    # support_ holds row positions in X_train_scaled; map them to instance ids.
    support_vectors_indices = estimator.support_
    support_vectors_instance_ids = [training_id_sorted[i] for i in support_vectors_indices]

    # Share of ALL samples that belong to this class and were misclassified.
    # Denominators come from the label arrays themselves (no reliance on
    # variables left behind by earlier cells); the count is divided once so the
    # value agrees with the totals instead of drifting in the last digits.
    train_error = np.sum((y_train_pred != y_train_encoded) & (y_train_encoded == idx)) / len(y_train_encoded)
    test_error = np.sum((y_test_pred != y_test_encoded) & (y_test_encoded == idx)) / len(y_test_encoded)

    class_results[cls] = {
        'training_error': train_error,
        'testing_error': test_error,
        'b': b,
        'support_vectors': support_vectors_instance_ids
    }

# No explicit weight vector is reported for kernel SVMs, hence the empty "w: ".
for cls in class_names:
    res = class_results[cls]
    print(f"\nclass {cls}:")
    print(f"training error: {res['training_error']}, testing error: {res['testing_error']}")
    print(f"w: , b: {np.round(res['b'], 4)}")
    print(f"support vector indices: {res['support_vectors']}")





# NOTE(review): the "1234" inside this separator looks like a leftover debug
# marker; the other separators are plain dashes. Left unchanged — confirm.
print("------------------1234-------------------------")
print("")
print("(c) Radial Basis Function Kernel with σ = 1")

# RBF kernel exp(-gamma * ||x - x'||^2) with gamma = 1 / (2 * sigma^2).
gamma_value = 1 / (2 * (1 ** 2))  # σ = 1
svm_rbf = SVC(kernel='rbf', gamma=gamma_value, C=1.0, random_state=0)

# One-vs-rest: one binary kernel SVM per class.
ovr_rbf = OneVsRestClassifier(svm_rbf)
ovr_rbf.fit(X_train_scaled, y_train_encoded)

y_train_pred = ovr_rbf.predict(X_train_scaled)
y_test_pred = ovr_rbf.predict(X_test_scaled)

total_train_errors = np.sum(y_train_pred != y_train_encoded) / len(y_train_encoded)
total_test_errors = np.sum(y_test_pred != y_test_encoded) / len(y_test_encoded)
print(f"total training error: {total_train_errors}, total testing error: {total_test_errors}")

class_results = {}

for idx, cls in enumerate(class_names):
    estimator = ovr_rbf.estimators_[idx]

    b = estimator.intercept_[0]

    # support_ holds row positions in X_train_scaled; map them to instance ids.
    support_vectors_indices = estimator.support_
    support_vectors_instance_ids = [training_id_sorted[i] for i in support_vectors_indices]

    # Share of ALL samples that belong to this class and were misclassified.
    # Denominators come from the label arrays themselves (no reliance on
    # variables left behind by earlier cells); the count is divided once so the
    # value agrees with the totals instead of drifting in the last digits.
    train_error = np.sum((y_train_pred != y_train_encoded) & (y_train_encoded == idx)) / len(y_train_encoded)
    test_error = np.sum((y_test_pred != y_test_encoded) & (y_test_encoded == idx)) / len(y_test_encoded)

    class_results[cls] = {
        'training_error': train_error,
        'testing_error': test_error,
        'b': b,
        'support_vectors': support_vectors_instance_ids
    }

# No explicit weight vector is reported for kernel SVMs, hence the empty "w: ".
for cls in class_names:
    res = class_results[cls]
    print(f"class {cls}:")
    print(f"training error: {res['training_error']}, testing error: {res['testing_error']}")
    print(f"w: , b: {np.round(res['b'], 4)}")
    print(f"support vector indices: {res['support_vectors']}")





print("-------------------------------------------")
print("")
print("(d) Sigmoidal Kernel with σ = 1")

# scikit-learn's sigmoid kernel is tanh(gamma * <x, x'> + coef0); gamma is set
# to 1 / σ with σ = 1.
svm_sigmoid = SVC(kernel='sigmoid', gamma=1, coef0=0, C=1.0, random_state=0)

# One-vs-rest: one binary kernel SVM per class.
ovr_sigmoid = OneVsRestClassifier(svm_sigmoid)
ovr_sigmoid.fit(X_train_scaled, y_train_encoded)

y_train_pred = ovr_sigmoid.predict(X_train_scaled)
y_test_pred = ovr_sigmoid.predict(X_test_scaled)

total_train_errors = np.sum(y_train_pred != y_train_encoded) / len(y_train_encoded)
total_test_errors = np.sum(y_test_pred != y_test_encoded) / len(y_test_encoded)
print(f"total training error: {total_train_errors}, total testing error: {total_test_errors}")

class_results = {}

for idx, cls in enumerate(class_names):
    estimator = ovr_sigmoid.estimators_[idx]

    b = estimator.intercept_[0]

    # support_ holds row positions in X_train_scaled; map them to instance ids.
    support_vectors_indices = estimator.support_
    support_vectors_instance_ids = [training_id_sorted[i] for i in support_vectors_indices]

    # Share of ALL samples that belong to this class and were misclassified.
    # Denominators come from the label arrays themselves (no reliance on
    # variables left behind by earlier cells); the count is divided once so the
    # value agrees with the totals instead of drifting in the last digits.
    train_error = np.sum((y_train_pred != y_train_encoded) & (y_train_encoded == idx)) / len(y_train_encoded)
    test_error = np.sum((y_test_pred != y_test_encoded) & (y_test_encoded == idx)) / len(y_test_encoded)

    class_results[cls] = {
        'training_error': train_error,
        'testing_error': test_error,
        'b': b,
        'support_vectors': support_vectors_instance_ids
    }

# No explicit weight vector is reported for kernel SVMs, hence the empty "w: ".
# The intercept is printed unrounded in this section.
for cls in class_names:
    res = class_results[cls]
    print(f"class {cls}:")
    print(f"training error: {res['training_error']}, testing error: {res['testing_error']}")
    print(f"w: , b: {res['b']}")
    print(f"support vector indices: {res['support_vectors']}")

Q2.2.4 Calculation using SVM with Kernel Functions:
-------------------------------------------
(a) 2nd-order Polynomial Kernel
total training error: 0.24761904761904763, total testing error: 0.26666666666666666

class Iris-setosa:
training error: 0.0, testing error: 0.022222222222222223
w: , b: -2.338
support vector indices: [101, 103, 104, 105, 106, 108, 109, 110, 112, 113, 114, 115, 116, 119, 121, 123, 126, 129, 131, 133, 2, 3, 4, 6, 8, 9, 10, 12, 13, 14, 19, 21, 24, 25, 26, 27, 29, 30, 31, 32, 35]

class Iris-versicolor:
training error: 0.0, testing error: 0.0
w: , b: 1.2868
support vector indices: [102, 104, 107, 111, 112, 114, 117, 120, 122, 124, 125, 126, 127, 128, 130, 134, 135, 51, 52, 53, 54, 55, 57, 58, 59, 64, 66, 69, 71, 73, 76, 77, 78, 79, 84]

class Iris-virginica:
training error: 0.24761904761904766, testing error: 0.24444444444444446
w: , b: -1.1291
support vector indices: [1, 2, 3, 4, 7, 8, 9, 10, 12, 13, 14, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 35, 51, 52, 53, 54,