In [1]:
from ucimlrepo import fetch_ucirepo
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Fetch dataset id 94 from the UCI ML Repository through ucimlrepo.
# NOTE(review): presumably this downloads over the network on every run — confirm.
spambase = fetch_ucirepo(id=94)

In [3]:
# Features and targets, both exposed by ucimlrepo as pandas DataFrames.
# y is a single-column frame here; it is flattened to 1-D after the split.
X = spambase.data.features
y = spambase.data.targets

In [4]:
# Print the per-variable metadata table (name, role, type, ...) that ships with the dataset.
print(spambase.variables)

                          name     role        type demographic  \
0               word_freq_make  Feature  Continuous        None   
1            word_freq_address  Feature  Continuous        None   
2                word_freq_all  Feature  Continuous        None   
3                 word_freq_3d  Feature  Continuous        None   
4                word_freq_our  Feature  Continuous        None   
5               word_freq_over  Feature  Continuous        None   
6             word_freq_remove  Feature  Continuous        None   
7           word_freq_internet  Feature  Continuous        None   
8              word_freq_order  Feature  Continuous        None   
9               word_freq_mail  Feature  Continuous        None   
10           word_freq_receive  Feature  Continuous        None   
11              word_freq_will  Feature  Continuous        None   
12            word_freq_people  Feature  Continuous        None   
13            word_freq_report  Feature  Continuous        Non

In [5]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=41,
)

In [6]:
# Accumulator that metrics() appends one test accuracy to for every evaluated model.
Accuracy=[]
# C values used by the linear-kernel runs below, in the order the models are fitted.
regularization=[0.001,0.1,1,10,100]

In [7]:
# Show the target shape before flattening (a 2-D single-column frame after the split).
print(y_train.shape)
# Flatten the targets to 1-D label vectors for SVC.fit and the metric functions.
# np.asarray accepts both the original DataFrame and an already-converted array,
# so re-running this cell no longer fails with AttributeError on `.to_numpy`.
y_train = np.asarray(y_train).ravel()
y_test = np.asarray(y_test).ravel()

(3680, 1)


In [8]:
def metrics(y_test,y_pred):
    """Print accuracy, precision, recall and F1 for a set of predictions.

    Parameters
    ----------
    y_test : true labels.
    y_pred : predicted labels, compared element-wise against ``y_test``.

    Side effects
    ------------
    Prints the four scores (one per line) and appends the accuracy to the
    module-level ``Accuracy`` list. Returns ``None``.
    """
    scorers = (
        ("Accuracy:", accuracy_score),
        ("Precision:", precision_score),
        ("Recall:", recall_score),
        ("F1-Score:", f1_score),
    )

    # Evaluate every score before printing anything, so a failing scorer
    # leaves no partial report behind.
    results = [(label, scorer(y_test, y_pred)) for label, scorer in scorers]

    for label, value in results:
        print(label, value)

    # The first entry is the accuracy; record it for the comparison plots.
    Accuracy.append(results[0][1])

        

<h2> Regularization 0.001 </h2>

In [9]:
# Linear-kernel SVM with the smallest C of the sweep.
svm_classifier = SVC(
    C=0.001,
    kernel='linear',
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
# Predictions on the held-out test split.
y_pred = svm_classifier.predict(X_test)

In [10]:
# Print the test-set scores for the most recent predictions and record the accuracy.
metrics(y_test,y_pred)

Accuracy: 0.8849077090119435
Precision: 0.9037900874635568
Recall: 0.8093994778067886
F1-Score: 0.8539944903581267


<h2> Regularization 0.1 </h2>

In [11]:
# Linear-kernel SVM with C=0.1.
svm_classifier = SVC(
    C=0.1,
    kernel='linear',
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
# Predictions on the held-out test split.
y_pred = svm_classifier.predict(X_test)

In [12]:
# Print the test-set scores for the most recent predictions and record the accuracy.
metrics(y_test,y_pred)

Accuracy: 0.9446254071661238
Precision: 0.9414893617021277
Recall: 0.9242819843342036
F1-Score: 0.932806324110672


<h2> Regularization  1 </h2>

In [13]:
# Linear-kernel SVM with C=1.
svm_classifier = SVC(
    C=1,
    kernel='linear',
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
# Predictions on the held-out test split.
y_pred = svm_classifier.predict(X_test)

In [14]:
# Print the test-set scores for the most recent predictions and record the accuracy.
metrics(y_test,y_pred)

Accuracy: 0.9457111834961998
Precision: 0.9393139841688655
Recall: 0.9295039164490861
F1-Score: 0.9343832020997375


<h2> Regularization 10 </h2>

In [9]:
# Linear-kernel SVM with C=10.
svm_classifier = SVC(
    C=10,
    kernel='linear',
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
# Predictions on the held-out test split.
y_pred = svm_classifier.predict(X_test)

In [10]:
# Print the test-set scores for the most recent predictions and record the accuracy.
metrics(y_test,y_pred)

Accuracy: 0.9424538545059717
Precision: 0.9319371727748691
Recall: 0.9295039164490861
F1-Score: 0.930718954248366


<h2> Regularization 100 </h2>

In [None]:
# Linear-kernel SVM with the largest C of the sweep.
# NOTE(review): this is the only run that passes max_iter, so the solver may stop
# before converging and its scores may not be comparable to the other runs — confirm.
svm_classifier = SVC(
    C=100,
    kernel='linear',
    random_state=42,
    max_iter=1000,
)
svm_classifier.fit(X_train, y_train)
# Predictions on the held-out test split.
y_pred = svm_classifier.predict(X_test)

In [None]:
# Print the test-set scores for the most recent predictions and record the accuracy.
metrics(y_test,y_pred)

In [None]:
# Test accuracy as a function of C for the linear-kernel runs.
# The C values span five orders of magnitude, so the x-axis is log-scaled;
# on a linear axis every point except C=100 collapses against the left edge.
plt.plot(regularization, Accuracy, marker='o')
plt.xscale('log')
plt.xlabel('Regularization parameter C (log scale)')
plt.ylabel('Test accuracy')
plt.title('Linear SVM: test accuracy vs. C')
plt.show()

# Kernel Tricks

In [None]:
# Start a fresh accuracy list so metrics() only records the kernel runs that follow.
Accuracy=[]
# One label per kernel configuration, in the order the models are fitted below.
x_labels=['poly degree 2','poly degree 3','sigmoid','RBF']

In [None]:
# Polynomial kernel, degree 2, C=1.
svm_classifier = SVC(
    C=1,
    kernel='poly',
    degree=2,
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
# Print the test-set scores and record the accuracy.
metrics(y_test, y_pred)

In [None]:
# Polynomial kernel, degree 3, C=1.
svm_classifier = SVC(
    C=1,
    kernel='poly',
    degree=3,
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
# Print the test-set scores and record the accuracy.
metrics(y_test, y_pred)

In [None]:
# Sigmoid kernel with explicit gamma and coef0, C=1.
svm_classifier = SVC(
    C=1,
    kernel='sigmoid',
    gamma=0.1,
    coef0=0.5,
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
# Print the test-set scores and record the accuracy.
metrics(y_test, y_pred)

In [None]:
# RBF kernel, C=1.
svm_classifier = SVC(C=1, kernel='rbf', random_state=42)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
# Print the test-set scores and record the accuracy.
metrics(y_test,y_pred)

# Plot the kernel comparison here: x_labels was otherwise never used, and
# Accuracy is reset at the start of Part C, which would discard these results.
plt.plot(x_labels, Accuracy, marker='o')
plt.xlabel('Kernel')
plt.ylabel('Test accuracy')
plt.title('SVM (C=1): test accuracy by kernel')
plt.show()

# Part C


In [None]:
# Start a fresh accuracy list so metrics() only records the Part C runs that follow.
Accuracy=[]
# One label per (degree, C) combination, in the order the models are fitted below.
x_labels=['poly degree 1 c=0.01','poly degree 1 c=100','poly degree 3 c=0.01','poly degree 3 c=100']

In [None]:
# Polynomial kernel, degree 1, C=0.01.
svm_classifier = SVC(
    C=0.01,
    kernel='poly',
    degree=1,
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
# Print the test-set scores and record the accuracy.
metrics(y_test, y_pred)

In [None]:
# Polynomial kernel, degree 1, C=100.
svm_classifier = SVC(
    C=100,
    kernel='poly',
    degree=1,
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
# Print the test-set scores and record the accuracy.
metrics(y_test, y_pred)

In [None]:
# Polynomial kernel, degree 3, C=0.01.
svm_classifier = SVC(
    C=0.01,
    kernel='poly',
    degree=3,
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
# Print the test-set scores and record the accuracy.
metrics(y_test, y_pred)

In [None]:
# Polynomial kernel, degree 3, C=100.
svm_classifier = SVC(
    C=100,
    kernel='poly',
    degree=3,
    random_state=42,
)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
# Print the test-set scores and record the accuracy.
metrics(y_test, y_pred)

In [None]:
# Test accuracy for each polynomial-kernel (degree, C) combination from Part C.
# Markers make the four discrete configurations visible; the labels are long,
# so they are rotated to keep them from overlapping.
plt.plot(x_labels, Accuracy, marker='o')
plt.xticks(rotation=15)
plt.xlabel('Kernel configuration')
plt.ylabel('Test accuracy')
plt.title('Polynomial-kernel SVM: test accuracy by degree and C')
plt.show()