In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.metrics import accuracy_score, f1_score, precision_score
from sklearn.model_selection import train_test_split

# Fraction of the rows used for training; the remainder becomes the test set.
training_set_percentage = 0.6


# load data
# Forward slash instead of a backslash: '\H' is an invalid escape sequence in a
# normal string literal (warning today, error in future Python versions) and a
# backslash-separated path only resolves on Windows. '/' works on every platform.
df = pd.read_csv('HTRU/HTRU_2.csv', header=None)


# get the data we'll need as X and y
X = df.iloc[:, 0:8].values   # columns 0-7 are the features; narrow the slice to use fewer
y = df.iloc[:, 8].values     # column 8 is the class label

# Shuffled split with a fixed seed so the reported metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1-training_set_percentage,
    shuffle=True,
    random_state=3,
)


In [30]:
##----------Linear Kernel------------------

# Support vector classifier with a linear kernel and regularization parameter C=10,
# trained on the training split.
linear_svc_params = {"kernel": "linear", "C": 10}
clf = svm.SVC(**linear_svc_params)
clf.fit(X_train, y_train)

In [31]:
##-------------Linear Kernel Performance---------------

# Predict once and reuse the result: every metric below is computed from the
# same predictions, and SVC inference is the expensive step in this cell.
# `Z` and `y_pred` stay bound (as aliases) so cells that read them keep working.
y_hat = Z = y_pred = clf.predict(X_test)

# NOTE: F1 is support-weighted over all classes, while precision below uses
# scikit-learn's default binary averaging (label 1 only), so the two figures
# are not directly comparable.
print("F1 score: ", f1_score(y_test, y_hat, average="weighted"))

# Row 0: 1 where the prediction equals the label, row 1: predictions, row 2: labels.
matches = np.asarray(y_hat) == np.asarray(y_test)
compare = np.vstack((matches, y_hat, y_test))

# Fraction of test samples predicted correctly.
performance = np.mean(matches)
print(f"performance: {performance}")

# Same quantity as clf.score(X_test, y_test), without running predict again.
accuracy = accuracy_score(y_test, y_hat)

# Calculate precision
precision = precision_score(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")

F1 score:  0.9775713274842224
performance: 0.9782122905027933
Accuracy: 0.9782122905027933
Precision: 0.9307282415630551


In [28]:
##-------------RBF Kernel-----------------

# Support vector classifier with an RBF kernel, gamma="scale" and C=10000,
# trained on the training split.
rbf_svc_params = {"kernel": "rbf", "gamma": "scale", "C": 10000}
clf2 = svm.SVC(**rbf_svc_params)
clf2.fit(X_train, y_train)

In [29]:
##-------------RBF Kernel Performance-----------------

# Predict once and reuse the result: every metric below is computed from the
# same predictions, and kernel SVC inference is the expensive step in this cell.
# `Z` and `y_pred` stay bound (as aliases) so cells that read them keep working.
y_hat = Z = y_pred = clf2.predict(X_test)

# NOTE: F1 is support-weighted over all classes, while precision below uses
# scikit-learn's default binary averaging (label 1 only), so the two figures
# are not directly comparable.
print("F1 score: ", f1_score(y_test, y_hat, average="weighted"))

# Row 0: 1 where the prediction equals the label, row 1: predictions, row 2: labels.
matches = np.asarray(y_hat) == np.asarray(y_test)
compare = np.vstack((matches, y_hat, y_test))

# Fraction of test samples predicted correctly.
performance = np.mean(matches)
print(f"performance: {performance}")

# Same quantity as clf2.score(X_test, y_test), without running predict again.
accuracy = accuracy_score(y_test, y_hat)

# Calculate precision
precision = precision_score(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")

F1 score:  0.9786371757309131
performance: 0.9793296089385475
Accuracy: 0.9793296089385475
Precision: 0.945750452079566


In [24]:
##---------Polynomial Kernel--------------------

# Support vector classifier with a degree-3 polynomial kernel, gamma="scale"
# and C=10000, trained on the training split.
poly_svc_params = {"kernel": "poly", "degree": 3, "gamma": "scale", "C": 10000}
clf3 = svm.SVC(**poly_svc_params)
clf3.fit(X_train, y_train)


In [25]:
##--------------Polynomial Kernel Performance-----------------

# Predict once and reuse the result: every metric below is computed from the
# same predictions, and kernel SVC inference is the expensive step in this cell.
# `Z` and `y_pred` stay bound (as aliases) so cells that read them keep working.
y_hat = Z = y_pred = clf3.predict(X_test)

# NOTE: F1 is support-weighted over all classes, while precision below uses
# scikit-learn's default binary averaging (label 1 only), so the two figures
# are not directly comparable.
print("F1 score: ", f1_score(y_test, y_hat, average="weighted"))

# Row 0: 1 where the prediction equals the label, row 1: predictions, row 2: labels.
matches = np.asarray(y_hat) == np.asarray(y_test)
compare = np.vstack((matches, y_hat, y_test))

# Fraction of test samples predicted correctly.
performance = np.mean(matches)
print(f"performance: {performance}")

# Same quantity as clf3.score(X_test, y_test), without running predict again.
accuracy = accuracy_score(y_test, y_hat)

# Calculate precision
precision = precision_score(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")


F1 score:  0.9783657108139427
performance: 0.979050279329609
Accuracy: 0.979050279329609
Precision: 0.9423423423423424
