In [60]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

In [61]:
# Table the model is trained on. To train on a different table, point
# DATASET_PATH at one of the other files in the same folder:
#   training_dataset/raw_dataset.csv
#   training_dataset/raw_scores_pvallog.csv
#   training_dataset/ionocyte_raw_dataset.csv
DATASET_PATH = 'training_dataset/ionocyte_scores_pvallog.csv'
data = pd.read_csv(DATASET_PATH)

In [62]:
# Binary target: 1 where the label is exactly 'COVID-19', 0 for any other label.
# The encoding is deliberately NOT written back into `data`. Overwriting the
# column made this cell non-idempotent: on a second run the column already
# holds integers, none of which equal 'COVID-19', so every label silently
# became 0.
y = (data['disease_ontology_label'] == 'COVID-19').astype(int)

# Features: every column except the 'NAME' column and the label itself.
X = data.drop(columns=['NAME', 'disease_ontology_label'])

In [63]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on y -- consider stratify=y if the
# class balance of the held-out set matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [64]:
# Univariate feature selection: keep the 40 features with the highest
# f_classif scores. The selector is fitted on the training rows only, and the
# same column subset is then applied to both partitions.
selector = SelectKBest(score_func=f_classif, k=40)
selector.fit(X_train, y_train)
X_train = selector.transform(X_train)
X_test = selector.transform(X_test)

In [65]:
# Standardize the selected features. The scaler is fitted on the training
# rows only, then the same transform is applied to the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [66]:
# Train a linear-kernel support vector classifier on the training rows
# (fit returns the estimator itself), then predict labels for the held-out rows.
svm_model = SVC(kernel='linear').fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

In [67]:
# Score the held-out predictions. Precision and recall use average='macro',
# i.e. the unweighted mean of the per-class values.
accuracy, precision, recall = (
    accuracy_score(y_test, y_pred),
    precision_score(y_test, y_pred, average='macro'),
    recall_score(y_test, y_pred, average='macro'),
)

# Headline summary; sep="\n" prints each argument on its own line.
print(
    "--------------------------------------------------",
    "Support Vector Machine Results",
    "--------------------------------------------------",
    sep="\n",
)
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    sep="\n",
)

# Per-class precision / recall / f1-score / support table.
print(classification_report(y_test, y_pred))

--------------------------------------------------
Support Vector Machine Results
--------------------------------------------------
Accuracy: 0.7521367521367521
Precision: 0.733434650455927
Recall: 0.7438095238095238
              precision    recall  f1-score   support

           0       0.64      0.71      0.67        42
           1       0.83      0.77      0.80        75

    accuracy                           0.75       117
   macro avg       0.73      0.74      0.74       117
weighted avg       0.76      0.75      0.75       117

