In [1]:
from data_processing import preprocess_data
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn import svm
import pandas as pd
import numpy as np
import joblib

# Set a random seed for reproducibility
RANDOM_SEED = 1000

# Load the data
data = pd.read_csv('dementia_dataset.csv')

# Preprocess the data (assuming preprocess_data is a custom function)
X = preprocess_data(data.iloc[:, 3:])
y = data.iloc[:, 2]

# Set up Stratified K-Fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)

best_accuracy = 0.0
best_model = None

for train_index, test_index in kf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Create and train the SVC classifier
    cls = svm.SVC(kernel="linear", random_state=RANDOM_SEED)
    cls.fit(X_train, y_train)

    # Make predictions
    y_pred = cls.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Fold Accuracy: {accuracy:.4f}")

    # Check if this model has the highest accuracy so far
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = cls

# Save the best model
joblib.dump(best_model, 'best_svc_model.pkl')
print(f"Best model saved with accuracy: {best_accuracy:.4f}")

# To load and use the saved model later
# best_model = joblib.load('best_svc_model.pkl')


Fold Accuracy: 0.8533
Fold Accuracy: 0.9200
Fold Accuracy: 0.9333
Fold Accuracy: 0.9324
Fold Accuracy: 0.9324
Best model saved with accuracy: 0.9333


In [2]:
import joblib

# Load the best model from the .pkl file
best_model = joblib.load('best_svc_model.pkl')

print(best_model)  

SVC(kernel='linear', random_state=1000)


In [3]:
from data_processing import preprocess_data
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import pandas as pd
import numpy as np

data = pd.read_csv('dementia_dataset.csv')
X=preprocess_data(data.iloc[:,3:])
y=data.iloc[:,2]

X_train,X_test, y_train, y_test = train_test_split(X,y,test_size=0.25)

from sklearn import svm
#create a classifier
cls = svm.SVC(kernel="linear")
cls.fit(X_train,y_train)
y_pred = cls.predict(X_test)

# Get unique classes from y for target_names, converted to string
target_names = [str(label) for label in np.unique(y)]

# Print the classification report
print(classification_report(y_test, y_pred, target_names=target_names, labels=np.unique(y)))

 

              precision    recall  f1-score   support

   Converted       0.67      0.40      0.50        10
    Demented       0.90      0.93      0.92        30
 Nondemented       0.93      0.98      0.95        54

    accuracy                           0.90        94
   macro avg       0.83      0.77      0.79        94
weighted avg       0.89      0.90      0.89        94



In [4]:
from data_processing import preprocess_data
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import pandas as pd
import numpy as np

data = pd.read_csv('dementia_dataset.csv')
X=preprocess_data(data.iloc[:,3:])
y=data.iloc[:,2]

X_train,X_test, y_train, y_test = train_test_split(X,y,test_size=0.25)

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
#create a classifier
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

# Get unique classes from y for target_names, converted to string
target_names = [str(label) for label in np.unique(y)]

# Print the classification report
print(classification_report(y_test, y_pred, target_names=target_names, labels=np.unique(y)))


 

              precision    recall  f1-score   support

   Converted       1.00      0.38      0.55         8
    Demented       0.94      0.97      0.96        33
 Nondemented       0.93      1.00      0.96        53

    accuracy                           0.94        94
   macro avg       0.96      0.78      0.82        94
weighted avg       0.94      0.94      0.93        94



In [5]:
from data_processing import k_fold_cross_validation
k_fold_cross_validation(SVC,X,y)

AttributeError: 'DataFrame' object has no attribute '_validate_params'