In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


column_names = ['Time', 
                'Ankle_h_fwd_acc', 'Ankle_v_acc', 'Ankle_h_lat_acc',
                'thigh_h_fwd_acc', 'thigh_v_acc', 'thigh_h_lat_acc',
                'trunk_h_fwd_acc', 'trunk_v_acc', 'trunk_h_lat_acc',
                'annotation']

url = '/kaggle/input/freezing-of-gait/FOG/freezing-of-gait-exploration-main/freezing-of-gait-exploration-main/dataset_fog_release/dataset'
filenames = ['/S01R01.txt', '/S01R02.txt', '/S02R01.txt', '/S02R02.txt', '/S03R01.txt', '/S03R02.txt', '/S03R03.txt',
             '/S04R01.txt', '/S05R01.txt', '/S05R02.txt', '/S06R01.txt', '/S06R02.txt', '/S07R01.txt', '/S07R02.txt',
             '/S08R01.txt', '/S09R01.txt', '/S10R01.txt']

# Split file names into two parts
files_part1 = filenames[:14]
files_part2 = filenames[14:17]


In [2]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

rows = []
for i in files_part1: 
    with open(url+i, 'r') as file:
        text_dataset = file.read()
    rows = rows + text_dataset.strip().split('\n')
df = pd.DataFrame([row.split() for row in rows])
df.columns = column_names
df.columns = column_names

In [3]:
X = df.drop("annotation", axis = 1)                  # Apply drop() function
y = df.drop(column_names[:-1], axis=1)
y = np.array(y).ravel()

In [4]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Assuming you have the necessary imports and 'files_part2' defined

rows_part1 = []
for i in files_part1: 
    with open(url+i, 'r') as file:
        text_dataset = file.read()
    rows_part1 = rows_part1 + text_dataset.strip().split('\n')
df_part1 = pd.DataFrame([row.split() for row in rows_part1])
df_part1.columns = column_names

rows_part2 = []
for i in files_part2: 
    with open(url+i, 'r') as file:
        text_dataset = file.read()
    rows_part2 = rows_part2 + text_dataset.strip().split('\n')
df_part2 = pd.DataFrame([row.split() for row in rows_part2])
df_part2.columns = column_names

X_train = df_part1.drop("annotation", axis=1)
y_train = df_part1["annotation"]

X_val = df_part2.drop("annotation", axis=1)
y_val = df_part2["annotation"]

# Create the classifier object
knn = KNeighborsClassifier(n_neighbors=7)

# Create the StratifiedKFold object
skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

# Perform 5-fold cross-validation on the training set
scores = cross_val_score(knn, X_train, y_train, cv=skf)

# Print the scores for each fold
for fold, score in enumerate(scores, start=1):
    print(f"Fold {fold}: {score}")

# Calculate the mean score across all folds
mean_score = scores.mean()
print("Mean Score:", mean_score)



Fold 1: 0.9724161801530707
Fold 2: 0.9723314155338229
Fold 3: 0.9724974129131834
Fold 4: 0.9726704740108145
Fold 5: 0.9727339511754068
Mean Score: 0.9725298867572597


In [5]:
# Train on the entire training set and validate on the separate validation set
knn.fit(X_train, y_train)
y_pred = knn.predict(X_val)

# Calculate the validation accuracy
val_accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", val_accuracy)
# Calculate the validation precision
precision = precision_score(y_val, y_pred, average='weighted')

# Calculate the validation F1 score
f1 = f1_score(y_val, y_pred, average='weighted')

# Print the validation precision and F1 score
print('Validation Precision:', precision)
print('Validation F1 Score:', f1)

Validation Accuracy: 0.6713500317600651
Validation Precision: 0.6771863315360674
Validation F1 Score: 0.6679019569291367


In [6]:
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Assuming you have the necessary imports and 'files_part2' defined

rows_part1 = []
for i in filenames: 
    with open(url+i, 'r') as file:
        text_dataset = file.read()
    rows_part1 = rows_part1 + text_dataset.strip().split('\n')
df_part1 = pd.DataFrame([row.split() for row in rows_part1])
df_part1.columns = column_names

X_train = df_part1.drop("annotation", axis=1)
y_train = df_part1["annotation"]


# Create the classifier object
knn = KNeighborsClassifier(n_neighbors=7)

# Create the StratifiedKFold object
skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

# Perform 5-fold cross-validation on the training set
scores = cross_val_score(knn, X_train, y_train, cv=skf)

metrics_dict = {'Accuracy': [], 'Precision': [], 'F1 Score': []}

# Perform cross-validation and collect the metrics for each fold
for fold, (train_index, test_index) in enumerate(skf.split(X_train, y_train), start=1):
    X_train_fold, X_test_fold = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train_fold, y_test_fold = y_train.iloc[train_index], y_train.iloc[test_index]
    
    # Fit the classifier to the training fold
    knn.fit(X_train_fold, y_train_fold)
    
    # Predict on the test fold
    y_pred_fold = knn.predict(X_test_fold)
    
    # Calculate and store the metrics for the fold
    accuracy = accuracy_score(y_test_fold, y_pred_fold)
    precision = precision_score(y_test_fold, y_pred_fold, average='weighted')
    f1 = f1_score(y_test_fold, y_pred_fold, average='weighted')
    
    metrics_dict['Accuracy'].append(accuracy)
    metrics_dict['Precision'].append(precision)
    metrics_dict['F1 Score'].append(f1)
    
    # Print the metrics for the fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"F1 Score: {f1}")
    print()
    
# Calculate the mean scores across all folds
mean_accuracy = sum(metrics_dict['Accuracy']) / len(metrics_dict['Accuracy'])
mean_precision = sum(metrics_dict['Precision']) / len(metrics_dict['Precision'])
mean_f1 = sum(metrics_dict['F1 Score']) / len(metrics_dict['F1 Score'])

# Print the mean scores
print("Mean Accuracy:", mean_accuracy)
print("Mean Precision:", mean_precision)
print("Mean F1 Score:", mean_f1)


Fold 1:
Accuracy: 0.9733039955367617
Precision: 0.9728507802978912
F1 Score: 0.9729589004827524

Fold 2:
Accuracy: 0.9731892861425838
Precision: 0.9727145125705786
F1 Score: 0.9728204129427671

Fold 3:
Accuracy: 0.9729441546286665
Precision: 0.9724576730588136
F1 Score: 0.9725627449008549

Fold 4:
Accuracy: 0.9723419287392101
Precision: 0.9718536527825495
F1 Score: 0.9719731942847996

Fold 5:
Accuracy: 0.9727069141267595
Precision: 0.9722411225020472
F1 Score: 0.9723561979076295

Mean Accuracy: 0.9728972558347962
Mean Precision: 0.972423548242376
Mean F1 Score: 0.9725342901037607


In [7]:
# Read data from files in files_part2
rows_part2 = []
for i in files_part2: 
    with open(url + i, 'r') as file:
        text_dataset = file.read()
    rows_part2 = rows_part2 + text_dataset.strip().split('\n')
df_part2 = pd.DataFrame([row.split() for row in rows_part2])
df_part2.columns = column_names

# Preprocess the data in df_part2 (handle missing values, convert columns to numeric types, etc.)

# Split DataFrame into features (X_test) and labels (y_test)
X_test = df_part2.drop("annotation", axis=1)
y_test = df_part2["annotation"]

# Use the trained KNN model to predict on X_test
y_pred_test = knn.predict(X_test)

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Calculate the test accuracy
test_accuracy = accuracy_score(y_test, y_pred_test)
print("Test Accuracy:", test_accuracy)

# Calculate the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred_test)

# Calculate the true positive, true negative, false positive, and false negative values from the confusion matrix
TP = conf_matrix[1, 1]
TN = conf_matrix[0, 0]
FP = conf_matrix[0, 1]
FN = conf_matrix[1, 0]

# Calculate Sensitivity (True Positive Rate) and Specificity (True Negative Rate)
sensitivity = TP / (TP + FN)
specificity = TN / (TN + FP)

# Calculate Precision (Positive Predictive Value) and Negative Predictive Value
precision = precision_score(y_test, y_pred_test, average='weighted')
negative_predictive_value = TN / (TN + FN)

# Calculate Fall out (False Positive Rate) and False Negative Rate
false_positive_rate = FP / (FP + TN)
false_negative_rate = FN / (TP + FN)

# Calculate F1 Score
f1 = f1_score(y_test, y_pred_test, average='weighted')

# Print the metrics
print("Sensitivity (True Positive Rate):", sensitivity)
print("Specificity (True Negative Rate):", specificity)
print("Precision (Positive Predictive Value):", precision)
print("Negative Predictive Value:", negative_predictive_value)
print("Fall out (False Positive Rate):", false_positive_rate)
print("False Negative Rate:", false_negative_rate)
print("F1 Score:", f1)

Test Accuracy: 0.9886918238242305
Sensitivity (True Positive Rate): 0.9950493048077241
Specificity (True Negative Rate): 0.9956429426740055
Precision (Positive Predictive Value): 0.9886242003861324
Negative Predictive Value: 0.9932392754430609
Fall out (False Positive Rate): 0.004357057325994425
False Negative Rate: 0.004950695192275887
F1 Score: 0.9886395191800952
