In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Paths for training and testing data.
# NOTE: despite the *_FOLDER names, both values end in ".csv", i.e. they name
# individual CSV files rather than directories.
TRAIN_FOLDER = "D:/Amrita/Sem-4/Machine Learning Lab/End Sem Project/Excel/4. Feature Extraction/X_train_features_extraction.csv"
TEST_FOLDER = "D:/Amrita/Sem-4/Machine Learning Lab/End Sem Project/Excel/4. Feature Extraction/X_test_features_extraction.csv"

# Load data from a folder (or from a single CSV file)
def load_data_from_folder(folder_path):
    """Load CSV data from a directory, or from one CSV file, into a DataFrame.

    Args:
        folder_path: Path to a directory containing ``.csv`` files, or the
            path of a single CSV file.

    Returns:
        A DataFrame holding the rows of every CSV that was read, stacked
        vertically with a fresh 0..n-1 index. Files inside a directory are
        read in sorted file-name order.

    Raises:
        ValueError: If the directory contains no ``.csv`` files.
    """
    # A path that names a file is read directly; os.listdir would raise on it.
    if os.path.isfile(folder_path):
        return pd.read_csv(folder_path)

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the combined frame reproducible across runs and machines.
    csv_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".csv")
    )
    if not csv_names:
        # pd.concat([]) would also raise ValueError, but with a message that
        # does not mention the offending path.
        raise ValueError(f"No .csv files found in {folder_path!r}")

    frames = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_names]
    return pd.concat(frames, axis=0, ignore_index=True)

# Read the training and testing tables from disk
train_data = load_data_from_folder(TRAIN_FOLDER)
test_data = load_data_from_folder(TEST_FOLDER)

# Split each table: the final column is the label, every column before it is a feature
X_train, y_train = train_data.iloc[:, :-1], train_data.iloc[:, -1]
X_test, y_test = test_data.iloc[:, :-1], test_data.iloc[:, -1]

# Turn labels into integer codes; the mapping is learned from the training labels only
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

# Scale features with statistics learned from the training features only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Decision Tree Classifier
def decision_tree_classifier(X_train, y_train, X_test, y_test):
    """Fit a decision tree, print its test-set report, and classify one sample.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix; its first row is reused as the demo sample.
        y_test: True labels for ``X_test``.

    Returns:
        None. The classification report, accuracy and sample prediction are
        printed.
    """
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("\nDecision Tree Classifier")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    # Predict on a single sample. Slicing keeps the first row two-dimensional
    # and works for NumPy arrays and pandas DataFrames alike, whereas
    # ``X_test[0]`` on a DataFrame is a column lookup, not a row lookup.
    new_data = X_test[:1]
    prediction = clf.predict(new_data)
    # NOTE(review): the 0/1 legend assumes how the labels were encoded upstream.
    print(f"Prediction for new sample: {prediction[0]} (0=Awake, 1=Sleep)")

# Random Forest Classifier
def random_forest_classifier(X_train, y_train, X_test, y_test):
    """Fit a random forest, print its test-set report, and classify one sample.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix; its first row is reused as the demo sample.
        y_test: True labels for ``X_test``.

    Returns:
        None. The classification report, accuracy and sample prediction are
        printed.
    """
    clf = RandomForestClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("\nRandom Forest Classifier")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    # Predict on a single sample. Slicing keeps the first row two-dimensional
    # and works for NumPy arrays and pandas DataFrames alike, whereas
    # ``X_test[0]`` on a DataFrame is a column lookup, not a row lookup.
    new_data = X_test[:1]
    prediction = clf.predict(new_data)
    # NOTE(review): the 0/1 legend assumes how the labels were encoded upstream.
    print(f"Prediction for new sample: {prediction[0]} (0=Awake, 1=Sleep)")

# SVM Classifier
def svm_classifier(X_train, y_train, X_test, y_test):
    """Fit a support-vector classifier, print its report, and classify one sample.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix; its first row is reused as the demo sample.
        y_test: True labels for ``X_test``.

    Returns:
        None. The classification report, accuracy and sample prediction are
        printed.
    """
    # Probability estimates are not requested: only ``predict`` is used here
    # and the model never leaves this function, so enabling them would just
    # add the extra internal cross-validation pass to ``fit``.
    clf = SVC()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("\nSVM Classifier")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    # Predict on a single sample. Slicing keeps the first row two-dimensional
    # and works for NumPy arrays and pandas DataFrames alike, whereas
    # ``X_test[0]`` on a DataFrame is a column lookup, not a row lookup.
    new_data = X_test[:1]
    prediction = clf.predict(new_data)
    # NOTE(review): the 0/1 legend assumes how the labels were encoded upstream.
    print(f"Prediction for new sample: {prediction[0]} (0=Awake, 1=Sleep)")

# k-NN Classifier
def knn_classifier(X_train, y_train, X_test, y_test):
    """Fit a k-nearest-neighbours model, print its report, and classify one sample.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix; its first row is reused as the demo sample.
        y_test: True labels for ``X_test``.

    Returns:
        None. The classification report, accuracy and sample prediction are
        printed.
    """
    clf = KNeighborsClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("\nK-NN Classifier")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    # Predict on a single sample. Slicing keeps the first row two-dimensional
    # and works for NumPy arrays and pandas DataFrames alike, whereas
    # ``X_test[0]`` on a DataFrame is a column lookup, not a row lookup.
    new_data = X_test[:1]
    prediction = clf.predict(new_data)
    # NOTE(review): the 0/1 legend assumes how the labels were encoded upstream.
    print(f"Prediction for new sample: {prediction[0]} (0=Awake, 1=Sleep)")



# Train and report every classifier, in the same order, on the same split
for _classifier_fn in (
    decision_tree_classifier,
    random_forest_classifier,
    svm_classifier,
    knn_classifier,
):
    _classifier_fn(X_train, y_train, X_test, y_test)