In [None]:
import pandas as pd
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [None]:
#mount drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive




# **Creating CSV**


In [None]:
csv_file = '/content/drive/MyDrive/FYPRepository/interactional_features.csv'
path_to_features = '/content/drive/MyDrive/FYPRepository/interactional_features.csv'
path_to_metadata = '/content/drive/MyDrive/FYPRepository/normalized.csv'
path_to_save_merged_file = '/content/drive/My Drive/FYPRepository/interactional_train.csv'
df = pd.read_csv(csv_file)
df['Audio_File'] = df['Audio_File'].astype(str) + '.wav'
df.to_csv(csv_file, index=False)

metadata_df = pd.read_csv(path_to_metadata)
features_df = pd.read_csv(path_to_features)

# Check if the column exists and then rename it
if 'Audio File' in metadata_df.columns:
    metadata_df.rename(columns={'Audio File': 'Audio_File'}, inplace=True)
else:
    print("Column 'Audio File' not found in metadata_df")

if 'Audio_File' not in features_df.columns:
    print("Column 'Audio_File' not found in features_df")


merged_df = pd.merge(metadata_df, features_df, on='Audio_File')
merged_df.to_csv(path_to_save_merged_file, index=False)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **SVM Using 10 Interactional Features**

In [None]:
# Load the CSV file
df = pd.read_csv('/content/drive/My Drive/FYPRepository/InteractionalFeatures/interactional_train.csv')

# Drop 'diagnosis_HC' and set 'diagnosis_AD' as the target variable
df.drop(['diagnosis_HC'], axis=1, inplace=True)
X = df.drop(['Audio_File', 'diagnosis_AD'], axis=1)  # Features
y = df['diagnosis_AD']  # Target

# Splitting the data into training and test sets (70-30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM parameters
C_values = [0.1, 1, 10, 100, 1000]
gamma_values = [1, 0.1, 0.01, 0.001, 0.0001]
kernels = ['rbf', 'poly']
# Store results
results = []
# Train and evaluate models
for kernel in kernels:
    for C in C_values:
        for gamma in gamma_values:
            model = svm.SVC(kernel=kernel, C=C, gamma=gamma)
            model.fit(X_train_scaled, y_train)

            # Predictions and evaluation
            y_pred = model.predict(X_test_scaled)
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            auc = roc_auc_score(y_test, y_pred)

            results.append({
                'Kernel': kernel,
                'C': C,
                'Gamma': gamma,
                'Accuracy': accuracy,
                'Precision': precision,
                'Recall': recall,
                'F1 Score': f1,
                'AUC': auc
            })

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Display the results table
print(results_df)



   Kernel       C   Gamma  Accuracy  Precision  Recall  F1 Score       AUC
0     rbf     0.1  1.0000  0.484848   0.484848  1.0000  0.653061  0.500000
1     rbf     0.1  0.1000  0.484848   0.484848  1.0000  0.653061  0.500000
2     rbf     0.1  0.0100  0.484848   0.484848  1.0000  0.653061  0.500000
3     rbf     0.1  0.0010  0.484848   0.484848  1.0000  0.653061  0.500000
4     rbf     0.1  0.0001  0.484848   0.484848  1.0000  0.653061  0.500000
5     rbf     1.0  1.0000  0.393939   0.388889  0.4375  0.411765  0.395221
6     rbf     1.0  0.1000  0.606061   0.565217  0.8125  0.666667  0.612132
7     rbf     1.0  0.0100  0.515152   0.500000  0.9375  0.652174  0.527574
8     rbf     1.0  0.0010  0.484848   0.484848  1.0000  0.653061  0.500000
9     rbf     1.0  0.0001  0.484848   0.484848  1.0000  0.653061  0.500000
10    rbf    10.0  1.0000  0.454545   0.450000  0.5625  0.500000  0.457721
11    rbf    10.0  0.1000  0.484848   0.476190  0.6250  0.540541  0.488971
12    rbf    10.0  0.0100

# **SVM using All Interactional Features**


In [None]:
df = pd.read_csv('/content/drive/My Drive/FYPRepository/InteractionalFeatures/all_interactional_train.csv')

# Drop 'diagnosis_HC' and set 'diagnosis_AD' as the target variable
df.drop(['diagnosis_HC'], axis=1, inplace=True)
X = df.drop(['Audio_File', 'diagnosis_AD'], axis=1)  # Features
y = df['diagnosis_AD']  # Target

# Splitting the data into training and test sets (70-30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM parameters
C_values = [0.1, 1, 10, 100, 1000]
gamma_values = [1, 0.1, 0.01, 0.001, 0.0001]
kernels = ['rbf', 'poly']

# Store results
results = []

# Train and evaluate models
for kernel in kernels:
    for C in C_values:
        for gamma in gamma_values:
            model = svm.SVC(kernel=kernel, C=C, gamma=gamma)
            model.fit(X_train_scaled, y_train)

            # Predictions and evaluation
            y_pred = model.predict(X_test_scaled)
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            auc = roc_auc_score(y_test, y_pred)

            # Store results
            results.append({
                'kernel': kernel,
                'C': C,
                'gamma': gamma,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1,
                'auc': auc
            })

# Convert results to a DataFrame for visualization
results_df = pd.DataFrame(results)
print(results_df)


   kernel       C   gamma  accuracy  precision  recall        f1       auc
0     rbf     0.1  1.0000  0.484848   0.484848  1.0000  0.653061  0.500000
1     rbf     0.1  0.1000  0.484848   0.484848  1.0000  0.653061  0.500000
2     rbf     0.1  0.0100  0.484848   0.484848  1.0000  0.653061  0.500000
3     rbf     0.1  0.0010  0.484848   0.484848  1.0000  0.653061  0.500000
4     rbf     0.1  0.0001  0.484848   0.484848  1.0000  0.653061  0.500000
5     rbf     1.0  1.0000  0.454545   0.450000  0.5625  0.500000  0.457721
6     rbf     1.0  0.1000  0.575758   0.550000  0.6875  0.611111  0.579044
7     rbf     1.0  0.0100  0.636364   0.576923  0.9375  0.714286  0.645221
8     rbf     1.0  0.0010  0.484848   0.484848  1.0000  0.653061  0.500000
9     rbf     1.0  0.0001  0.484848   0.484848  1.0000  0.653061  0.500000
10    rbf    10.0  1.0000  0.484848   0.476190  0.6250  0.540541  0.488971
11    rbf    10.0  0.1000  0.545455   0.526316  0.6250  0.571429  0.547794
12    rbf    10.0  0.0100

# **Fusion**

In [None]:
path_to_features = '/content/drive/MyDrive/FYPRepository/InteractionalFeatures/all_interactional_features.csv'
path_to_metadata = '/content/drive/MyDrive/dataset/egemaps_normalized_data.csv'
path_to_save_merged_file = '/content/drive/My Drive/FYPRepository/InteractionalFeatures/fusion_train.csv'


metadata_df = pd.read_csv(path_to_metadata)
features_df = pd.read_csv(path_to_features)

# Check if the column exists and then rename it
if 'Audio File' in metadata_df.columns:
    metadata_df.rename(columns={'Audio File': 'Audio_File'}, inplace=True)
else:
    print("Column 'Audio File' not found in metadata_df")

if 'Audio_File' not in features_df.columns:
    print("Column 'Audio_File' not found in features_df")


merged_df = pd.merge(metadata_df, features_df, on='Audio_File')
merged_df.to_csv(path_to_save_merged_file, index=False)


In [None]:
df = pd.read_csv('/content/drive/My Drive/FYPRepository/InteractionalFeatures/fusion_train.csv')
df.dropna()
# Drop 'diagnosis_HC' and set 'diagnosis_AD' as the target variable
df.drop(['diagnosis_HC'], axis=1, inplace=True)
X = df.drop(['Audio_File', 'diagnosis_AD'], axis=1)  # Features
y = df['diagnosis_AD']  # Target

# Splitting the data into training and test sets (70-30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM parameters
C_values = [0.1, 1, 10, 100, 1000]
gamma_values = [1, 0.1, 0.01, 0.001, 0.0001]
kernels = ['rbf', 'poly']

# Store results
results = []

# Train and evaluate models
for kernel in kernels:
    for C in C_values:
        for gamma in gamma_values:
            model = svm.SVC(kernel=kernel, C=C, gamma=gamma)
            model.fit(X_train_scaled, y_train)

            # Predictions and evaluation
            y_pred = model.predict(X_test_scaled)
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            auc = roc_auc_score(y_test, y_pred)

            # Store results
            results.append({
                'kernel': kernel,
                'C': C,
                'gamma': gamma,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1,
                'auc': auc
            })

# Convert results to a DataFrame for visualization
results_df = pd.DataFrame(results)
print(results_df)


   kernel       C   gamma  accuracy  precision  recall        f1       auc
0     rbf     0.1  1.0000  0.484848   0.484848  1.0000  0.653061  0.500000
1     rbf     0.1  0.1000  0.484848   0.484848  1.0000  0.653061  0.500000
2     rbf     0.1  0.0100  0.484848   0.484848  1.0000  0.653061  0.500000
3     rbf     0.1  0.0010  0.484848   0.484848  1.0000  0.653061  0.500000
4     rbf     0.1  0.0001  0.484848   0.484848  1.0000  0.653061  0.500000
5     rbf     1.0  1.0000  0.484848   0.484848  1.0000  0.653061  0.500000
6     rbf     1.0  0.1000  0.515152   0.500000  0.8750  0.636364  0.525735
7     rbf     1.0  0.0100  0.757576   0.722222  0.8125  0.764706  0.759191
8     rbf     1.0  0.0010  0.757576   0.700000  0.8750  0.777778  0.761029
9     rbf     1.0  0.0001  0.484848   0.484848  1.0000  0.653061  0.500000
10    rbf    10.0  1.0000  0.484848   0.484848  1.0000  0.653061  0.500000
11    rbf    10.0  0.1000  0.484848   0.480000  0.7500  0.585366  0.492647
12    rbf    10.0  0.0100

# **Fusion(LR, SVM, MLP and Random Forest)**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load your data
df = pd.read_csv('/content/drive/My Drive/FYPRepository/InteractionalFeatures/fusion_train.csv')
df.dropna(inplace=True)
df.drop(['diagnosis_HC'], axis=1, inplace=True)
X = df.drop(['Audio_File', 'diagnosis_AD'], axis=1)  # Features
y = df['diagnosis_AD']  # Target

# Splitting the data into training and test sets (70-30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

C_values = [0.1, 1, 10, 100, 1000]
gamma_values = [1, 0.1, 0.01, 0.001, 0.0001]
kernels = ['rbf', 'poly']
LR_C_values = [0.1, 10, 100]
n_estimators = 60
max_depth = 5
hidden_layer_sizes = [(2, 3), (3, 4)]
learning_rate_init = 0.01

# Store results
results_svm = []
results_lr = []
results_rf = []
results_mlp = []

# SVM training and evaluation
for kernel in kernels:
    for C in C_values:
        for gamma in gamma_values:
            svm_model = SVC(kernel=kernel, C=C, gamma=gamma)
            svm_model.fit(X_train_scaled, y_train)
            y_pred = svm_model.predict(X_test_scaled)
            results_svm.append({
                'kernel': kernel,
                'C': C,
                'gamma': gamma,
                'accuracy': accuracy_score(y_test, y_pred),
                'precision': precision_score(y_test, y_pred),
                'recall': recall_score(y_test, y_pred),
                'f1': f1_score(y_test, y_pred),
                'auc': roc_auc_score(y_test, y_pred)
            })

# LR training and evaluation
for C in LR_C_values:
    lr_model = LogisticRegression(C=C)
    lr_model.fit(X_train_scaled, y_train)
    y_pred = lr_model.predict(X_test_scaled)
    results_lr.append({
        'C': C,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_pred)
    })

# RF training and evaluation
rf_model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
rf_model.fit(X_train_scaled, y_train)
y_pred = rf_model.predict(X_test_scaled)
results_rf.append({
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'accuracy': accuracy_score(y_test, y_pred),
    'precision': precision_score(y_test, y_pred),
    'recall': recall_score(y_test, y_pred),
    'f1': f1_score(y_test, y_pred),
    'auc': roc_auc_score(y_test, y_pred)
})

for hidden_layers in hidden_layer_sizes:
    mlp_model = MLPClassifier(hidden_layer_sizes=hidden_layers, activation='relu', learning_rate_init=learning_rate_init)
    mlp_model.fit(X_train_scaled, y_train)
    y_pred = mlp_model.predict(X_test_scaled)
    results_mlp.append({
        'hidden_layers': hidden_layers,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_pred)
    })

# Convert results to DataFrames
results_df_svm = pd.DataFrame(results_svm)
results_df_lr = pd.DataFrame(results_lr)
results_df_rf = pd.DataFrame(results_rf)
results_df_mlp = pd.DataFrame(results_mlp)

# Function to highlight the max value in each column
def highlight_max(s):
    is_max = s == s.max()
    return ['background-color: yellow' if v else '' for v in is_max]

# Display results with highlighted max accuracy
print("SVM Results:")
display(results_df_svm.style.apply(highlight_max, subset=['accuracy']))
print("\nLogistic Regression Results:")
display(results_df_lr.style.apply(highlight_max, subset=['accuracy']))
print("\nRandom Forest Results:")
display(results_df_rf.style.apply(highlight_max, subset=['accuracy']))
print("\nMLP Results:")
display(results_df_mlp.style.apply(highlight_max, subset=['accuracy']))


SVM Results:




Unnamed: 0,kernel,C,gamma,accuracy,precision,recall,f1,auc
0,rbf,0.1,1.0,0.484848,0.484848,1.0,0.653061,0.5
1,rbf,0.1,0.1,0.484848,0.484848,1.0,0.653061,0.5
2,rbf,0.1,0.01,0.484848,0.484848,1.0,0.653061,0.5
3,rbf,0.1,0.001,0.484848,0.484848,1.0,0.653061,0.5
4,rbf,0.1,0.0001,0.484848,0.484848,1.0,0.653061,0.5
5,rbf,1.0,1.0,0.484848,0.484848,1.0,0.653061,0.5
6,rbf,1.0,0.1,0.515152,0.5,0.875,0.636364,0.525735
7,rbf,1.0,0.01,0.757576,0.722222,0.8125,0.764706,0.759191
8,rbf,1.0,0.001,0.757576,0.7,0.875,0.777778,0.761029
9,rbf,1.0,0.0001,0.484848,0.484848,1.0,0.653061,0.5



Logistic Regression Results:


Unnamed: 0,C,accuracy,precision,recall,f1,auc
0,0.1,0.69697,0.65,0.8125,0.722222,0.700368
1,10.0,0.666667,0.631579,0.75,0.685714,0.669118
2,100.0,0.666667,0.631579,0.75,0.685714,0.669118



Random Forest Results:


Unnamed: 0,n_estimators,max_depth,accuracy,precision,recall,f1,auc
0,60,5,0.787879,0.8,0.75,0.774194,0.786765



MLP Results:


Unnamed: 0,hidden_layers,accuracy,precision,recall,f1,auc
0,"(2, 3)",0.666667,0.619048,0.8125,0.702703,0.670956
1,"(3, 4)",0.69697,0.714286,0.625,0.666667,0.694853


# **Interactional Only (SVM, LR, MLP, Random Forest)**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load your data
df = pd.read_csv('/content/drive/My Drive/FYPRepository/InteractionalFeatures/all_interactional_train.csv')
df.dropna(inplace=True)
df.drop(['diagnosis_HC'], axis=1, inplace=True)
X = df.drop(['Audio_File', 'diagnosis_AD'], axis=1)  # Features
y = df['diagnosis_AD']  # Target

# Splitting the data into training and test sets (70-30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

C_values = [0.1, 1, 10, 100, 1000]
gamma_values = [1, 0.1, 0.01, 0.001, 0.0001]
kernels = ['rbf', 'poly']
LR_C_values = [0.1, 10, 100]
n_estimators = 60
max_depth = 5
hidden_layer_sizes = [(2, 3), (3, 4)]
learning_rate_init = 0.01

# Store results
results_svm = []
results_lr = []
results_rf = []
results_mlp = []

# SVM training and evaluation
for kernel in kernels:
    for C in C_values:
        for gamma in gamma_values:
            svm_model = SVC(kernel=kernel, C=C, gamma=gamma)
            svm_model.fit(X_train_scaled, y_train)
            y_pred = svm_model.predict(X_test_scaled)
            results_svm.append({
                'kernel': kernel,
                'C': C,
                'gamma': gamma,
                'accuracy': accuracy_score(y_test, y_pred),
                'precision': precision_score(y_test, y_pred),
                'recall': recall_score(y_test, y_pred),
                'f1': f1_score(y_test, y_pred),
                'auc': roc_auc_score(y_test, y_pred)
            })

# LR training and evaluation
for C in LR_C_values:
    lr_model = LogisticRegression(C=C)
    lr_model.fit(X_train_scaled, y_train)
    y_pred = lr_model.predict(X_test_scaled)
    results_lr.append({
        'C': C,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_pred)
    })

# RF training and evaluation
rf_model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
rf_model.fit(X_train_scaled, y_train)
y_pred = rf_model.predict(X_test_scaled)
results_rf.append({
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'accuracy': accuracy_score(y_test, y_pred),
    'precision': precision_score(y_test, y_pred),
    'recall': recall_score(y_test, y_pred),
    'f1': f1_score(y_test, y_pred),
    'auc': roc_auc_score(y_test, y_pred)
})

for hidden_layers in hidden_layer_sizes:
    mlp_model = MLPClassifier(hidden_layer_sizes=hidden_layers, activation='relu', learning_rate_init=learning_rate_init)
    mlp_model.fit(X_train_scaled, y_train)
    y_pred = mlp_model.predict(X_test_scaled)
    results_mlp.append({
        'hidden_layers': hidden_layers,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_pred)
    })

# Convert results to DataFrames
results_df_svm = pd.DataFrame(results_svm)
results_df_lr = pd.DataFrame(results_lr)
results_df_rf = pd.DataFrame(results_rf)
results_df_mlp = pd.DataFrame(results_mlp)

# Function to highlight the max value in each column
def highlight_max(s):
    is_max = s == s.max()
    return ['background-color: yellow' if v else '' for v in is_max]

# Display results with highlighted max accuracy
print("SVM Results:")
display(results_df_svm.style.apply(highlight_max, subset=['accuracy']))
print("\nLogistic Regression Results:")
display(results_df_lr.style.apply(highlight_max, subset=['accuracy']))
print("\nRandom Forest Results:")
display(results_df_rf.style.apply(highlight_max, subset=['accuracy']))
print("\nMLP Results:")
display(results_df_mlp.style.apply(highlight_max, subset=['accuracy']))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


SVM Results:




Unnamed: 0,kernel,C,gamma,accuracy,precision,recall,f1,auc
0,rbf,0.1,1.0,0.484848,0.484848,1.0,0.653061,0.5
1,rbf,0.1,0.1,0.484848,0.484848,1.0,0.653061,0.5
2,rbf,0.1,0.01,0.484848,0.484848,1.0,0.653061,0.5
3,rbf,0.1,0.001,0.484848,0.484848,1.0,0.653061,0.5
4,rbf,0.1,0.0001,0.484848,0.484848,1.0,0.653061,0.5
5,rbf,1.0,1.0,0.454545,0.45,0.5625,0.5,0.457721
6,rbf,1.0,0.1,0.575758,0.55,0.6875,0.611111,0.579044
7,rbf,1.0,0.01,0.636364,0.576923,0.9375,0.714286,0.645221
8,rbf,1.0,0.001,0.484848,0.484848,1.0,0.653061,0.5
9,rbf,1.0,0.0001,0.484848,0.484848,1.0,0.653061,0.5



Logistic Regression Results:


Unnamed: 0,C,accuracy,precision,recall,f1,auc
0,0.1,0.666667,0.619048,0.8125,0.702703,0.670956
1,10.0,0.636364,0.6,0.75,0.666667,0.639706
2,100.0,0.575758,0.545455,0.75,0.631579,0.580882



Random Forest Results:


Unnamed: 0,n_estimators,max_depth,accuracy,precision,recall,f1,auc
0,60,5,0.515152,0.5,0.5625,0.529412,0.516544



MLP Results:


Unnamed: 0,hidden_layers,accuracy,precision,recall,f1,auc
0,"(2, 3)",0.454545,0.4375,0.4375,0.4375,0.454044
1,"(3, 4)",0.606061,0.571429,0.75,0.648649,0.610294


# **eGeMAPS Features**

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load your data
df = pd.read_csv('/content/drive/MyDrive/FYPRepository/eGeMAPS Features/egemaps_normalized_data.csv')
df.dropna(inplace=True)
df.drop(['diagnosis_HC'], axis=1, inplace=True)
X = df.drop(['Audio File', 'diagnosis_AD'], axis=1)  # Features
y = df['diagnosis_AD']  # Target

# Splitting the data into training and test sets (70-30)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

C_values = [0.1, 1, 10, 100, 1000]
gamma_values = [1, 0.1, 0.01, 0.001, 0.0001]
kernels = ['rbf', 'poly']
LR_C_values = [0.1, 10, 100]
n_estimators = 60
max_depth = 5
hidden_layer_sizes = [(2, 3), (3, 4)]
learning_rate_init = 0.01

# Store results
results_svm = []
results_lr = []
results_rf = []
results_mlp = []

# SVM training and evaluation
for kernel in kernels:
    for C in C_values:
        for gamma in gamma_values:
            svm_model = SVC(kernel=kernel, C=C, gamma=gamma)
            svm_model.fit(X_train_scaled, y_train)
            y_pred = svm_model.predict(X_test_scaled)
            results_svm.append({
                'kernel': kernel,
                'C': C,
                'gamma': gamma,
                'accuracy': accuracy_score(y_test, y_pred),
                'precision': precision_score(y_test, y_pred),
                'recall': recall_score(y_test, y_pred),
                'f1': f1_score(y_test, y_pred),
                'auc': roc_auc_score(y_test, y_pred)
            })

# LR training and evaluation
for C in LR_C_values:
    lr_model = LogisticRegression(C=C)
    lr_model.fit(X_train_scaled, y_train)
    y_pred = lr_model.predict(X_test_scaled)
    results_lr.append({
        'C': C,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_pred)
    })

# RF training and evaluation
rf_model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
rf_model.fit(X_train_scaled, y_train)
y_pred = rf_model.predict(X_test_scaled)
results_rf.append({
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'accuracy': accuracy_score(y_test, y_pred),
    'precision': precision_score(y_test, y_pred),
    'recall': recall_score(y_test, y_pred),
    'f1': f1_score(y_test, y_pred),
    'auc': roc_auc_score(y_test, y_pred)
})

for hidden_layers in hidden_layer_sizes:
    mlp_model = MLPClassifier(hidden_layer_sizes=hidden_layers, activation='relu', learning_rate_init=learning_rate_init)
    mlp_model.fit(X_train_scaled, y_train)
    y_pred = mlp_model.predict(X_test_scaled)
    results_mlp.append({
        'hidden_layers': hidden_layers,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_pred)
    })

# Convert results to DataFrames
results_df_svm = pd.DataFrame(results_svm)
results_df_lr = pd.DataFrame(results_lr)
results_df_rf = pd.DataFrame(results_rf)
results_df_mlp = pd.DataFrame(results_mlp)

# Function to highlight the max value in each column
def highlight_max(s):
    is_max = s == s.max()
    return ['background-color: yellow' if v else '' for v in is_max]

# Display results with highlighted max accuracy
print("SVM Results:")
display(results_df_svm.style.apply(highlight_max, subset=['accuracy']))
print("\nLogistic Regression Results:")
display(results_df_lr.style.apply(highlight_max, subset=['accuracy']))
print("\nRandom Forest Results:")
display(results_df_rf.style.apply(highlight_max, subset=['accuracy']))
print("\nMLP Results:")
display(results_df_mlp.style.apply(highlight_max, subset=['accuracy']))


SVM Results:




Unnamed: 0,kernel,C,gamma,accuracy,precision,recall,f1,auc
0,rbf,0.1,1.0,0.484848,0.484848,1.0,0.653061,0.5
1,rbf,0.1,0.1,0.484848,0.484848,1.0,0.653061,0.5
2,rbf,0.1,0.01,0.484848,0.484848,1.0,0.653061,0.5
3,rbf,0.1,0.001,0.484848,0.484848,1.0,0.653061,0.5
4,rbf,0.1,0.0001,0.484848,0.484848,1.0,0.653061,0.5
5,rbf,1.0,1.0,0.484848,0.484848,1.0,0.653061,0.5
6,rbf,1.0,0.1,0.484848,0.481481,0.8125,0.604651,0.494485
7,rbf,1.0,0.01,0.69697,0.6875,0.6875,0.6875,0.696691
8,rbf,1.0,0.001,0.69697,0.636364,0.875,0.736842,0.702206
9,rbf,1.0,0.0001,0.484848,0.484848,1.0,0.653061,0.5



Logistic Regression Results:


Unnamed: 0,C,accuracy,precision,recall,f1,auc
0,0.1,0.666667,0.631579,0.75,0.685714,0.669118
1,10.0,0.666667,0.647059,0.6875,0.666667,0.667279
2,100.0,0.666667,0.647059,0.6875,0.666667,0.667279



Random Forest Results:


Unnamed: 0,n_estimators,max_depth,accuracy,precision,recall,f1,auc
0,60,5,0.606061,0.6,0.5625,0.580645,0.604779



MLP Results:


Unnamed: 0,hidden_layers,accuracy,precision,recall,f1,auc
0,"(2, 3)",0.606061,0.578947,0.6875,0.628571,0.608456
1,"(3, 4)",0.636364,0.590909,0.8125,0.684211,0.641544
