In [None]:
import os
import pandas as pd
import opensmile 
from opensmile import Smile


In [None]:
# Training audio folders, one per class sub-directory ("cc" and "cd").
root_cc, root_cd = (
    "ADReSS-IS2020/train/Full_wave_enhanced_audio/cc",
    "ADReSS-IS2020/train/Full_wave_enhanced_audio/cd",
)


def create_feature_dataframe(root):
    """Run the openSMILE eGeMAPSv02 functionals extractor over a folder of wav files.

    Args:
        root: Path of the folder whose ``wav`` files are processed.

    Returns:
        Whatever ``Smile.process_folder`` returns for that folder; the rest of
        the notebook uses it as a pandas DataFrame (``.head()``, ``.shape``,
        column assignment).
    """
    extractor = Smile(
        feature_level=opensmile.FeatureLevel.Functionals,
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
    )
    # include_root=False presumably keeps the file index relative to `root`
    # -- TODO confirm against the opensmile documentation.
    return extractor.process_folder(root, filetype='wav', include_root=False)

In [None]:
# Extract features for both training folders (cc first, then cd).
df_cc, df_cd = (
    create_feature_dataframe(root=_folder) for _folder in (root_cc, root_cd)
)

In [None]:
# Preview the first five rows of the "cc" feature table.
df_cc.head(n=5)

In [None]:
# Preview the first five rows of the "cd" feature table.
df_cd.head(n=5)

In [None]:
# Report (rows, columns) for each class table, df_cd first and df_cc second.
for _caption, _frame in (("Shape of df_cd:", df_cd), ("Shape of df_cc:", df_cc)):
    print(_caption, _frame.shape)


In [None]:
# Attach the class label as a float32 'target' column:
# rows of df_cd get 1.0, rows of df_cc get 0.0.
for _frame, _label in ((df_cd, 1.0), (df_cc, 0.0)):
    _frame['target'] = _label
    _frame['target'] = _frame['target'].astype('float32')

In [None]:
# A notebook cell only renders its LAST bare expression, so the original
# `df_cd.head()` on the first line was evaluated and silently thrown away.
# display() renders the preview explicitly; the dtypes remain the cell output.
display(df_cd.head())
df_cd.dtypes

In [None]:
# Stack the two labelled class tables row-wise (df_cd first, then df_cc).
# ignore_index=True replaces the original index with a fresh 0..n-1 range.
df_train = pd.concat([df_cd, df_cc], axis=0, ignore_index=True)

# Only the last bare expression of a cell is rendered, so the preview has to
# be displayed explicitly; the shape stays as the cell's output value.
display(df_train.head())
df_train.shape


In [None]:
from sklearn.model_selection import train_test_split

# Separate the label vector from the feature matrix: 'target' is the label,
# every remaining column of df_train is treated as a feature.
y_train = df_train['target']
X_train = df_train.drop(columns='target')




In [None]:
# Preview the first five rows of the training feature matrix.
X_train.head(n=5)

In [None]:
# Extract the same feature set for the test audio folder; no label column
# is attached to this table here.
root_test = "ADReSS-IS2020/test/Full_wave_enhanced_audio"
df_test_without_target = create_feature_dataframe(root=root_test)


In [None]:

# Location of the ';'-separated metadata file of the test split.
file_path = 'ADReSS-IS2020/test/meta_data_test.txt'

# Load it as a table. Header names are kept exactly as written in the file,
# which is why other cells address columns such as "ID   " and "Label "
# including their trailing spaces.
df_test_metadata = pd.read_csv(file_path, sep=';')


In [None]:
# Preview the first five rows of the test metadata.
df_test_metadata.head(n=5)

In [None]:
# Put the index of the test feature table next to the metadata "ID   " column
# so the row order of the two tables can be compared by eye.
df1_indexes = pd.DataFrame(df_test_without_target.index, columns=['df1_index'])
df2_indexes = df_test_metadata[["ID   "]]

# Column-wise concatenation: one row per position, feature index on the left,
# metadata ID on the right.
df_indexes = pd.concat([df1_indexes, df2_indexes], axis="columns")

print(df_indexes)

In [None]:
# Preview the test feature table with its current index.
df_test_without_target.head(n=5)


In [None]:
# Replace the existing index with a plain 0..n-1 range; drop=True discards
# the old index values instead of keeping them as columns.
df_test_without_target = df_test_without_target.reset_index(drop=True)

In [None]:
# Preview the test feature table again, now with the positional index.
df_test_without_target.head(n=5)

In [None]:
# Test features come from the extracted table, test labels from the
# "Label " column of the metadata file.
# NOTE(review): labels are matched to feature rows purely by position --
# confirm that both tables list the recordings in the same order.
X_test, y_test = df_test_without_target, df_test_metadata["Label "]

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Baseline decision tree with default hyper-parameters.
# random_state is pinned because tree construction is randomised; without a
# seed the reported accuracies change from one run to the next.
clf = DecisionTreeClassifier(random_state=42)

# Train the model on the full training set
clf.fit(X_train, y_train)

# Accuracy on the data the tree was fitted on
accuracy1 = clf.score(X_train, y_train)
print("Accuracy on training data is :", accuracy1*100)

# Accuracy on the held-out test set
accuracy2 = clf.score(X_test, y_test)
print("Accuracy on testing data is :", accuracy2*100)



In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for the decision tree.
# 'auto' was dropped from max_features: for tree classifiers it was only an
# alias of 'sqrt', and scikit-learn releases that removed the alias reject it,
# so those grid candidates fail to fit. None (consider all features) is
# searched instead, which is a distinct option.
param_grid = {
    'max_depth': [3, 5, 7, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5],
    'max_features': [None, 'sqrt', 'log2']
}

# Seeded estimator so that every candidate is fitted reproducibly and the
# selected parameters do not change between runs.
clf = DecisionTreeClassifier(random_state=42)

# Exhaustive search over param_grid with 5-fold cross-validation
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5)

# Fit the GridSearchCV object to the training data
grid_search.fit(X_train, y_train)

# Print the best parameters
print(grid_search.best_params_)

In [None]:
# Refit a decision tree with the parameters selected by the grid search.
# random_state is pinned: max_features values such as 'sqrt'/'log2' sample
# features randomly, so an unseeded refit can yield a different tree (and
# different accuracies) on every run.
best_clf = DecisionTreeClassifier(**grid_search.best_params_, random_state=42)
best_clf.fit(X_train, y_train)

# Calculate and print the training accuracy
train_accuracy = best_clf.score(X_train, y_train)
print(f'Training accuracy: {train_accuracy*100:.2f}%')

# Calculate and print the test accuracy
test_accuracy = best_clf.score(X_test, y_test)
print(f'Test accuracy: {test_accuracy*100:.2f}%')

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Build a Linear Discriminant Analysis classifier and fit it on the training
# set in one step (fit() returns the fitted estimator itself).
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)

# Accuracy on the data the model was fitted on
train_accuracy = lda.score(X_train, y_train)
print(f'Training accuracy: {train_accuracy*100:.2f}%')

# Accuracy on the held-out test set
test_accuracy = lda.score(X_test, y_test)
print(f'Test accuracy: {test_accuracy*100:.2f}%')


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Build a 3-nearest-neighbours classifier and fit it on the training set in
# one step (fit() returns the fitted estimator itself).
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Accuracy on the data the model was fitted on
train_accuracy = knn.score(X_train, y_train)
print(f'Training accuracy: {train_accuracy*100:.2f}%')

# Accuracy on the held-out test set
test_accuracy = knn.score(X_test, y_test)
print(f'Test accuracy: {test_accuracy*100:.2f}%')

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# Models to compare. The tree-based ones are seeded so that the comparison
# table is reproducible from run to run.
models = [
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
    ('Linear Discriminant Analysis', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('Random Forest', RandomForestClassifier(random_state=42))
]

# One record per model; the summary table is built once after the loop.
# (Previously an empty accuracy_df was created here and never filled.)
accuracy_records = []

# Train and evaluate each model
for model_name, model in models:
    # Train the model
    model.fit(X_train, y_train)

    # Calculate the training accuracy
    train_accuracy = model.score(X_train, y_train)
    print(f'Training accuracy for {model_name}: {train_accuracy*100:.2f}%')

    # Calculate the testing accuracy
    test_accuracy = model.score(X_test, y_test)
    print(f'Testing accuracy for {model_name}: {test_accuracy*100:.2f}%')

    accuracy_records.append({
        'Model': model_name,
        'Training Accuracy': train_accuracy,
        'Testing Accuracy': test_accuracy,
    })

# Summary table with the same columns the original empty frame declared;
# accuracies are stored as fractions in [0, 1].
accuracy_df = pd.DataFrame(
    accuracy_records,
    columns=['Model', 'Training Accuracy', 'Testing Accuracy'],
)
accuracy_df
    
   
