# Classify Voice Clips on Features
### In this notebook, we combine all test results and analyze them

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Load the Test Results

In [None]:
# Read the 1-feature evaluation results from CSV; the trailing expression
# displays (rows, columns) as a quick sanity check.
test_1_feature_results = pd.read_csv("test_evaluation_1_feature_results.csv")
test_1_feature_results.shape

In [None]:
# Read the 2-features evaluation results from CSV; the trailing expression
# displays (rows, columns) as a quick sanity check.
test_2_features_results = pd.read_csv("test_evaluation_2_features_results.csv")
test_2_features_results.shape

In [None]:
# Read the NN 1-feature evaluation results from CSV; the trailing expression
# displays (rows, columns) as a quick sanity check.
test_nn_1_feature_results = pd.read_csv("test_evaluation_nn_1_feature_results.csv")
test_nn_1_feature_results.shape

In [None]:
# Read the NN 2-features evaluation results from CSV; the trailing expression
# displays (rows, columns) as a quick sanity check.
test_nn_2_features_results = pd.read_csv("test_evaluation_nn_2_features_results.csv")
test_nn_2_features_results.shape

In [None]:
# Stack the 1-feature and 2-features results into one frame.
# ignore_index=True discards the per-file indices and renumbers the rows.
test_all_results = pd.concat([test_1_feature_results, test_2_features_results], ignore_index=True)
test_all_results.shape

In [None]:
# Stack the NN 1-feature and 2-features results into one frame.
# ignore_index=True discards the per-file indices and renumbers the rows.
test_nn_all_results = pd.concat([test_nn_1_feature_results, test_nn_2_features_results], ignore_index=True)
test_nn_all_results.shape

In [None]:
#test_nn_all_results.to_csv("test_evaluation_nn_all_results.csv", index=None)

In [None]:
test_all_results.columns

## Find the Best Traditional Models

In [None]:
# Remove the column-width limit so long cell values are displayed in full.
pd.set_option('display.max_colwidth', None)

In [None]:
# Point `evals` at the combined non-NN results for the queries in this section.
# NOTE: `evals` is rebound to the NN results in the "Find the Best NN Models"
# section, so re-run this cell before re-running the queries that follow it.
evals = test_all_results

In [None]:
# Keep only the 'individual set' evaluations, highest `f1_boring` first.
individual_rows = evals.loc[evals['evaluation_data'] == 'individual set']
individual_rows.sort_values('f1_boring', ascending=False)

In [None]:
# Random-forest rows evaluated on the 'individual set', highest `f1_boring` first.
on_individual_set = evals['evaluation_data'] == 'individual set'
is_random_forest = evals['model'].str.contains("random forest")
evals[on_individual_set & is_random_forest].sort_values('f1_boring', ascending=False)

In [None]:
# XGBoost rows evaluated on the 'individual set', highest `f1_boring` first.
on_individual_set = evals['evaluation_data'] == 'individual set'
is_xgboost = evals['model'].str.contains("XGBoost")
evals[on_individual_set & is_xgboost].sort_values('f1_boring', ascending=False)

## Find the Best NN Models

In [None]:
# Remove the column-width limit so long cell values are displayed in full.
# (Same option as set in the traditional-models section.)
pd.set_option('display.max_colwidth', None)

In [None]:
# Rebind `evals` to the combined NN results for the queries in this section.
# NOTE: this replaces the binding made in the traditional-models section;
# cells run after this one see the NN results.
evals = test_nn_all_results

In [None]:
# Keep only the 'individual set' evaluations, highest `f1_boring` first.
individual_rows = evals.loc[evals['evaluation_data'] == 'individual set']
individual_rows.sort_values('f1_boring', ascending=False)

In [None]:
# CNN rows evaluated on the 'individual set', highest `f1_boring` first.
on_individual_set = evals['evaluation_data'] == 'individual set'
is_cnn = evals['model'].str.contains("CNN")
evals[on_individual_set & is_cnn].sort_values('f1_boring', ascending=False)

## Plotting

In [None]:
# Combine the non-NN and NN result frames into one table for plotting.
# ignore_index=True renumbers the rows of the combined frame.
results = pd.concat([test_all_results, test_nn_all_results], ignore_index=True)
results.shape

In [None]:
test_all_results.columns

In [None]:
cols = ['evaluation_data', 'model', 'accuracy', 'f1_boring', 'f1_engaging']

In [None]:
test_results = results[cols]

In [None]:
test_results = test_results[test_results.evaluation_data == 'individual set']

In [None]:
test_results

In [None]:
def map_model_name(model_name):
    """Collapse a detailed model description into a model-family label.

    Matching is case-insensitive and substring-based. Families are checked
    in a fixed order (logistic regression, random forest, xgboost, cnn) and
    the first one found wins.

    Args:
        model_name: Model description string.

    Returns:
        'logistic regression', 'random forest', 'xgboost' or 'cnn' when the
        corresponding text occurs in ``model_name``; otherwise 'denseNet'.
    """
    # Normalise once instead of lower-casing in every branch.
    lowered = model_name.lower()
    for family in ('logistic regression', 'random forest', 'xgboost', 'cnn'):
        if family in lowered:
            return family
    # Fallback: anything unmatched is labelled as the dense network. The
    # original name is never passed through.
    return 'denseNet'

In [None]:
# Derive the coarse model family for every row. `assign` returns a new
# DataFrame rather than writing into the filtered selection, which avoids
# pandas' SettingWithCopyWarning.
test_results = test_results.assign(model_name=test_results['model'].apply(map_model_name))

In [None]:
test_results

In [None]:
%pwd

In [None]:
# Reshape to long format: one row per (model_name, metric) value, so the
# metric can be used as the hue in the box plot.
score_columns = ['accuracy', 'f1_boring', 'f1_engaging']
melted_df = pd.melt(test_results, id_vars=['model_name'], value_vars=score_columns,
                    var_name='metric', value_name='value')

# Draw the boxes on an explicit axes, grouped by model and coloured by metric.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=melted_df, x='model_name', y='value', hue='metric', ax=ax)

# Labels: the x axis label is left blank.
#plt.title('Accuracy and F1 (Boring and Engaging) by Model')
ax.set_xlabel('')
ax.set_ylabel('Score', fontsize=12, fontweight='bold')

# Bold, 12pt tick labels on both axes.
plt.xticks(fontsize=12, fontweight='bold')
plt.yticks(fontsize=12, fontweight='bold')

ax.legend(prop={'size': 12, 'weight': 'bold'})

fig.tight_layout()

# Render the figure.
plt.show()