In [9]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import statistics

In [10]:
def cumulative_accuracy_plot(classifier_name, df, df_name, method=''):
    """Plot per-iteration accuracy together with its cumulative mean.

    Parameters
    ----------
    classifier_name : str
        Name of the classifier; used as the first part of the plot title.
    df : pandas.DataFrame
        Frame with an 'accuracy' column. Its index is used as the x-axis.
        The frame is not modified by this function.
    df_name : str
        Name of the dataset; used as the second part of the plot title.
    method : str, optional
        Currently unused; kept so that existing calls keep working.

    Returns
    -------
    tuple
        The matplotlib ``(figure, axes)`` pair holding the plot.
    """
    # Compute the running mean on a derived frame rather than writing a new
    # column into the caller's DataFrame: plotting should not have side
    # effects on its input (and assigning into a slice would trigger
    # SettingWithCopyWarning).
    plot_df = df.assign(cml_accuracy=df['accuracy'].expanding().mean())

    acc_fig, acc_ax = plt.subplots()
    sns.scatterplot(data=plot_df, x=plot_df.index, y='accuracy', marker='.', ax=acc_ax, label='Accuracy', alpha=0.5)
    sns.lineplot(data=plot_df, x=plot_df.index, y='cml_accuracy', ax=acc_ax, label='Cumulative Mean Accuracy')

    acc_ax.set_title(classifier_name + ' for ' + df_name)
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_xlabel("nth Sampling Iteration")

    return acc_fig, acc_ax

In [1]:
def _sample_stdev(values):
    """Return the sample standard deviation, or NaN for fewer than two values."""
    values = list(values)
    if len(values) < 2:
        # statistics.stdev raises on a single observation; NaN keeps the
        # sweep running and marks the spread as undefined.
        return float('nan')
    return statistics.stdev(values)


def accuracy_by_nfeatures(class_instance, file_name, classifier, n_features=29, n_iter=1000):
    """Summarise classifier accuracy for each feature count from 1 to n_features.

    For every ``i`` in ``1..n_features`` (inclusive) the function calls
    ``class_instance.fishers(top_n=i)`` to obtain a data frame, passes it to
    ``classifier(df=..., n_iter=n_iter)``, and records the mean and sample
    standard deviation of the returned 'accuracy' and 'accuracy_train' values.

    Parameters
    ----------
    class_instance : object
        Must expose ``fishers(top_n=int)`` returning the data to classify
        (presumably the top-n features ranked by Fisher's criterion).
    file_name : object
        Label copied verbatim into the 'file_name' column of every row.
    classifier : callable
        Called as ``classifier(df=df, n_iter=n_iter)``; its result must be
        indexable by 'accuracy' and 'accuracy_train'.
    n_features : int, optional
        Largest feature count to evaluate. Values below 1 yield an empty frame.
    n_iter : int, optional
        Forwarded unchanged to ``classifier``.

    Returns
    -------
    pandas.DataFrame
        One row per feature count, indexed by the feature count as a string,
        with columns 'n_features', 'file_name', 'mean', 'stdev', 'mean_train'
        and 'stdev_train'. Numeric columns keep numeric dtypes. A stdev is NaN
        when fewer than two accuracy values were returned.
    """
    columns = ['n_features', 'file_name', 'mean', 'stdev', 'mean_train', 'stdev_train']
    records = []

    for i in range(1, n_features + 1):  # feature counts, upper bound inclusive
        df = class_instance.fishers(top_n=i)
        eval_metrics_df = classifier(df=df, n_iter=n_iter)

        acc = eval_metrics_df['accuracy']
        acc_train = eval_metrics_df['accuracy_train']

        records.append([
            i,
            file_name,
            statistics.mean(acc),
            _sample_stdev(acc),
            statistics.mean(acc_train),
            _sample_stdev(acc_train),
        ])

    # Build the frame once from row records so each column gets its own dtype
    # (a dict-of-lists followed by a transpose leaves every column as object).
    # The string index matches the labels used previously.
    return pd.DataFrame(
        records,
        columns=columns,
        index=[str(row[0]) for row in records],
    )