In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import utils

# Apply seaborn's 'whitegrid' style globally so every figure drawn below shares it.
sns.set_style('whitegrid')

In [None]:
def get_mean_time(data):
    """Return the arithmetic mean of the 'Training Time' column of `data`.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a 'Training Time' column.

    Returns
    -------
    The value produced by calling ``.mean()`` on that column.
    """
    training_times = data['Training Time']
    return training_times.mean()

def plot_error(data, fptc, error_type='MAPE'):
    """Plot the error between the mean measured training time and each FPTC value.

    The mean of ``data['Training Time']`` is compared, one row at a time,
    against every entry of ``fptc['FPTC']``; the resulting errors are drawn
    as a line over ``fptc['Cols']``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a 'Training Time' column.
    fptc : pandas.DataFrame
        Frame with an 'FPTC' column (values to score) and a 'Cols' column
        (x-axis values). Any number of rows is accepted.
    error_type : str, default 'MAPE'
        'RMSE' scores with ``utils.compute_rmse``; any other value falls
        back to ``utils.get_mape`` and the y-axis is labelled 'MAPE'.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    mean_time = get_mean_time(data)

    # Resolve the metric and its axis label once instead of re-testing
    # error_type for every row and again when labelling the axis.
    if error_type == 'RMSE':
        metric, label = utils.compute_rmse, 'RMSE'
    else:
        metric, label = utils.get_mape, 'MAPE'

    # Score every row of fptc (rather than a fixed count) so the error list
    # always has the same length as fptc['Cols'] used for the x-axis.
    errors = [metric([mean_time], [estimate]) for estimate in fptc['FPTC']]

    fig, ax = plt.subplots(figsize=(10, 6))
    # Draw on the axes created above explicitly instead of relying on
    # matplotlib's implicit "current axes".
    sns.lineplot(x=fptc['Cols'], y=errors, ax=ax)
    ax.set_xlabel('Number of Columns')
    ax.set_ylabel(label)
    plt.show()

def get_fptc(rows, cols, classes, iters):
    """Compute one FPTC value per row of the median-slopes CSV.

    Reads './notebook_data/median_slopes_logreg.csv' and, for each of its
    rows, calls ``utils.train_fptc_logreg(rows, cols, classes, slope, iters)``
    with that row's 'Slope' value.

    Parameters
    ----------
    rows, cols, classes, iters
        Forwarded unchanged to ``utils.train_fptc_logreg``.

    Returns
    -------
    pandas.DataFrame
        Columns 'FPTC' (the value returned by ``utils.train_fptc_logreg``)
        and 'Cols' (the CSV row's 'Columns' value), one row per CSV row,
        indexed 0..n-1.
    """
    slopes = pd.read_csv('./notebook_data/median_slopes_logreg.csv')
    # Collect plain records and build the frame once; growing a DataFrame
    # with pd.concat inside the loop copies all previous rows on each step.
    records = [
        {
            'FPTC': utils.train_fptc_logreg(rows, cols, classes, r['Slope'], iters),
            'Cols': r['Columns'],
        }
        for _, r in slopes.iterrows()
    ]
    # Naming the columns keeps 'FPTC' and 'Cols' present even if the CSV has no rows.
    return pd.DataFrame(records, columns=['FPTC', 'Cols'])

In [None]:
# Directory prefix for the per-dataset training-time CSVs; each dataset cell
# below appends its own file name to this string.
base_path = './training_time_logreg/'

# Adult Dataset

In [None]:
# Load the training-time CSV for the Adult dataset.
path = base_path + 'training_time_adult.csv'
adult = pd.read_csv(path)

# Rounded mean of the 'Iterations' column, forwarded to get_fptc as `iters`.
iters = round(adult['Iterations'].mean())

# Keyword names follow get_fptc's signature (rows, cols, classes, iters).
fptcs = get_fptc(rows=30940, cols=103, classes=2, iters=iters)
plot_error(adult, fptcs, error_type='MAPE')

In [None]:
# Same frames as the cell above, scored with RMSE instead of the default MAPE.
plot_error(adult, fptcs, error_type='RMSE')

# Antivirus Dataset

In [None]:
# Load the training-time CSV for the Antivirus dataset.
path = base_path + 'training_time_antivirus.csv'
data = pd.read_csv(path)

# Rounded mean of the 'Iterations' column, forwarded to get_fptc as `iters`.
iters = round(data['Iterations'].mean())

# Keyword names follow get_fptc's signature (rows, cols, classes, iters).
fptcs = get_fptc(rows=373, cols=531, classes=2, iters=iters)
plot_error(data, fptcs, error_type='MAPE')

In [None]:
# Same frames as the cell above, scored with RMSE instead of the default MAPE.
plot_error(data, fptcs, error_type='RMSE')

# APS Dataset

In [None]:
# Load the training-time CSV for the APS dataset.
path = base_path + 'training_time_aps_proc.csv'
data = pd.read_csv(path)

# Rounded mean of the 'Iterations' column, forwarded to get_fptc as `iters`.
iters = round(data['Iterations'].mean())

# Keyword names follow get_fptc's signature (rows, cols, classes, iters).
fptcs = get_fptc(rows=60000, cols=162, classes=2, iters=iters)
plot_error(data, fptcs, error_type='MAPE')

In [None]:
# Same frames as the cell above, scored with RMSE instead of the default MAPE.
plot_error(data, fptcs, error_type='RMSE')

# Arcene Dataset

In [None]:
# Load the training-time CSV for the Arcene dataset.
path = base_path + 'training_time_arcene_proc.csv'
data = pd.read_csv(path)

# Rounded mean of the 'Iterations' column, forwarded to get_fptc as `iters`.
iters = round(data['Iterations'].mean())

# Keyword names follow get_fptc's signature (rows, cols, classes, iters).
fptcs = get_fptc(rows=100, cols=10001, classes=2, iters=iters)
plot_error(data, fptcs, error_type='MAPE')

In [None]:
# Same frames as the cell above, scored with RMSE instead of the default MAPE.
plot_error(data, fptcs, error_type='RMSE')

# Compas Dataset

In [None]:
# Load the training-time CSV for the Compas dataset.
path = base_path + 'training_time_compas.csv'
data = pd.read_csv(path)

# Rounded mean of the 'Iterations' column, forwarded to get_fptc as `iters`.
iters = round(data['Iterations'].mean())

# Keyword names follow get_fptc's signature (rows, cols, classes, iters).
fptcs = get_fptc(rows=6167, cols=399, classes=2, iters=iters)
plot_error(data, fptcs, error_type='MAPE')

In [None]:
# Same frames as the cell above, scored with RMSE instead of the default MAPE.
plot_error(data, fptcs, error_type='RMSE')

# Dexter Dataset

In [None]:
# Load the training-time CSV for the Dexter dataset.
path = base_path + 'training_time_dexter_proc.csv'
data = pd.read_csv(path)

# Rounded mean of the 'Iterations' column, forwarded to get_fptc as `iters`.
iters = round(data['Iterations'].mean())

# Keyword names follow get_fptc's signature (rows, cols, classes, iters).
fptcs = get_fptc(rows=300, cols=20000, classes=2, iters=iters)
plot_error(data, fptcs, error_type='MAPE')

In [None]:
# Same frames as the cell above, scored with RMSE instead of the default MAPE.
plot_error(data, fptcs, error_type='RMSE')

# German Dataset

In [None]:
# Load the training-time CSV for the German dataset.
path = base_path + 'training_time_german.csv'
data = pd.read_csv(path)

# Rounded mean of the 'Iterations' column, forwarded to get_fptc as `iters`.
iters = round(data['Iterations'].mean())

# Keyword names follow get_fptc's signature (rows, cols, classes, iters).
fptcs = get_fptc(rows=1000, cols=59, classes=2, iters=iters)
plot_error(data, fptcs, error_type='MAPE')

In [None]:
# Same frames as the cell above, scored with RMSE instead of the default MAPE.
plot_error(data, fptcs, error_type='RMSE')