In [1]:
import pandas as pd
import numpy as np
import os
from scipy.stats import ttest_ind, f_oneway

### Helper Functions

In [2]:
column_dict_no_sent = {
    'preprocessed_no_sent_last_7.csv': '7 Games No Sent.',
    'preprocessed_no_sent_last_7_wt.csv': '7 Games Wtd No Sent.',
    'preprocessed_no_sent_last_1.csv': '1 Game No Sent.',
    'preprocessed_no_sent_last_3.csv': '3 Games No Sent.',
    'preprocessed_no_sent_last_3_wt.csv': '3 Games Wtd No Sent.',
    'preprocessed_no_sent_last_ssn.csv': 'Season No Sent.',
    'preprocessed_no_sent_last_ssn_wt.csv': 'Season Wtd No Sent.',
    'preprocessed_sent_24_last_7.csv': '7 Games 24h Sent.',
    'preprocessed_sent_24_last_7_wt.csv': '7 Games Wtd 24h Sent.',
    'preprocessed_sent_96_last_7.csv': '7 Games 96h Sent.',
    'preprocessed_sent_96_last_7_wt.csv': '7 Games Wtd 96h Sent.',
    'preprocessed_sent_cross_last_7.csv': '7 Games Cross Sent.',
    'preprocessed_sent_cross_last_7_wt.csv': '7 Games Wtd Cross Sent.',
    'preprocessed_sent_cross_last_ssn_wt.csv': 'Season Wtd Cross Sent.',
    'preprocessed_sent_24_last_ssn_wt.csv': 'Season Wtd 24h Sent.',
    'preprocessed_sent_96_last_ssn_wt.csv': 'Season Wtd 96h Sent.',
    'preprocessed_sent_cross_last_1.csv': '1 Game Cross Sent.',
    'preprocessed_sent_24_last_1.csv': '1 Game 24h Sent.',
    'preprocessed_sent_96_last_1.csv': '1 Game 96h Sent.',
}

In [87]:
def get_box_plot(df, col_labels):
    scores = df['scores'].apply(pd.Series).transpose().astype('float')
    # scores = scores.rename(columns=col_labels)
    scores.boxplot(rot=90, figsize=(12,8), grid=False, fontsize=15)

def get_all_scores(df):
    scores_df = df['scores'].apply(pd.Series).transpose().astype('float')
    scores_df = scores_df.rename_axis('Fold').rename_axis('Dataset',axis='columns')
    return scores_df

def get_all_scores_nn(df):
    scores_df = df['scores'].apply(pd.Series).transpose().astype('float')
    scores_df = scores_df.rename_axis('Fold').rename_axis('Dataset',axis='columns')
    return scores_df

In [79]:
def get_file_fix_scores(filepath):
    df = pd.read_csv(filepath, index_col='file')
    df['scores'] = df['scores'].replace('\s+', ',', regex=True)
    df['scores'] = df['scores'].apply(lambda x: x[1:-2].split(','))
    df = df.sort_values('scores_mean',ascending=False)
    df = df.rename(column_dict_no_sent, axis=0)
    return df

def get_file_fix_scores_nn(filepath):
    df = pd.read_csv(filepath, index_col='file')
    df['scores'] = df['scores'].replace('\s+', '', regex=True)
    df['scores'] = df['scores'].apply(lambda x: x[1:-2].split(','))
    df = df.sort_values('scores_mean',ascending=False)
    df = df.rename(column_dict_no_sent, axis=0)
    return df

def get_file_nn_max_models(filepath):
    df = pd.read_csv(filepath, index_col='file').drop(['Unnamed: 0'], axis=1)
    df = df.rename(column_dict_no_sent, axis=0)
    idx_nn = df.groupby('file')['acc'].transform(max) == df['acc']
    df_max = df[idx_nn].sort_values('acc', ascending=False)
    return df_max

## Naive Bayes Results

7 games wtd and unweighted had same results so both included in final testing  
They remained identical across all sentiment categories as well

In [50]:
cols_7_wtd = ['7 Games Wtd 24h Sent.', '7 Games Wtd 96h Sent.', '7 Games Wtd No Sent.', '7 Games Wtd Cross Sent.']

df_nb_no_sent = get_file_fix_scores('./results/nb/nb_no_sent.csv')
df_nb_test = get_file_fix_scores('./results/nb/nb_sent.csv').drop(labels=cols_7_wtd, axis=0)
df_nb_test_scores = get_all_scores(df_nb_test)


In [8]:
df_nb_no_sent

Unnamed: 0_level_0,scores_mean,scores_std,smoothing,scores
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7 Games No Sent.,0.554455,0.129873,0.5336699,"[0.54545455, 0.63636364, 0.45454545, 0.3636363..."
7 Games Wtd No Sent.,0.554455,0.129873,0.5336699,"[0.54545455, 0.63636364, 0.45454545, 0.3636363..."
1 Game No Sent.,0.549455,0.145422,0.0001,"[0.54545455, 0.63636364, 0.45454545, 0.3636363..."
Season No Sent.,0.542,0.134076,0.01873817,"[0.45454545, 0.63636364, 0.36363636, 0.3636363..."
Season Wtd No Sent.,0.542,0.134076,0.01873817,"[0.45454545, 0.63636364, 0.36363636, 0.3636363..."
3 Games No Sent.,0.539,0.14779,1.232847e-09,"[0.54545455, 0.63636364, 0.54545455, 0.3636363..."
3 Games Wtd No Sent.,0.539,0.14779,1.232847e-09,"[0.54545455, 0.63636364, 0.54545455, 0.3636363..."


In [9]:
df_nb_test

Unnamed: 0_level_0,scores_mean,scores_std,smoothing,scores
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7 Games 24h Sent.,0.565909,0.132835,0.351119,"[0.54545455, 0.63636364, 0.45454545, 0.4545454..."
7 Games 96h Sent.,0.555636,0.12989,0.432876,"[0.54545455, 0.54545455, 0.45454545, 0.3636363..."
7 Games No Sent.,0.554455,0.129873,0.53367,"[0.54545455, 0.63636364, 0.45454545, 0.3636363..."
7 Games Cross Sent.,0.551545,0.128745,0.432876,"[0.45454545, 0.63636364, 0.45454545, 0.3636363..."


In [104]:
df_nb_test_scores.describe()

Dataset,7 Games 24h Sent.,7 Games 96h Sent.,7 Games No Sent.,7 Games Cross Sent.
count,100.0,100.0,100.0,100.0
mean,0.565909,0.555636,0.554455,0.551545
std,0.133505,0.130545,0.130527,0.129394
min,0.2,0.2,0.2,0.2
25%,0.454545,0.454545,0.454545,0.454545
50%,0.6,0.545455,0.572727,0.545455
75%,0.636364,0.636364,0.636364,0.636364
max,0.909091,0.818182,0.818182,0.818182


In [None]:
get_box_plot(df_nb_no_sent, column_dict_no_sent)

In [None]:
get_box_plot(df_nb_test, column_dict_no_sent)

## Random Forest Results

In [31]:
df_rf_no_sent = get_file_fix_scores('./results/rf/rf_no_sent2.csv')
df_rf_test = get_file_fix_scores('./results/rf/rf_sent.csv')
df_rf_test_scores = get_all_scores(df_rf_test)


In [32]:
df_rf_no_sent

Unnamed: 0_level_0,scores_mean,scores_std,n_estimators,max_depth,min_samples_split,min_samples_leaf,scores
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Season Wtd No Sent.,0.545545,0.146978,5,75,8,4,"[0.63636364, 0.45454545, 0.36363636, 0.5454545..."
7 Games No Sent.,0.537364,0.127389,8,150,2,8,"[0.54545455, 0.54545455, 0.63636364, 0.6363636..."
Season No Sent.,0.534909,0.146082,5,50,2,2,"[0.36363636, 0.54545455, 0.63636364, 0.6363636..."
1 Game No Sent.,0.533455,0.156722,8,100,4,2,"[0.54545455, 0.45454545, 0.72727273, 0.5454545..."
7 Games Wtd No Sent.,0.529909,0.133245,10,10,4,4,"[0.45454545, 0.72727273, 0.54545455, 0.7272727..."
3 Games No Sent.,0.492636,0.155328,8,100,12,4,"[0.36363636, 0.45454545, 0.63636364, 0.4545454..."
3 Games Wtd No Sent.,0.490818,0.157373,5,15,8,4,"[0.54545455, 0.45454545, 0.54545455, 0.4545454..."


In [33]:
df_rf_test

Unnamed: 0_level_0,scores_mean,scores_std,n_estimators,max_depth,min_samples_split,min_samples_leaf,scores
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Season Wtd Cross Sent.,0.548455,0.148408,10,,8,8,"[0.45454545, 0.54545455, 0.81818182, 0.6363636..."
Season Wtd 96h Sent.,0.538455,0.140735,5,15.0,4,1,"[0.72727273, 0.54545455, 0.81818182, 0.6363636..."
Season Wtd No Sent.,0.536091,0.151027,10,75.0,2,1,"[0.54545455, 0.36363636, 0.54545455, 0.4545454..."
Season Wtd 24h Sent.,0.534909,0.153007,8,200.0,4,8,"[0.72727273, 0.45454545, 0.81818182, 0.6363636..."


In [105]:
df_rf_test_scores.describe()

file,Season Wtd Cross Sent.,Season Wtd 96h Sent.,Season Wtd No Sent.,Season Wtd 24h Sent.
count,100.0,100.0,100.0,100.0
mean,0.548455,0.538455,0.536091,0.534909
std,0.149155,0.141444,0.151787,0.153778
min,0.2,0.181818,0.1,0.2
25%,0.440909,0.454545,0.454545,0.4
50%,0.572727,0.545455,0.5,0.545455
75%,0.636364,0.636364,0.636364,0.636364
max,0.9,0.818182,0.909091,0.818182


In [None]:
get_box_plot(df_rf_no_sent, column_dict_no_sent)

In [None]:
get_box_plot(df_rf_test, column_dict_no_sent)

## AdaBoost Results


In [29]:
df_ada_no_sent = get_file_fix_scores('./results/ada/ada_resust_no_sent2.csv')
df_ada_test = get_file_fix_scores('./results/ada/ada_result_sent.csv')
df_ada_test_scores = get_all_scores(df_ada_test)


In [18]:
df_ada_no_sent

Unnamed: 0_level_0,scores_mean,scores_std,n,depth,learn_rate,scores
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1 Game No Sent.,0.617364,0.156713,100,1,0.1,"[0.63636364, 0.63636364, 0.36363636, 0.6363636..."
7 Games No Sent.,0.563091,0.145766,10,1,10.0,"[0.63636364, 0.45454545, 0.54545455, 0.5454545..."
7 Games Wtd No Sent.,0.563091,0.145766,10,1,10.0,"[0.63636364, 0.45454545, 0.54545455, 0.5454545..."
3 Games No Sent.,0.553545,0.15388,100,1,10.0,"[0.45454545, 0.81818182, 0.45454545, 0.3636363..."
3 Games Wtd No Sent.,0.553545,0.15388,100,1,10.0,"[0.45454545, 0.81818182, 0.45454545, 0.3636363..."
Season No Sent.,0.530909,0.12698,5000,1,1.0,"[0.54545455, 0.45454545, 0.72727273, 0.6363636..."
Season Wtd No Sent.,0.530909,0.12698,5000,1,1.0,"[0.54545455, 0.45454545, 0.72727273, 0.6363636..."


In [19]:
df_ada_test

Unnamed: 0_level_0,scores_mean,scores_std,n,depth,learn_rate,scores
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1 Game Cross Sent.,0.652091,0.136065,75,1,0.1,"[0.63636364, 0.81818182, 0.81818182, 0.6363636..."
1 Game No Sent.,0.620545,0.156446,75,1,0.1,"[0.63636364, 0.63636364, 0.27272727, 0.6363636..."
1 Game 96h Sent.,0.596455,0.147799,75,1,0.1,"[0.63636364, 0.63636364, 0.63636364, 0.6363636..."
1 Game 24h Sent.,0.596091,0.14099,75,1,0.05,"[0.45454545, 0.72727273, 0.45454545, 0.6363636..."


In [100]:
df_ada_test_scores.head()

file,1 Game Cross Sent.,1 Game No Sent.,1 Game 96h Sent.,1 Game 24h Sent.
0,0.636364,0.636364,0.636364,0.454545
1,0.818182,0.636364,0.636364,0.727273
2,0.818182,0.272727,0.636364,0.454545
3,0.636364,0.636364,0.636364,0.636364
4,0.545455,0.454545,0.454545,0.454545


In [None]:
get_box_plot(df_ada_no_sent, column_dict_no_sent)

In [None]:
get_box_plot(df_ada_test, column_dict_no_sent)

## Neural Network Results

In [88]:
df_nn_no_sent = get_file_nn_max_models('./results/nn/NN_no_sent_final.csv')
df_nn_test = get_file_fix_scores_nn('./results/nn1/nn_sent_results.csv').drop(['Unnamed: 0'], axis=1)
df_nn_test_scores = get_all_scores_nn(df_nn_test)

In [90]:
df_nn_test_scores.describe()


Dataset,7 Games Wtd No Sent.,7 Games Wtd 96h Sent.,7 Games Wtd Cross Sent.,7 Games Wtd 24h Sent.
count,100.0,100.0,100.0,100.0
mean,0.552,0.546,0.539,0.533
std,0.152077,0.130593,0.133254,0.151127
min,0.0,0.2,0.0,0.0
25%,0.4,0.4,0.5,0.4
50%,0.5,0.5,0.5,0.6
75%,0.7,0.7,0.6,0.6
max,0.9,0.8,0.8,0.8


Note that because of computational speed issues only repeated 5 times for no sentiment nn model. Could explain the higher values. 

In [23]:
df_nn_no_sent

Unnamed: 0_level_0,acc,loss,layers,units,dropout_rate,learn_rate
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
7 Games Wtd No Sent.,58.6,0.9,4,256,0.3,0.001855
1 Game No Sent.,57.4,0.83,4,256,0.3,0.001855
7 Games No Sent.,57.4,2.3,2,2048,0.3,0.013
Season No Sent.,56.4,0.81,6,512,0.2,0.013
Season Wtd No Sent.,55.6,0.98,4,512,0.3,0.018549
3 Games No Sent.,54.2,0.79,8,256,0.4,0.018549
3 Games Wtd No Sent.,53.4,0.89,6,512,0.3,0.018549
3 Games Wtd No Sent.,53.4,0.86,6,512,0.2,0.018549
3 Games Wtd No Sent.,53.4,0.86,4,256,0.3,0.001855


In [24]:
df_nn_test

Unnamed: 0_level_0,scores_mean,scores_std,layers,units,dropout_rate,learn_rate,scores
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
7 Games Wtd No Sent.,0.557,0.140894,4,256,0.3,0.001855,"[0.5, , 0.699999988079071, , 0.400000005960464..."
7 Games Wtd 96h Sent.,0.546,0.129938,4,256,0.2,0.001855,"[0.5, , 0.5, , 0.5, , 0.699999988079071, , 0.5..."
7 Games Wtd Cross Sent.,0.544,0.121095,4,256,0.3,0.001855,"[0.699999988079071, , 0.6000000238418579, , 0...."
7 Games Wtd 24h Sent.,0.538,0.140556,4,256,0.3,0.018549,"[0.6000000238418579, , 0.5, , 0.30000001192092..."


## Statistical Testing


In [93]:
test_dfs = [
    ['ada_boost', df_ada_test],
    ['random_forest', df_rf_test],
    ['neural_network', df_nn_test],
    ['naive_bayes', df_nb_test]
]

### T-Test

In [None]:
def ttest_cols(df, i, j):
    if i == j:
        print('Err: indices the same')
        return 
    tstat, pvalue = ttest_ind(df.iloc[:, i], df.iloc[:, j])
    return tstat, pvalue

tstat1, pvalue1 = ttest_cols(df_nb_test, 1, 2)
print(tstat1, pvalue1)



### One-Sided One Sample Test
This would be to compare to the baseline 52.1% or whatever is needed to be profitable  
Make an inference to a population in comparison to some set value. 
One-sided - want to know if accuracy is > 53.1%, don't care if lower  
A one-sided p-value is the probability that the test statistic is greater than (or less than) the calculated value.  
p-value less than 0.05 to be statistically significant (and a p-value of <0.01 as highly statistically significant  

### ANOVA Test - Difference between all Datasets



In [95]:
def anova_test(df):
    return f_oneway(df.iloc[:, 0], df.iloc[:, 1], df.iloc[:,2], df.iloc[:,2])

anova_stat = anova_test(df_nb_test)
anova_stat

F_onewayResult(statistic=47.48404497248537, pvalue=6.231731567243136e-07)

In [99]:
anovas = []
for df in test_dfs:
    result = anova_test(df[1])
    anovas.append({'classifier': df[0], 'anova_stat': result.statistic, 'pvalue': result.pvalue})

pd.DataFrame.from_records(anovas, index='classifier')

Unnamed: 0_level_0,anova_stat,pvalue
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1
ada_boost,38144510.0,3.899624e-42
random_forest,29.89028,7.516047e-06
neural_network,469803.5,1.117105e-30
naive_bayes,47.48404,6.231732e-07


p-value < 0.01 so 99% confident we can reject Ho that all means are equal. Therefore can reject Ho that all the accuracies are the same. 
AdaBoost and Neural Networks close to 0 while Random Forest and Naive bayes are still quite low but not nearly as close to 0 as the other classifiers. 


### Tukey Test If Anova Shows they're different


In [217]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import tukey_hsd
def tukey_ci(res, alpha):
    conf = res.confidence_interval(confidence_level=1-alpha)
    for ((i, j), l) in np.ndenumerate(conf.low):
        # filter out self comparisons
        if i != j:
            h = conf.high[i,j]
            print(f"({i} - {j}) {l:>6.3f} {h:>6.3f}")

# def tukey_on_df_old(df, alpha=0.05):
#     groups = []
#     for i in range(0,4):
#         groups.append(df.iloc[:, i])
#     result = tukey_hsd(groups[0], groups[1], groups[2], groups[3])
#     tukey_ci(result, alpha)
#     cols = df.columns
#     df = pd.DataFrame(result.pvalue, columns = [cols], index = [cols])      
#     df_reject_ho = df.lt(alpha)
#     return df, df_reject_ho


def tukey_on_df(df, alpha=0.05):
    scores = []
    for i in range(0,4):
        scores.append(df.iloc[:, i].values)
    scores_flat = [e for sub in scores for e in sub]
    new_df = pd.DataFrame({'scores': scores_flat,'group': np.repeat(df.columns, repeats=100)})
    tukey = pairwise_tukeyhsd(endog=new_df['scores'],
                          groups=new_df['group'],
                          alpha=alpha)
    print(tukey)
    return tukey


#### AdaBoost Tukey

In [157]:
df_ada_test['scores_mean'].round(4)*100

file
1 Game Cross Sent.    65.21
1 Game No Sent.       62.05
1 Game 96h Sent.      59.65
1 Game 24h Sent.      59.61
Name: scores_mean, dtype: float64

In [220]:
tukey_res_ada = tukey_on_df(df_ada_test_scores, 0.05)

            Multiple Comparison of Means - Tukey HSD, FWER=0.05            
      group1             group2       meandiff p-adj   lower  upper  reject
---------------------------------------------------------------------------
  1 Game 24h Sent.   1 Game 96h Sent.   0.0004    1.0  -0.053 0.0537  False
  1 Game 24h Sent. 1 Game Cross Sent.    0.056 0.0355  0.0026 0.1094   True
  1 Game 24h Sent.    1 Game No Sent.   0.0245 0.6384 -0.0289 0.0778  False
  1 Game 96h Sent. 1 Game Cross Sent.   0.0556 0.0373  0.0023  0.109   True
  1 Game 96h Sent.    1 Game No Sent.   0.0241 0.6495 -0.0293 0.0775  False
1 Game Cross Sent.    1 Game No Sent.  -0.0315 0.4234 -0.0849 0.0218  False
---------------------------------------------------------------------------


if the p-value is > 0.05 then fail to reject Ho that they have the same means  
p-value < 0.05 then can reject Ho and accept Ha that they have different means


Models 0 and 2 are not equally accurate in predicting outcomes.   
We’re 95% confident that a batch of 100 tests of model 0 is 0.2% - 10.9% more accurate than model 1
  
Models 0 and 3 are not equally accurate in predicting outcomes.   
We’re 95% confident that a batch of 100 tests of model 0 is 0.3% - 10.9% more accurate than model 3

1 Game cross significantly different at 5% from both 96h and 24h. Rest of averages not significantly different. 

#### Random Forest Tukey
No difference between means for Random Forest.

In [148]:
df_rf_test['scores_mean'].round(4)*100

file
Season Wtd Cross Sent.    54.85
Season Wtd 96h Sent.      53.85
Season Wtd No Sent.       53.61
Season Wtd 24h Sent.      53.49
Name: scores_mean, dtype: float64

In [221]:
tukey_res_rf = tukey_on_df(df_rf_test_scores, 0.1)

                Multiple Comparison of Means - Tukey HSD, FWER=0.10                
        group1                 group2         meandiff p-adj   lower  upper  reject
-----------------------------------------------------------------------------------
  Season Wtd 24h Sent.   Season Wtd 96h Sent.   0.0035 0.9983 -0.0449  0.052  False
  Season Wtd 24h Sent. Season Wtd Cross Sent.   0.0135 0.9182 -0.0349  0.062  False
  Season Wtd 24h Sent.    Season Wtd No Sent.   0.0012 0.9999 -0.0473 0.0497  False
  Season Wtd 96h Sent. Season Wtd Cross Sent.     0.01 0.9647 -0.0385 0.0585  False
  Season Wtd 96h Sent.    Season Wtd No Sent.  -0.0024 0.9995 -0.0508 0.0461  False
Season Wtd Cross Sent.    Season Wtd No Sent.  -0.0124 0.9362 -0.0608 0.0361  False
-----------------------------------------------------------------------------------


No significant differences between means at the 95% level or even the 90% confidence level. The anova statistic was much smaller so all the means do overlap still. Can conclude if there is a difference it is just significant enough to pass the anova test but comparing group means they are similar. 


#### Neural Network Tukey Test
No significant difference between tests at the 90% confidence level.

In [166]:
df_nn_test['scores_mean'].round(4)*100

file
7 Games Wtd No Sent.       55.7
7 Games Wtd 96h Sent.      54.6
7 Games Wtd Cross Sent.    54.4
7 Games Wtd 24h Sent.      53.8
Name: scores_mean, dtype: float64

In [222]:
tukey_res_nn = tukey_on_df(df_nn_test_scores, 0.1)

                 Multiple Comparison of Means - Tukey HSD, FWER=0.10                 
         group1                  group2         meandiff p-adj   lower  upper  reject
-------------------------------------------------------------------------------------
  7 Games Wtd 24h Sent.   7 Games Wtd 96h Sent.    0.013 0.9166 -0.0332 0.0592  False
  7 Games Wtd 24h Sent. 7 Games Wtd Cross Sent.    0.006 0.9907 -0.0402 0.0522  False
  7 Games Wtd 24h Sent.    7 Games Wtd No Sent.    0.019 0.7803 -0.0272 0.0652  False
  7 Games Wtd 96h Sent. 7 Games Wtd Cross Sent.   -0.007 0.9855 -0.0532 0.0392  False
  7 Games Wtd 96h Sent.    7 Games Wtd No Sent.    0.006 0.9907 -0.0402 0.0522  False
7 Games Wtd Cross Sent.    7 Games Wtd No Sent.    0.013 0.9166 -0.0332 0.0592  False
-------------------------------------------------------------------------------------


#### Naive Bayes Tukey Test

Fail to reject the Ho that there is no significant difference in the accuracies between any individual models.

In [172]:
df_nb_test['scores_mean'].round(4)*100

file
7 Games 24h Sent.      56.59
7 Games 96h Sent.      55.56
7 Games No Sent.       55.45
7 Games Cross Sent.    55.15
Name: scores_mean, dtype: float64

In [223]:
tukaey_res_nb = tukey_on_df(df_nb_test_scores, 0.1)


             Multiple Comparison of Means - Tukey HSD, FWER=0.10             
       group1              group2       meandiff p-adj   lower  upper  reject
-----------------------------------------------------------------------------
  7 Games 24h Sent.   7 Games 96h Sent.  -0.0103 0.9453 -0.0529 0.0323  False
  7 Games 24h Sent. 7 Games Cross Sent.  -0.0144 0.8656  -0.057 0.0282  False
  7 Games 24h Sent.    7 Games No Sent.  -0.0115 0.9262  -0.054 0.0311  False
  7 Games 96h Sent. 7 Games Cross Sent.  -0.0041 0.9962 -0.0467 0.0385  False
  7 Games 96h Sent.    7 Games No Sent.  -0.0012 0.9999 -0.0438 0.0414  False
7 Games Cross Sent.    7 Games No Sent.   0.0029 0.9986 -0.0397 0.0455  False
-----------------------------------------------------------------------------
