# McNemar Test

## Import All Necessary Libraries

In [21]:
import pandas as pd
import numpy as np
from statsmodels.stats.contingency_tables import mcnemar 
import os

## Read All the Data

In [22]:
dir = '/Users/angelaoryza/Documents/TA/noisy-rnnids/rnnids-py/results/mcnemar/http'
subdir = os.listdir(dir)

In [23]:
df_dict = {}
for file in subdir:
    if file.endswith('.csv'):
        df = pd.read_csv(f'./{file}')
        file = file.split('.csv')[0]
        df_dict[file] = df

In [24]:
df_dict['pred_0.0']

Unnamed: 0.1,Unnamed: 0,id,b_mean,b_iqr,b_zscore,f_zscore,f_mean,f_iqr
0,0,175.45.176.1-4657-149.171.126.18-80-tcp,0,1,1,1,0,1
1,1,175.45.176.3-32473-149.171.126.18-80-tcp,0,1,1,1,0,1
2,2,175.45.176.0-21873-149.171.126.14-80-tcp,0,1,1,1,0,1
3,3,175.45.176.0-49194-149.171.126.17-80-tcp,0,1,1,1,0,1
4,4,175.45.176.1-51435-149.171.126.14-80-tcp,0,1,1,1,0,1
...,...,...,...,...,...,...,...,...
149077,149077,149.171.126.5-80-59.166.0.7-39048-tcp,1,1,1,1,1,1
149078,149078,59.166.0.7-25671-149.171.126.9-80-tcp,0,0,0,0,0,0
149079,149079,59.166.0.6-24097-149.171.126.7-80-tcp,0,0,0,0,0,0
149080,149080,175.45.176.2-20096-149.171.126.15-80-tcp,0,1,1,1,0,1


In [25]:
for key, df in df_dict.items():
    df.drop('Unnamed: 0', axis=1, inplace=True)

## Data Exploration

In [26]:
for key, df in df_dict.items():
    temp = df[df.duplicated()]['id'].unique()
    print(f'There are {len(temp)} duplicates data in df {key}')

There are 25269 duplicates data in df pred_0.005
There are 27113 duplicates data in df pred_0.0


In [27]:
for key, df in df_dict.items():
    count = 0
    for t in temp:
        srs = df[df['id']==t][['b_mean', 'b_iqr', 'b_zscore', 'f_mean', 'f_iqr', 'f_zscore' ]].nunique()
        if (srs > 1).any():
            print(t)
            count += 1
            break
        else:
            continue
    if count == 0:
        print(f'All duplicated values in df {key} have the same prediction')
    else:
        print(f'Not all duplicated values in df {key} have the same prediction')

59.166.0.8-12114-149.171.126.7-80-tcp
Not all duplicated values in df pred_0.005 have the same prediction
59.166.0.8-12114-149.171.126.7-80-tcp
Not all duplicated values in df pred_0.0 have the same prediction


## Data Pre-Processing

In [10]:
def preprocess(df):
    df = df.sort_values(by='id')
    df.drop_duplicates(inplace=True)
    return df

In [11]:
for key, df in df_dict.items():
    df = preprocess(df)

### Merge DF

In [28]:
df_merge = dict()
x = df_dict['pred_0.0']
for key, df in df_dict.items():
    if key == 'pred_0.0':
        continue
    else:
        df_all = pd.merge(x, df, on='id', how='inner')
        df_all = df_all.sort_values(by='id')
        df_merge[key] = df_all

In [38]:
df_merge['pred_0.005']

Unnamed: 0,id,b_mean_x,b_iqr_x,b_zscore_x,f_zscore_x,f_mean_x,f_iqr_x,b_mean_y,b_iqr_y,b_zscore_y,f_zscore_y,f_mean_y,f_iqr_y
117693,149.171.126.0-80-59.166.0.0-10657-tcp,1,1,1,1,1,1,1,1,1,1,1,1
117700,149.171.126.0-80-59.166.0.0-10657-tcp,1,1,1,1,1,1,1,1,1,1,1,1
117699,149.171.126.0-80-59.166.0.0-10657-tcp,1,1,1,1,1,1,1,1,1,1,1,1
117698,149.171.126.0-80-59.166.0.0-10657-tcp,1,1,1,1,1,1,1,1,1,1,1,1
117694,149.171.126.0-80-59.166.0.0-10657-tcp,1,1,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
46978,59.166.0.9-9990-149.171.126.9-80-tcp,0,0,0,0,0,0,0,0,0,0,0,0
23508,59.166.0.9-9991-149.171.126.4-80-tcp,0,0,0,0,0,0,0,0,0,0,0,0
23509,59.166.0.9-9991-149.171.126.4-80-tcp,0,0,0,0,0,0,0,0,0,0,0,0
23507,59.166.0.9-9991-149.171.126.4-80-tcp,0,0,0,0,0,0,0,0,0,0,0,0


- Plot F1-Score untuk tiap tiap threshold
- Cari trend regresinya, Hitung gradiennya untuk tiap threshold.
- Dilakukan untuk setiap protokol
- Rekap McNemar

- Vectorizing:


## Run McNemar Test

In [29]:
def mcnemar_test(df, x , y, key):
    a = len(df[(df[x]==0) & (df[y]==0)])
    b = len(df[(df[x]==0) & (df[y]==1)])
    c = len(df[(df[x]==1) & (df[y]==0)])
    d = len(df[(df[x]==1) & (df[y]==1)])

    table = np.array([
    [a,b],
    [c,d]
    ])

    print(key)
    print(table)
    result = mcnemar(table)
    print(result)
    return result.pvalue


### For Binary Score

#### For B-Mean

In [30]:
result = {}
result['b-mean'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_mean_x', 'b_mean_y', key)
    result['b-mean'][key] = test

pred_0.005
[[181983    566]
 [   164  56601]]
pvalue      1.3885322235306222e-52
statistic   164.0


#### For B-IQR

In [31]:
result['b-iqr'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_iqr_x', 'b_iqr_y', key)
    result['b-iqr'][key] = test

pred_0.005
[[153339  10981]
 [    16  74978]]
pvalue      0.0
statistic   16.0


#### For B-ZScore

In [32]:
result['b-zscore'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_zscore_x', 'b_zscore_y', key)
    result['b-zscore'][key] = test

pred_0.005
[[149699    177]
 [  3629  85809]]
pvalue      0.0
statistic   177.0


### For Floating Score

#### For F-Mean

In [33]:
result['f-mean'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_mean_x', 'f_mean_y', key)
    result['f-mean'][key] = test

pred_0.005
[[185279     28]
 [    56  53951]]
pvalue      0.002985323184447229
statistic   28.0


#### For F-IQR

In [34]:
result['f-iqr'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_iqr_x', 'f_iqr_y', key)
    result['f-iqr'][key] = test

pred_0.005
[[157658   5821]
 [    90  75745]]
pvalue      0.0
statistic   90.0


#### For F-ZScore

In [35]:
result['f-zscore'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_zscore_x', 'f_zscore_y', key)
    result['f-zscore'][key] = test

pred_0.005
[[156679   3788]
 [   490  78357]]
pvalue      0.0
statistic   490.0


## Saving Test Result

In [36]:
mcnemar_result = pd.DataFrame.from_dict(result).sort_index(axis=0)
mcnemar_result

Unnamed: 0,b-mean,b-iqr,b-zscore,f-mean,f-iqr,f-zscore
pred_0.005,1.3885320000000001e-52,0.0,0.0,0.002985,0.0,0.0


In [39]:
mcnemar_result.to_excel('./http-mcnemar.xlsx')