# McNemar Test

## Import All Necessary Libraries

In [7]:
import pandas as pd
import numpy as np
from statsmodels.stats.contingency_tables import mcnemar 
import os

## Read All the Data

In [8]:
dir = '/Users/angelaoryza/Documents/TA/noisy-rnnids/rnnids-py/results/mcnemar/ftp'
subdir = os.listdir(dir)

In [9]:
df_dict = {}
for file in subdir:
    if file.endswith('.csv'):
        df = pd.read_csv(f'./{file}')
        file = file.split('.csv')[0]
        df_dict[file] = df

In [10]:
df_dict['pred_0.0']

Unnamed: 0.1,Unnamed: 0,id,b_mean,b_iqr,b_zscore,f_zscore,f_mean,f_iqr
0,0,175.45.176.0-45235-149.171.126.16-21-tcp,1,1,1,1,1,1
1,1,175.45.176.3-1025-149.171.126.11-21-tcp,1,1,1,1,1,1
2,2,175.45.176.1-28136-149.171.126.11-21-tcp,1,1,1,1,1,1
3,3,175.45.176.2-43663-149.171.126.15-21-tcp,1,1,1,1,1,1
4,4,175.45.176.3-64709-149.171.126.15-21-tcp,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...
26108,26108,59.166.0.8-55795-149.171.126.6-21-tcp,0,0,0,0,0,0
26109,26109,59.166.0.7-7753-149.171.126.1-21-tcp,0,1,1,1,0,1
26110,26110,59.166.0.2-49329-149.171.126.0-21-tcp,0,1,1,1,0,1
26111,26111,59.166.0.3-7585-149.171.126.2-21-tcp,0,0,0,0,0,0


In [11]:
for key, df in df_dict.items():
    df.drop('Unnamed: 0', axis=1, inplace=True)

## Data Exploration

In [12]:
for key, df in df_dict.items():
    temp = df[df.duplicated()]['id'].unique()
    print(f'There are {len(temp)} duplicates data in df {key}')

There are 667 duplicates data in df pred_0.0039
There are 842 duplicates data in df pred_0.0


In [13]:
for key, df in df_dict.items():
    count = 0
    for t in temp:
        srs = df[df['id']==t][['b_mean', 'b_iqr', 'b_zscore', 'f_mean', 'f_iqr', 'f_zscore' ]].nunique()
        if (srs > 1).any():
            print(t)
            count += 1
            break
        else:
            continue
    if count == 0:
        print(f'All duplicated values in df {key} have the same prediction')
    else:
        print(f'Not all duplicated values in df {key} have the same prediction')

175.45.176.1-20956-149.171.126.16-21-tcp
Not all duplicated values in df pred_0.0039 have the same prediction
149.171.126.6-21-59.166.0.1-4337-tcp
Not all duplicated values in df pred_0.0 have the same prediction


## Data Pre-Processing

In [14]:
def preprocess(df):
    df = df.sort_values(by='id')
    df.drop_duplicates(inplace=True)
    return df

In [15]:
for key, df in df_dict.items():
    df = preprocess(df)

### Merge DF

In [16]:
df_merge = dict()
x = df_dict['pred_0.0']
for key, df in df_dict.items():
    if key == 'pred_0.0':
        continue
    else:
        df_all = pd.merge(x, df, on='id', how='inner')
        df_all = df_all.sort_values(by='id')
        df_merge[key] = df_all

- Plot F1-Score untuk tiap tiap threshold
- Cari trend regresinya, Hitung gradiennya untuk tiap threshold.
- Dilakukan untuk setiap protokol
- Rekap McNemar

- Vectorizing:


## Run McNemar Test

In [17]:
def mcnemar_test(df, x , y, key):
    a = len(df[(df[x]==0) & (df[y]==0)])
    b = len(df[(df[x]==0) & (df[y]==1)])
    c = len(df[(df[x]==1) & (df[y]==0)])
    d = len(df[(df[x]==1) & (df[y]==1)])

    table = np.array([
    [a,b],
    [c,d]
    ])

    print(key)
    print(table)
    result = mcnemar(table)
    print(result)
    return result.pvalue


### For Binary Score

#### For B-Mean

In [18]:
result = {}
result['b-mean'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_mean_x', 'b_mean_y', key)
    result['b-mean'][key] = test

pred_0.0039
[[24234    40]
 [ 1244  2864]]
pvalue      9.058784630636e-311
statistic   40.0


#### For B-IQR

In [19]:
result['b-iqr'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_iqr_x', 'b_iqr_y', key)
    result['b-iqr'][key] = test

pred_0.0039
[[20626   263]
 [  299  7194]]
pvalue      0.13977107386766693
statistic   263.0


#### For B-ZScore

In [20]:
result['b-zscore'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_zscore_x', 'b_zscore_y', key)
    result['b-zscore'][key] = test

pred_0.0039
[[20626   263]
 [  299  7194]]
pvalue      0.13977107386766693
statistic   263.0


### For Floating Score

#### For F-Mean

In [21]:
result['f-mean'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_mean_x', 'f_mean_y', key)
    result['f-mean'][key] = test

pred_0.0039
[[24636   387]
 [  928  2431]]
pvalue      1.3217205059091784e-51
statistic   387.0


#### For F-IQR

In [22]:
result['f-iqr'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_iqr_x', 'f_iqr_y', key)
    result['f-iqr'][key] = test

pred_0.0039
[[20626   263]
 [  299  7194]]
pvalue      0.13977107386766693
statistic   263.0


#### For F-ZScore

In [23]:
result['f-zscore'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_zscore_x', 'f_zscore_y', key)
    result['f-zscore'][key] = test

pred_0.0039
[[20626   263]
 [  299  7194]]
pvalue      0.13977107386766693
statistic   263.0


## Saving Test Result

In [24]:
mcnemar_result = pd.DataFrame.from_dict(result).sort_index(axis=0)
mcnemar_result

Unnamed: 0,b-mean,b-iqr,b-zscore,f-mean,f-iqr,f-zscore
pred_0.0039,9.058785e-311,0.139771,0.139771,1.3217209999999999e-51,0.139771,0.139771


In [25]:
mcnemar_result.to_excel('./ftp-mcnemar.xlsx')