# McNemar Test

## Import All Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.contingency_tables import mcnemar 
import os

## Read All the Data

In [2]:
dir = '/Users/angelaoryza/Documents/TA/noisy-rnnids/rnnids-py/results/mcnemar/smtp'
subdir = os.listdir(dir)

In [3]:
df_dict = {}
for file in subdir:
    if file.endswith('.csv'):
        df = pd.read_csv(f'./{file}')
        file = file.split('.csv')[0]
        df_dict[file] = df

In [4]:
df_dict['pred_0.0']

Unnamed: 0.1,Unnamed: 0,id,b_mean,b_iqr,b_zscore,f_zscore,f_mean,f_iqr
0,0,175.45.176.0-40755-149.171.126.18-25-tcp,1,1,1,1,1,1
1,1,175.45.176.0-60595-149.171.126.17-25-tcp,1,1,1,1,1,1
2,2,175.45.176.2-58472-149.171.126.17-25-tcp,1,1,1,1,1,1
3,3,175.45.176.2-13352-149.171.126.13-25-tcp,1,1,1,1,1,1
4,4,175.45.176.0-3844-149.171.126.12-25-tcp,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...
44721,44721,59.166.0.9-21800-149.171.126.0-25-tcp,0,0,0,0,0,0
44722,44722,59.166.0.4-6591-149.171.126.6-25-tcp,0,0,0,0,0,0
44723,44723,59.166.0.0-12790-149.171.126.2-25-tcp,0,0,0,0,0,0
44724,44724,59.166.0.1-41608-149.171.126.1-25-tcp,0,0,0,0,0,0


In [5]:
for key, df in df_dict.items():
    df.drop('Unnamed: 0', axis=1, inplace=True)

## Data Exploration

In [6]:
for key, df in df_dict.items():
    temp = df[df.duplicated()]['id'].unique()
    print(f'There are {len(temp)} duplicates data in df {key}')

There are 203 duplicates data in df pred_0.007
There are 203 duplicates data in df pred_0.0


In [7]:
for key, df in df_dict.items():
    count = 0
    for t in temp:
        srs = df[df['id']==t][['b_mean', 'b_iqr', 'b_zscore', 'f_mean', 'f_iqr', 'f_zscore' ]].nunique()
        if (srs > 1).any():
            print(t)
            count += 1
            break
        else:
            continue
    if count == 0:
        print(f'All duplicated values in df {key} have the same prediction')
    else:
        print(f'Not all duplicated values in df {key} have the same prediction')

175.45.176.0-51774-149.171.126.17-25-tcp
Not all duplicated values in df pred_0.007 have the same prediction
175.45.176.0-26872-149.171.126.11-25-tcp
Not all duplicated values in df pred_0.0 have the same prediction


## Data Pre-Processing

In [26]:
def preprocess(df):
    df = df.sort_values(by='id')
    df.drop_duplicates(inplace=True)
    return df

In [27]:
for key, df in df_dict.items():
    df = preprocess(df)

### Merge DF

In [8]:
df_merge = dict()
x = df_dict['pred_0.0']
for key, df in df_dict.items():
    if key == 'pred_0.0':
        continue
    else:
        df_all = pd.merge(x, df, on='id', how='inner')
        df_all = df_all.sort_values(by='id')
        df_merge[key] = df_all

- Plot F1-Score untuk tiap tiap threshold
- Cari trend regresinya, Hitung gradiennya untuk tiap threshold.
- Dilakukan untuk setiap protokol
- Rekap McNemar

- Vectorizing:


## Run McNemar Test

In [9]:
def mcnemar_test(df, x , y, key):
    a = len(df[(df[x]==0) & (df[y]==0)])
    b = len(df[(df[x]==0) & (df[y]==1)])
    c = len(df[(df[x]==1) & (df[y]==0)])
    d = len(df[(df[x]==1) & (df[y]==1)])

    table = np.array([
    [a,b],
    [c,d]
    ])

    print(key)
    print(table)
    result = mcnemar(table)
    print(result)
    return result.pvalue


### For Binary Score

#### For B-Mean

In [10]:
result = {}
result['b-mean'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_mean_x', 'b_mean_y', key)
    result['b-mean'][key] = test

pred_0.007
[[40306    23]
 [ 1025  4007]]
pvalue      6.048441117783433e-269
statistic   23.0


#### For B-IQR

In [11]:
result['b-iqr'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_iqr_x', 'b_iqr_y', key)
    result['b-iqr'][key] = test

pred_0.007
[[38768  1048]
 [  563  4982]]
pvalue      5.614303446682554e-34
statistic   563.0


#### For B-ZScore

In [12]:
result['b-zscore'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_zscore_x', 'b_zscore_y', key)
    result['b-zscore'][key] = test

pred_0.007
[[39224   592]
 [  586  4959]]
pvalue      0.8841826652103992
statistic   586.0


### For Floating Score

#### For F-Mean

In [13]:
result['f-mean'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_mean_x', 'f_mean_y', key)
    result['f-mean'][key] = test

pred_0.007
[[40581    42]
 [ 1331  3407]]
pvalue      0.0
statistic   42.0


#### For F-IQR

In [14]:
result['f-iqr'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_iqr_x', 'f_iqr_y', key)
    result['f-iqr'][key] = test

pred_0.007
[[39852   363]
 [  575  4571]]
pvalue      4.5688415963372025e-12
statistic   363.0


#### For F-ZScore

In [15]:
result['f-zscore'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_zscore_x', 'f_zscore_y', key)
    result['f-zscore'][key] = test

pred_0.007
[[40135   355]
 [  406  4465]]
pvalue      0.06983913657230421
statistic   355.0


## Saving Test Result

In [16]:
mcnemar_result = pd.DataFrame.from_dict(result).sort_index(axis=0)
mcnemar_result

Unnamed: 0,b-mean,b-iqr,b-zscore,f-mean,f-iqr,f-zscore
pred_0.007,6.048441e-269,5.614303e-34,0.884183,0.0,4.568842e-12,0.069839


In [17]:
mcnemar_result.to_excel('./smtp-mcnemar.xlsx')