# McNemar Test

## Import All Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.contingency_tables import mcnemar 
import os

## Read All the Data

In [2]:
dir = '/Users/angelaoryza/Documents/TA/noisy-rnnids/rnnids-py/results/vector/mcnemar/http'
subdir = os.listdir(dir)

In [3]:
df_dict = {}
for file in subdir:
    if file.endswith('.csv'):
        df = pd.read_csv(f'./{file}')
        file = file.split('.csv')[0]
        df_dict[file] = df

In [4]:
df_dict['lstm_pred_0.0']

Unnamed: 0.1,Unnamed: 0,id,b_mean,b_iqr,b_zscore,f_zscore,f_mean,f_iqr
0,0,175.45.176.1-4657-149.171.126.18-80-tcp,1,1,1,0,0,1
1,1,175.45.176.3-32473-149.171.126.18-80-tcp,0,1,1,0,0,0
2,2,175.45.176.0-21873-149.171.126.14-80-tcp,0,1,1,0,0,0
3,3,175.45.176.0-49194-149.171.126.17-80-tcp,1,1,1,1,0,1
4,4,175.45.176.1-51435-149.171.126.14-80-tcp,1,1,1,1,0,1
...,...,...,...,...,...,...,...,...
154327,154327,149.171.126.15-80-175.45.176.2-20096-tcp,0,1,1,0,0,0
154328,154328,59.166.0.7-25671-149.171.126.9-80-tcp,0,0,0,0,0,0
154329,154329,59.166.0.6-24097-149.171.126.7-80-tcp,0,0,0,0,0,0
154330,154330,59.166.0.6-22144-149.171.126.3-80-tcp,0,0,0,0,0,0


In [5]:
for key, df in df_dict.items():
    df.drop('Unnamed: 0', axis=1, inplace=True)

## Data Exploration

In [6]:
for key, df in df_dict.items():
    temp = df[df.duplicated()]['id'].unique()
    print(f'There are {len(temp)} duplicates data in df {key}')

There are 30764 duplicates data in df lstm_pred_0.002
There are 30828 duplicates data in df bi-lstm.001
There are 30788 duplicates data in df lstm_pred_0.003
There are 30612 duplicates data in df lstm_pred_0.001
There are 30546 duplicates data in df lstm_pred_0.0
There are 30609 duplicates data in df lstm_pred_0.004
There are 30656 duplicates data in df lstm_pred_0.005
There are 30612 duplicates data in df lstm.001
There are 30546 duplicates data in df bi-lstm_pred_0.0
There are 30656 duplicates data in df bi-lstm_pred_0.005
There are 30891 duplicates data in df bi-lstm_pred_0.004
There are 30608 duplicates data in df bi-lstm_pred_0.003
There are 30845 duplicates data in df bi-lstm_pred_0.002
There are 26540 duplicates data in df bi-lstm_pred_0.001


In [7]:
for key, df in df_dict.items():
    count = 0
    for t in temp:
        srs = df[df['id']==t][['b_mean', 'b_iqr', 'b_zscore', 'f_mean', 'f_iqr', 'f_zscore' ]].nunique()
        if (srs > 1).any():
            print(t)
            count += 1
            break
        else:
            continue
    if count == 0:
        print(f'All duplicated values in df {key} have the same prediction')
    else:
        print(f'Not all duplicated values in df {key} have the same prediction')

59.166.0.8-12114-149.171.126.7-80-tcp
Not all duplicated values in df lstm_pred_0.002 have the same prediction
59.166.0.8-12114-149.171.126.7-80-tcp
Not all duplicated values in df bi-lstm.001 have the same prediction
149.171.126.0-80-59.166.0.9-14582-tcp
Not all duplicated values in df lstm_pred_0.003 have the same prediction
149.171.126.0-80-59.166.0.9-14582-tcp
Not all duplicated values in df lstm_pred_0.001 have the same prediction
149.171.126.0-80-59.166.0.9-14582-tcp
Not all duplicated values in df lstm_pred_0.0 have the same prediction
149.171.126.0-80-59.166.0.9-14582-tcp
Not all duplicated values in df lstm_pred_0.004 have the same prediction
149.171.126.0-80-59.166.0.9-14582-tcp
Not all duplicated values in df lstm_pred_0.005 have the same prediction
149.171.126.0-80-59.166.0.9-14582-tcp
Not all duplicated values in df lstm.001 have the same prediction
149.171.126.0-80-59.166.0.9-14582-tcp
Not all duplicated values in df bi-lstm_pred_0.0 have the same prediction
149.171.126.0

## Data Pre-Processing

In [8]:
def preprocess(df):
    df = df.sort_values(by='id')
    df.drop_duplicates(inplace=True)
    return df

In [9]:
for key, df in df_dict.items():
    df = preprocess(df)

### Merge DF

In [10]:
df_merge = dict()
x = df_dict['bi-lstm_pred_0.0']
for key, df in df_dict.items():
    if key == 'bi-lstm_pred_0.0':
        continue
    else:
        df_all = pd.merge(x, df, on='id', how='inner')
        df_all = df_all.sort_values(by='id')
        df_merge[key] = df_all

- Plot F1-Score untuk tiap tiap threshold
- Cari trend regresinya, Hitung gradiennya untuk tiap threshold.
- Dilakukan untuk setiap protokol
- Rekap McNemar

- Vectorizing:


## Run McNemar Test

In [11]:
def mcnemar_test(df, x , y, key):
    a = len(df[(df[x]==0) & (df[y]==0)])
    b = len(df[(df[x]==0) & (df[y]==1)])
    c = len(df[(df[x]==1) & (df[y]==0)])
    d = len(df[(df[x]==1) & (df[y]==1)])

    table = np.array([
    [a,b],
    [c,d]
    ])

    print(key)
    print(table)
    result = mcnemar(table)
    print(result)
    return result.pvalue


### For Binary Score

#### For B-Mean

In [12]:
result = {}
result['b-mean'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_mean_x', 'b_mean_y', key)
    result['b-mean'][key] = test

lstm_pred_0.002
[[273217    259]
 [  4405  11031]]
pvalue      0.0
statistic   259.0
bi-lstm.001
[[273277    199]
 [  9489   5947]]
pvalue      0.0
statistic   199.0
lstm_pred_0.003
[[270079   3397]
 [   406  15030]]
pvalue      0.0
statistic   406.0
lstm_pred_0.001
[[273020    456]
 [  2514  12922]]
pvalue      0.0
statistic   456.0
lstm_pred_0.0
[[273351    125]
 [   125  15311]]
pvalue      1.0
statistic   125.0
lstm_pred_0.004
[[268985   4491]
 [   181  15255]]
pvalue      0.0
statistic   181.0
lstm_pred_0.005
[[273410     66]
 [ 12676   2760]]
pvalue      0.0
statistic   66.0
lstm.001
[[273020    456]
 [  2514  12922]]
pvalue      0.0
statistic   456.0
bi-lstm_pred_0.005
[[273410     66]
 [ 12676   2760]]
pvalue      0.0
statistic   66.0
bi-lstm_pred_0.004
[[269598   3878]
 [   210  15226]]
pvalue      0.0
statistic   210.0
bi-lstm_pred_0.003
[[272789    687]
 [  2020  13416]]
pvalue      6.104149576652465e-151
statistic   687.0
bi-lstm_pred_0.002
[[273432     44]
 [  9333   6103]

#### For B-IQR

In [13]:
result['b-iqr'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_iqr_x', 'b_iqr_y', key)
    result['b-iqr'][key] = test

lstm_pred_0.002
[[172557    624]
 [  3027 112704]]
pvalue      0.0
statistic   624.0
bi-lstm.001
[[173011    170]
 [  4914 110817]]
pvalue      0.0
statistic   170.0
lstm_pred_0.003
[[172885    296]
 [  7986 107745]]
pvalue      0.0
statistic   296.0
lstm_pred_0.001
[[172460    721]
 [  4485 111246]]
pvalue      0.0
statistic   721.0
lstm_pred_0.0
[[172696    485]
 [   485 115246]]
pvalue      1.0
statistic   485.0
lstm_pred_0.004
[[172018   1163]
 [  6501 109230]]
pvalue      0.0
statistic   1163.0
lstm_pred_0.005
[[172881    300]
 [ 11110 104621]]
pvalue      0.0
statistic   300.0
lstm.001
[[172460    721]
 [  4485 111246]]
pvalue      0.0
statistic   721.0
bi-lstm_pred_0.005
[[172881    300]
 [ 11110 104621]]
pvalue      0.0
statistic   300.0
bi-lstm_pred_0.004
[[172621    560]
 [  6386 109345]]
pvalue      0.0
statistic   560.0
bi-lstm_pred_0.003
[[172953    228]
 [  8044 107687]]
pvalue      0.0
statistic   228.0
bi-lstm_pred_0.002
[[172616    565]
 [  3467 112264]]
pvalue      0.

#### For B-ZScore

In [14]:
result['b-zscore'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'b_zscore_x', 'b_zscore_y', key)
    result['b-zscore'][key] = test

lstm_pred_0.002
[[180580   2346]
 [ 84005  21981]]
pvalue      0.0
statistic   2346.0
bi-lstm.001
[[182744    182]
 [ 85333  20653]]
pvalue      0.0
statistic   182.0
lstm_pred_0.003
[[182542    384]
 [ 84996  20990]]
pvalue      0.0
statistic   384.0
lstm_pred_0.001
[[182556    370]
 [ 84988  20998]]
pvalue      0.0
statistic   370.0
lstm_pred_0.0
[[182633    293]
 [   293 105693]]
pvalue      1.0
statistic   293.0
lstm_pred_0.004
[[181997    929]
 [  1772 104214]]
pvalue      5.401400604080042e-60
statistic   929.0
lstm_pred_0.005
[[182877     49]
 [ 87282  18704]]
pvalue      0.0
statistic   49.0
lstm.001
[[182556    370]
 [ 84988  20998]]
pvalue      0.0
statistic   370.0
bi-lstm_pred_0.005
[[182877     49]
 [ 87282  18704]]
pvalue      0.0
statistic   49.0
bi-lstm_pred_0.004
[[182144    782]
 [  1785 104201]]
pvalue      2.3667171893648914e-89
statistic   782.0
bi-lstm_pred_0.003
[[182880     46]
 [ 85651  20335]]
pvalue      0.0
statistic   46.0
bi-lstm_pred_0.002
[[181977    949

### For Floating Score

#### For F-Mean

In [15]:
result['f-mean'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_mean_x', 'f_mean_y', key)
    result['f-mean'][key] = test

lstm_pred_0.002
[[286118    800]
 [  1351    643]]
pvalue      8.815045179101798e-33
statistic   800.0
bi-lstm.001
[[286240    678]
 [  1363    631]]
pvalue      9.27403252924602e-53
statistic   678.0
lstm_pred_0.003
[[274545  12373]
 [   284   1710]]
pvalue      0.0
statistic   284.0
lstm_pred_0.001
[[286237    681]
 [   133   1861]]
pvalue      2.1719735320025665e-89
statistic   133.0
lstm_pred_0.0
[[286893     25]
 [    25   1969]]
pvalue      1.0
statistic   25.0
lstm_pred_0.004
[[268875  18043]
 [   142   1852]]
pvalue      0.0
statistic   142.0
lstm_pred_0.005
[[274736  12182]
 [  1540    454]]
pvalue      0.0
statistic   1540.0
lstm.001
[[286237    681]
 [   133   1861]]
pvalue      2.1719735320025665e-89
statistic   133.0
bi-lstm_pred_0.005
[[274736  12182]
 [  1540    454]]
pvalue      0.0
statistic   1540.0
bi-lstm_pred_0.004
[[275640  11278]
 [   208   1786]]
pvalue      0.0
statistic   208.0
bi-lstm_pred_0.003
[[270641  16277]
 [   262   1732]]
pvalue      0.0
statistic   2

#### For F-IQR

In [16]:
result['f-iqr'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_iqr_x', 'f_iqr_y', key)
    result['f-iqr'][key] = test

lstm_pred_0.002
[[266872   4470]
 [  3403  14167]]
pvalue      2.402732039152632e-33
statistic   3403.0
bi-lstm.001
[[270155   1187]
 [ 11820   5750]]
pvalue      0.0
statistic   1187.0
lstm_pred_0.003
[[265796   5546]
 [  2146  15424]]
pvalue      0.0
statistic   2146.0
lstm_pred_0.001
[[266628   4714]
 [  9663   7907]]
pvalue      0.0
statistic   4714.0
lstm_pred_0.0
[[271224    118]
 [   118  17452]]
pvalue      1.0
statistic   118.0
lstm_pred_0.004
[[265161   6181]
 [  1553  16017]]
pvalue      0.0
statistic   1553.0
lstm_pred_0.005
[[266319   5023]
 [  3830  13740]]
pvalue      6.908650969379703e-37
statistic   3830.0
lstm.001
[[266628   4714]
 [  9663   7907]]
pvalue      0.0
statistic   4714.0
bi-lstm_pred_0.005
[[266319   5023]
 [  3830  13740]]
pvalue      6.908650969379703e-37
statistic   3830.0
bi-lstm_pred_0.004
[[265544   5798]
 [  1749  15821]]
pvalue      0.0
statistic   1749.0
bi-lstm_pred_0.003
[[265471   5871]
 [  2218  15352]]
pvalue      0.0
statistic   2218.0
bi-ls

#### For F-ZScore

In [17]:
result['f-zscore'] = {}
for key, df in df_merge.items():
    test = mcnemar_test(df, 'f_zscore_x', 'f_zscore_y', key)
    result['f-zscore'][key] = test

lstm_pred_0.002
[[272462   4798]
 [  1745   9907]]
pvalue      1e-323
statistic   1745.0
bi-lstm.001
[[276787    473]
 [  7972   3680]]
pvalue      0.0
statistic   473.0
lstm_pred_0.003
[[267886   9374]
 [   322  11330]]
pvalue      0.0
statistic   322.0
lstm_pred_0.001
[[275024   2236]
 [  7453   4199]]
pvalue      0.0
statistic   2236.0
lstm_pred_0.0
[[277095    165]
 [   165  11487]]
pvalue      1.0
statistic   165.0
lstm_pred_0.004
[[267487   9773]
 [   187  11465]]
pvalue      0.0
statistic   187.0
lstm_pred_0.005
[[270449   6811]
 [  2350   9302]]
pvalue      0.0
statistic   2350.0
lstm.001
[[275024   2236]
 [  7453   4199]]
pvalue      0.0
statistic   2236.0
bi-lstm_pred_0.005
[[270449   6811]
 [  2350   9302]]
pvalue      0.0
statistic   2350.0
bi-lstm_pred_0.004
[[267703   9557]
 [   303  11349]]
pvalue      0.0
statistic   303.0
bi-lstm_pred_0.003
[[268244   9016]
 [   432  11220]]
pvalue      0.0
statistic   432.0
bi-lstm_pred_0.002
[[272741   4519]
 [   481  11171]]
pvalue 

## Saving Test Result

In [18]:
mcnemar_result = pd.DataFrame.from_dict(result).sort_index(axis=0)
mcnemar_result

Unnamed: 0,b-mean,b-iqr,b-zscore,f-mean,f-iqr,f-zscore
bi-lstm.001,0.0,0.0,0.0,9.274033e-53,0.0,0.0
bi-lstm_pred_0.001,5.163719e-93,0.0,0.0,2.1001220000000003e-120,0.0,0.0
bi-lstm_pred_0.002,0.0,0.0,0.0,7.580781e-54,1.860243e-40,0.0
bi-lstm_pred_0.003,6.10415e-151,0.0,0.0,0.0,0.0,0.0
bi-lstm_pred_0.004,0.0,0.0,2.3667169999999998e-89,0.0,0.0,0.0
bi-lstm_pred_0.005,0.0,0.0,0.0,0.0,6.908651e-37,0.0
lstm.001,0.0,0.0,0.0,2.171974e-89,0.0,0.0
lstm_pred_0.0,1.0,1.0,1.0,1.0,1.0,1.0
lstm_pred_0.001,0.0,0.0,0.0,2.171974e-89,0.0,0.0
lstm_pred_0.002,0.0,0.0,0.0,8.815045e-33,2.402732e-33,1e-323


In [19]:
mcnemar_result.to_excel('./http-mcnemar-bi-lstm.xlsx')