# 1. Imports

In [23]:
import pandas as pd
import numpy as np
import glob
import os

# 2. Importing the Data

In [17]:
def importAllMethods(folder, generic, year, assignmentDataset=False):
    """Load every baseline ranking CSV plus the STEF ranking for one dataset.

    Baseline files are discovered with a glob under
    ``../baseline_ranking/<folder>/``:

    * ``ddos_<generic>_<year>_<Method>.csv`` by default, or
    * ``<generic>*.csv`` (i.e. ``<generic>_<Method>.csv``) when
      ``assignmentDataset`` is true.

    The STEF ranking is read from ``../stef_ranking/<folder>/`` and is always
    appended last under the method name ``'STEF-Rank'``.

    Args:
        folder: Dataset sub-folder inside both ranking directories.
        generic: Dataset/attack identifier used in the file names.
        year: Year component of the file names (ignored for assignment
            datasets).
        assignmentDataset: Use the assignment file-naming scheme.

    Returns:
        A ``(dataframes, methods)`` tuple of two parallel lists: the loaded
        DataFrames and the method name each one belongs to.
    """
    if assignmentDataset:
        path_to_ranking = r'../baseline_ranking/'+folder+'/'+generic+'*.csv'
        stef_file = '../stef_ranking/'+folder+'/'+generic+'.csv'
        # '<generic>_<Method>.csv' -> the method is the 2nd '_' field.
        method_position = 1
    else:
        path_to_ranking = r'../baseline_ranking/'+folder+'/ddos_'+generic+'_'+year+'_*.csv'
        stef_file = '../stef_ranking/'+folder+'/ddos_'+generic+'_'+year+'.csv'
        # 'ddos_<generic>_<year>_<Method>.csv' -> the method is the 4th '_' field.
        method_position = 3

    dataframes_generic = []
    methods = []

    # glob returns matches in arbitrary order; sort (case-insensitively) so the
    # method/column order is reproducible across runs and platforms.
    for file in sorted(glob.glob(path_to_ranking), key=str.lower):
        print(f"Processing file: {file}")
        # Use the file name only. Splitting the path on '\\' worked solely on
        # Windows and raised IndexError wherever glob returns '/' separators.
        file_name = os.path.basename(file)
        method = file_name.split('_')[method_position].split('.')[0]
        methods.append(method)
        df = pd.read_csv(file, index_col=0)
        # Drop rows whose feature name is a stray 'Unnamed...' index column.
        df = df[~df['feature'].str.contains('Unnamed', na=False)]
        dataframes_generic.append(df)

    print(f"Processing file: {stef_file}")
    stef_rank = pd.read_csv(stef_file, index_col=0)
    dataframes_generic.append(stef_rank)
    methods.append('STEF-Rank')

    return (dataframes_generic, methods)

In [93]:
# Load the baseline and STEF rankings for the portmap (2018) dataset.
dataframes_portmap, methods_portmap = importAllMethods(
    folder='CSE_CIC_IDS2018', generic='portmap', year='2018'
)

Processing file: ../baseline_ranking/CSE_CIC_IDS2018\ddos_portmap_2018_BackwardElimination.csv
Processing file: ../baseline_ranking/CSE_CIC_IDS2018\ddos_portmap_2018_MutualInformation.csv
Processing file: ../baseline_ranking/CSE_CIC_IDS2018\ddos_portmap_2018_RFE.csv
Processing file: ../baseline_ranking/CSE_CIC_IDS2018\ddos_portmap_2018_SelectKBest.csv
Processing file: ../baseline_ranking/CSE_CIC_IDS2018\ddos_portmap_2018_VarianceThreshold.csv
Processing file: ../baseline_ranking/CSE_CIC_IDS2018\ddos_portmap_2018_VIF.csv
Processing file: ../stef_ranking/CSE_CIC_IDS2018/ddos_portmap_2018.csv


In [94]:
# Load the baseline and STEF rankings for the syn (2019) dataset.
dataframes_syn, methods_syn = importAllMethods(
    folder='CIC_DDoS2019', generic='syn', year='2019'
)

Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_syn_2019_BackwardElimination.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_syn_2019_MutualInformation.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_syn_2019_RFE.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_syn_2019_SelectKBest.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_syn_2019_VarianceThreshold.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_syn_2019_VIF.csv
Processing file: ../stef_ranking/CIC_DDoS2019/ddos_syn_2019.csv


In [95]:
# Load the baseline and STEF rankings for the udp (2019) dataset.
dataframes_udp, methods_udp = importAllMethods(
    folder='CIC_DDoS2019', generic='udp', year='2019'
)

Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_udp_2019_BackwardElimination.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_udp_2019_MutualInformation.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_udp_2019_RFE.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_udp_2019_SelectKBest.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_udp_2019_VarianceThreshold.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_udp_2019_VIF.csv
Processing file: ../stef_ranking/CIC_DDoS2019/ddos_udp_2019.csv


In [96]:
# Load the baseline and STEF rankings for the netbios (2019) dataset.
dataframes_netbios, methods_netbios = importAllMethods(
    folder='CIC_DDoS2019', generic='netbios', year='2019'
)

Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_netbios_2019_BackwardElimination.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_netbios_2019_MutualInformation.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_netbios_2019_RFE.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_netbios_2019_SelectKBest.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_netbios_2019_VarianceThreshold.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_netbios_2019_VIF.csv
Processing file: ../stef_ranking/CIC_DDoS2019/ddos_netbios_2019.csv


In [97]:
# Load the baseline and STEF rankings for the ldap (2019) dataset.
dataframes_ldap, methods_ldap = importAllMethods(
    folder='CIC_DDoS2019', generic='ldap', year='2019'
)

Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_ldap_2019_BackwardElimination.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_ldap_2019_MutualInformation.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_ldap_2019_RFE.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_ldap_2019_SelectKBest.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_ldap_2019_VarianceThreshold.csv
Processing file: ../baseline_ranking/CIC_DDoS2019\ddos_ldap_2019_VIF.csv
Processing file: ../stef_ranking/CIC_DDoS2019/ddos_ldap_2019.csv


# 3. Putting it all Together

In [24]:
def puttingItAllTogether(dataframes_generic, methods, verbose=False):
    """Merge the per-method rankings into one table with a column per method.

    The feature list is taken from the first dataframe (all dataframes are
    assumed to rank the same features); the ``'Label'`` entry is skipped.
    For each feature and each ``(dataframe, method)`` pair the values of the
    dataframe's ``'rank'`` column for that feature are summed, so a feature
    that is absent from a dataframe contributes ``0``. A dataframe without a
    ``'rank'`` column yields ``None`` for its method.

    Args:
        dataframes_generic: List of DataFrames, each with a ``'feature'``
            column and normally a ``'rank'`` column.
        methods: Method names, parallel to ``dataframes_generic``; they become
            the column names of the result.
        verbose: When true, print each feature and method as it is processed.

    Returns:
        A DataFrame with a ``'feature'`` column followed by one column per
        method. It is empty when there are no dataframes or no features.
    """
    # Sum the ranks per feature once per dataframe instead of re-scanning the
    # whole dataframe for every single feature.
    rank_lookups = []
    for frame in dataframes_generic:
        if 'rank' in frame.columns:
            summed = frame.groupby('feature', sort=False)['rank'].sum()
            rank_lookups.append(summed.to_dict())
        else:
            rank_lookups.append(None)

    # Assuming all dataframes have the same 'feature' column
    if dataframes_generic:
        features = dataframes_generic[0]['feature'].tolist()
    else:
        features = []

    aggregated_data = []
    for current_feature in features:
        if current_feature == 'Label':
            continue

        row = {'feature': current_feature}
        if verbose:
            print(current_feature)

        for lookup, current_method in zip(rank_lookups, methods):
            if verbose:
                print(current_method)

            if lookup is None:
                row[current_method] = None  # no 'rank' column for this method
            else:
                # A feature missing from this method's ranking sums to 0.
                row[current_method] = lookup.get(current_feature, 0)

        aggregated_data.append(row)

    return pd.DataFrame(aggregated_data)

In [99]:
# Merge all method rankings for portmap, save the table, and show its first rows.
master_portmap = puttingItAllTogether(
    dataframes_portmap,
    methods_portmap,
    verbose=False,
)
master_portmap.to_csv("../final_ranking/CSE_CIC_IDS2018/ddos_portmap_2018.csv")
master_portmap.head(5)

Unnamed: 0,feature,BackwardElimination,MutualInformation,RFE,SelectKBest,VarianceThreshold,VIF,STEF-Rank
0,DestinationPort,1,1,1,1,1,1,1.0
1,FlowDuration,1,1,0,0,1,0,0.566667
2,TotalFwdPackets,1,1,1,0,0,0,0.45
3,TotalBackwardPackets,1,1,1,0,0,0,0.516667
4,TotalLengthofFwdPackets,0,1,1,1,0,0,0.7


In [100]:
# Merge all method rankings for netbios, save the table, and show its first rows.
master_netbios = puttingItAllTogether(
    dataframes_netbios,
    methods_netbios,
    verbose=False,
)
master_netbios.to_csv("../final_ranking/CIC_DDoS2019/ddos_netbios_2019.csv")
master_netbios.head(5)

Unnamed: 0,feature,BackwardElimination,MutualInformation,RFE,SelectKBest,VarianceThreshold,VIF,STEF-Rank
0,FlowID,0,1,0,0,1,1,0.6
1,SourceIP,1,1,1,1,0,1,0.616667
2,SourcePort,1,1,0,1,1,1,0.733333
3,DestinationIP,1,1,1,0,0,1,0.65
4,DestinationPort,0,1,0,0,1,1,0.566667


In [101]:
# Merge all method rankings for syn, save the table, and show its first rows.
master_syn = puttingItAllTogether(
    dataframes_syn,
    methods_syn,
    verbose=False,
)
master_syn.to_csv("../final_ranking/CIC_DDoS2019/ddos_syn_2019.csv")
master_syn.head(5)

Unnamed: 0,feature,BackwardElimination,MutualInformation,RFE,SelectKBest,VarianceThreshold,VIF,STEF-Rank
0,FlowID,1,1,0,1,1,1,0.7
1,SourceIP,0,1,1,1,0,1,0.666667
2,SourcePort,1,1,0,0,1,1,0.55
3,DestinationIP,0,1,1,0,0,1,0.55
4,DestinationPort,1,1,0,1,1,1,0.633333


In [102]:
# Merge all method rankings for udp, save the table, and show its first rows.
master_udp = puttingItAllTogether(
    dataframes_udp,
    methods_udp,
    verbose=False,
)
master_udp.to_csv("../final_ranking/CIC_DDoS2019/ddos_udp_2019.csv")
master_udp.head(5)

Unnamed: 0,feature,BackwardElimination,MutualInformation,RFE,SelectKBest,VarianceThreshold,VIF,STEF-Rank
0,FlowID,1,1,0,1,1,1,0.8
1,SourceIP,1,1,1,1,1,1,0.8
2,SourcePort,1,1,0,1,1,1,0.8
3,DestinationIP,1,1,1,0,1,1,0.633333
4,DestinationPort,0,1,0,1,1,1,0.733333


In [103]:
# Merge all method rankings for ldap, save the table, and show its first rows.
master_ldap = puttingItAllTogether(
    dataframes_ldap,
    methods_ldap,
    verbose=False,
)
master_ldap.to_csv("../final_ranking/CIC_DDoS2019/ddos_ldap_2019.csv")
master_ldap.head(5)

Unnamed: 0,feature,BackwardElimination,MutualInformation,RFE,SelectKBest,VarianceThreshold,VIF,STEF-Rank
0,FlowID,1,1,0,0,1,1,0.566667
1,SourceIP,0,1,1,1,0,1,0.983333
2,SourcePort,0,1,1,1,1,1,0.75
3,DestinationIP,1,1,1,0,0,1,0.7
4,DestinationPort,1,1,0,0,1,1,0.533333


### Note: We now need to choose a threshold for the STEF-Rank score (for example, 0.5).

# 4. Repeating with Assignment Datasets

In [19]:
# Load the baseline and STEF rankings for the assignment2 dataset.
dataframes_a2, methods_a2 = importAllMethods(
    folder='csi5388_assignment2_3_data',
    generic='assignment2',
    year='',
    assignmentDataset=True,
)

Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment2_BackwardElimination.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment2_MutualInformation.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment2_RFE.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment2_SelectKBest.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment2_VarianceThreshold.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment2_VIF.csv
Processing file: ../stef_ranking/csi5388_assignment2_3_data/assignment2.csv


In [20]:
# Load the baseline and STEF rankings for the assignment3 dataset.
dataframes_a3, methods_a3 = importAllMethods(
    folder='csi5388_assignment2_3_data',
    generic='assignment3',
    year='',
    assignmentDataset=True,
)

Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment3_BackwardElimination.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment3_MutualInformation.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment3_RFE.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment3_SelectKBest.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment3_VarianceThreshold.csv
Processing file: ../baseline_ranking/csi5388_assignment2_3_data\assignment3_VIF.csv
Processing file: ../stef_ranking/csi5388_assignment2_3_data/assignment3.csv


In [25]:
# Merge all method rankings for assignment2, save the table, and show its first rows.
master_a2 = puttingItAllTogether(
    dataframes_a2,
    methods_a2,
    verbose=False,
)
master_a2.to_csv("../final_ranking/csi5388_assignment2_3_data/assignment2.csv")
master_a2.head(5)

Unnamed: 0,feature,BackwardElimination,MutualInformation,RFE,SelectKBest,VarianceThreshold,VIF,STEF-Rank
0,duration,1,1,0,1,1,1,0.816667
1,protocol_type,1,1,1,0,1,1,0.833333
2,service,0,1,0,1,1,1,0.733333
3,flag,1,1,1,1,1,0,0.75
4,src_bytes,0,1,0,1,1,1,0.716667


In [26]:
# Merge all method rankings for assignment3, save the table, and show its first rows.
master_a3 = puttingItAllTogether(
    dataframes_a3,
    methods_a3,
    verbose=False,
)
master_a3.to_csv("../final_ranking/csi5388_assignment2_3_data/assignment3.csv")
master_a3.head(5)

Unnamed: 0,feature,BackwardElimination,MutualInformation,RFE,SelectKBest,VarianceThreshold,VIF,STEF-Rank
0,length_longest_word,0,1,0,1,1,1,0.666667
1,FQDN_count,1,1,0,1,1,1,0.833333
2,sld_vec,0,1,0,0,0,1,0.333333
3,private_192,0,0,1,1,1,1,0.7
4,lower,1,1,0,1,1,1,0.833333
