In [4]:
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report

def load_dataset(path):
    """Parse a dataset CSV, check its integrity, and append derived columns.

    Parameters
    ----------
    path : str or path-like
        CSV file handed to ``pd.read_csv``. It must contain the columns
        ``Timestamp``, ``Arbitration_ID`` and ``Class``.

    Returns
    -------
    pandas.DataFrame
        The parsed frame plus, in this order:

        * ``abstime``   -- ``Timestamp`` interpreted as seconds since the Unix
          epoch, rounded to the nearest microsecond.
        * ``monotime``  -- ``Timestamp`` minus the smallest ``Timestamp``.
        * ``aid_int``   -- ``Arbitration_ID`` parsed as a base-16 integer.
        * ``y``         -- ``Class`` mapped to 0 (``'Normal'``) / 1 (``'Attack'``);
          any other label becomes NaN.
        * ``Timedelta`` -- difference to the previous ``Timestamp`` carrying the
          same ``Arbitration_ID`` (NaN for the first row of each ID).

    Raises
    ------
    AssertionError
        If any cell is missing or ``Timestamp`` is not monotonically increasing.
    """
    # parse the dataset
    df = pd.read_csv(path)

    ################ distilled from notebook 0 ################
    # check the integrity
    assert not df.isna().any().any(), 'There is at least one missing value.'
    assert df['Timestamp'].is_monotonic_increasing, 'Timestamp is not sorted.'

    # type-cast
    # Round through the `.dt` accessor: `Series.round` is specified for a number
    # of decimals, so a frequency string passed to it is not guaranteed to be
    # applied to datetime values.
    df['abstime'] = pd.to_datetime(df['Timestamp'], unit='s').dt.round('us')
    df['monotime'] = df['Timestamp'] - df['Timestamp'].min()
    df['aid_int'] = df['Arbitration_ID'].map(lambda x: int(x, 16))
    df['y'] = df['Class'].map({'Normal': 0, 'Attack': 1})

    ################ distilled from notebook 1 ################
    # calculate the stream-wise timedelta (one stream per Arbitration_ID)
    df['Timedelta'] = df.groupby('Arbitration_ID')['Timestamp'].diff()

    return df


# Load both preliminary-round submission files through the shared loader so
# each frame carries the same derived columns.
df_submit1 = load_dataset(path='0_Preliminary/1_Submission/Pre_submit_D.csv')
df_submit2 = load_dataset(path='0_Preliminary/1_Submission/Pre_submit_S.csv')

Implement the `detect()` function, then submit the source as a plain-text file (`*.txt` or `*.py`). You can download the notebook as a Python file via the menu `File - Download as - Python`.

In [9]:
def detect(df):
    """Run the detection routine and aggregate labels into 10 ms windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing a datetime ``abstime`` column and a ``y`` label column.
        It is copied first, so the caller's frame is left unmodified.

    Returns
    -------
    (pandas.Series, pandas.Series)
        ``y`` and ``y_predicted``, each indexed by ``abstime`` rounded up to a
        multiple of 10 ms and holding the maximum value seen in that window.
        ``y_predicted`` starts at 0 for every row; the detection routine in the
        marked section is expected to overwrite it.
    """
    df = df.copy()
    df['y_predicted'] = 0

    ########## implement your detection routine here ##########





    ###########################################################

    # Bucket every row by the end of its 10 ms window and reuse a single
    # grouping for both the true and the predicted labels.
    window_end = df['abstime'].dt.ceil('10ms')
    per_window = df.groupby(window_end)
    return per_window['y'].max(), per_window['y_predicted'].max()
    
    


# Evaluate every submission set with the same report instead of repeating the
# block per file. Output order and the final `y` / `y_predicted` bindings match
# the unrolled version.
for report_name, submission in (
    ('Pre_submit_D.csv', df_submit1),
    ('Pre_submit_S.csv', df_submit2),
):
    y, y_predicted = detect(submission)
    print(report_name)
    print(confusion_matrix(y, y_predicted))
    # zero_division=0 makes undefined precision/recall report as 0, without a warning
    print(classification_report(y, y_predicted, zero_division=0))

Pre_submit_D.csv
[[39977     0]
 [34810     0]]
              precision    recall  f1-score   support

           0       0.53      1.00      0.70     39977
           1       0.00      0.00      0.00     34810

    accuracy                           0.53     74787
   macro avg       0.27      0.50      0.35     74787
weighted avg       0.29      0.53      0.37     74787

Pre_submit_S.csv
[[33953     0]
 [30912     0]]
              precision    recall  f1-score   support

           0       0.52      1.00      0.69     33953
           1       0.00      0.00      0.00     30912

    accuracy                           0.52     64865
   macro avg       0.26      0.50      0.34     64865
weighted avg       0.27      0.52      0.36     64865

