# Imports

In [1]:
# seperação do dataset de forma balanceada 
from sklearn.svm import SVC
from sklearn.naive_bayes import ComplementNB
from modules.preprocessing import preprocessing
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold, cross_val_score 

import pandas as pd

# Global variables

In [2]:
svm = SVC(class_weight='balanced', gamma='auto')
stratified_method = StratifiedKFold(n_splits=5)

# Spliting proportionally the whole dataset into a training and testing dataset

In [3]:
def spliting_train_test(dataset):
    
    X = dataset.drop(columns=['y']).values
    y = dataset.y
    sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
    sss.get_n_splits(X, y)

    for train_index, test_index in sss.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
    return X_train, X_test, y_train, y_test

# Function to find the delta

In [4]:
def find_delta(csv_path):
    
    # reading the raw dataset
    packages = pd.read_csv(csv_path, low_memory=False, index_col=[0])
    
    # defining the values of delta
    deltas = ['1ms', '10ms', '50ms', '100ms', '300ms', '500ms', '900ms']
    columns = ['Delta', 'F1 Score (mean)', 'F1 Score (std)', 'Cross Val Output']
    results = list()
    
    for count, delta in enumerate(deltas):
        
        porcentage = count + 1 / len(deltas)
        porcentage = round(porcentage * 100, 2)
        
        print('{}% complete'.format(porcentage))
        
        # creating preprocessing dataset
        dataset = preprocessing(packages, delta)
        # spliting proportionally the whole dataset into a training and testing dataset
        X_train, _, y_train, _ = spliting_train_test(dataset)
        # applying the cross validation with 5 folds, using the metric f1 score
        f1_score = cross_val_score(svm, X_train, y_train, scoring='f1', cv=stratified_method, n_jobs=-1)
        # the f1 score's mean and std
        mean, std = f1_score.mean(), f1_score.std()
    
        # gathering the results into a tuple
        results.append([delta, mean, std, [f1_score]])
        
    return pd.DataFrame(results, index=deltas, columns=columns)

In [5]:
metrics = find_delta('SUEE1_TCP.csv')

  mask |= (ar1 == a)


14.29% complete
114.29% complete
214.29% complete
314.29% complete
414.29% complete
514.29% complete
614.29% complete


In [6]:
metrics

Unnamed: 0,Delta,F1 Score (mean),F1 Score (std),Cross Val Output
1ms,1ms,0.711687,0.004455,"[[0.7106325706594886, 0.7178106774338269, 0.70..."
10ms,10ms,0.711687,0.004455,"[[0.7106325706594886, 0.7178106774338269, 0.70..."
50ms,50ms,0.711687,0.004455,"[[0.7106325706594886, 0.7178106774338269, 0.70..."
100ms,100ms,0.711687,0.004455,"[[0.7106325706594886, 0.7178106774338269, 0.70..."
300ms,300ms,0.711687,0.004455,"[[0.7106325706594886, 0.7178106774338269, 0.70..."
500ms,500ms,0.711687,0.004455,"[[0.7106325706594886, 0.7178106774338269, 0.70..."
900ms,900ms,0.711687,0.004455,"[[0.7106325706594886, 0.7178106774338269, 0.70..."


In [7]:
metrics.to_csv("results_ms.csv")

In [8]:
!cat results.csv

,Delta,F1 Score (mean),F1 Score (std),Cross Val Output
1s,1s,0.7116871021724877,0.004455124498984987,"[array([0.71063257, 0.71781068, 0.70451498, 0.71087055, 0.71460674])]"
5s,5s,0.7028181831267947,0.01029761714208972,"[array([0.69482451, 0.68888889, 0.70215311, 0.71122011, 0.7170043 ])]"
20s,20s,0.6328360824404731,0.012944250879458427,"[array([0.65199161, 0.6266531 , 0.61752988, 0.62411348, 0.64389234])]"
30s,30s,0.567365877152232,0.013684657134916891,"[array([0.58563536, 0.54520167, 0.56368564, 0.56527591, 0.57703081])]"
40s,40s,0.5896291814974532,0.014747131823875161,"[array([0.57757296, 0.59777424, 0.58282209, 0.61463415, 0.57534247])]"
50s,50s,0.5659127504037298,0.022483178836025703,"[array([0.57093426, 0.53833605, 0.54362416, 0.5773913 , 0.59927798])]"
60s,60s,0.5116284166966183,0.018019733379785494,"[array([0.5243129 , 0.53362256, 0.51804671, 0.48425197, 0.49790795])]"
