In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import AffinityPropagation
from sklearn.metrics import classification_report,f1_score
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid

In [2]:
# Load the train / test splits from disk.
train = pd.read_csv('data_source/train.csv')
test = pd.read_csv('data_source/test.csv')

# Single definition of the model inputs and the target so the train and test
# selections below can never drift apart.
feature_columns = ['Destination Port', 'NAT Source Port', 'Packets', 'Elapsed Time (sec)', 'Bytes Received']
target_column = 'Action'

# Feature matrices (same columns, same order) and label vectors for each split.
x_train_byte = train.loc[:, feature_columns]
y_train_byte = train.loc[:, target_column]
x_test_byte = test.loc[:, feature_columns]
y_test_byte = test.loc[:, target_column]

In [3]:
# Only these three columns are rescaled; the remaining feature columns
# (the port numbers) are passed through unchanged.
scaled_columns = ['Packets', 'Elapsed Time (sec)', 'Bytes Received']

# Cast up front so the float output of the scaler can be written back without
# a dtype clash (presumably these parse as integers from the CSV -- the cast
# is a no-op if they are already float).
x_train_byte = x_train_byte.astype({col: 'float64' for col in scaled_columns})
x_test_byte = x_test_byte.astype({col: 'float64' for col in scaled_columns})

scaler = RobustScaler()
# Learn the scaling statistics from the training split only ...
x_train_byte.loc[:, scaled_columns] = scaler.fit_transform(x_train_byte[scaled_columns])
# ... and apply those same statistics to the test split. Re-fitting on the test
# data would put the two splits on different scales and leak test information.
x_test_byte.loc[:, scaled_columns] = scaler.transform(x_test_byte[scaled_columns])

# Label encoding is currently disabled, so 'Action' keeps the values loaded
# from the CSV (presumably the strings listed in the mapping below).
# y_train_byte.replace({'allow':0, 'deny':1, 'drop':2, 'reset-both':3},inplace=True)
# y_test_byte.replace({'allow':0, 'deny':1, 'drop':2, 'reset-both':3},inplace=True)

In [4]:
# Hyper-parameter search space handed to ParameterGrid: every combination of
# the three value sets below is tried (5 * 2 * 20 = 200 candidates).
parameters = [
    dict(
        # 0.5, 0.6, ..., 0.9 (upper bound 1.0 is excluded by arange)
        damping=np.arange(0.5, 1.0, 0.1),
        max_iter=[200, 500],
        # integer preferences -50, -49, ..., -31
        preference=np.arange(-50, -30, 1),
    )
]

In [5]:
# Exhaustive search over the AffinityPropagation hyper-parameters.
#
# AffinityPropagation is an unsupervised clusterer: predict() returns arbitrary
# cluster indices, not 'Action' classes. To score a clustering against the
# labels, each cluster is first mapped to the most frequent training label among
# its members, and the weighted F1 is computed on those mapped labels.
#
# NOTE(review): the score is measured on the same data the model was fitted on,
# so it is an optimistic estimate; x_test_byte / y_test_byte are not used here.
grid = ParameterGrid(parameters)
best_score = -1.0  # below the F1 range, so the first candidate is always recorded
best_params = {}

for params in grid:
    # Fixed seed for reproducible runs; a 'random_state' entry in the grid
    # would still take precedence.
    clf = AffinityPropagation(**{'random_state': 0, **params})
    clf.fit(x_train_byte)  # labels are not used for fitting

    cluster_ids = clf.predict(x_train_byte)
    # cluster index -> majority 'Action' label of the samples in that cluster
    # (ties resolved by taking the first mode).
    majority_label = y_train_byte.groupby(cluster_ids).agg(lambda labels: labels.mode().iloc[0])
    y_pred = pd.Series(cluster_ids).map(majority_label).to_numpy()

    f1 = f1_score(y_train_byte, y_pred, average='weighted')
    if f1 > best_score:
        best_score = f1
        best_params = params

print("Best parameters:", best_params)
print("Best f1 score:", best_score)