In [99]:
# Imports

import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from scipy.stats import beta as beta_dist
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [85]:
tuesday = pd.read_csv('C:/Users/AntonioPayá/Universidad de Oviedo/Sergio Arroni del Riego - TFG/Datasets/CIC-IDS-2017/MachineLearningCSV/MachineLearningCVE/Tuesday-WorkingHours.pcap_ISCX.csv')

In [86]:
# Remove Infinites
tuesday = tuesday.replace([np.inf, -np.inf], np.nan)
# Clear NaN values
tuesday = tuesday.dropna()
# Convert the ' Label' column to a scalar
tuesday[' Label'] = tuesday[' Label'].apply(lambda x: 0 if x == 'BENIGN' else 1)

In [87]:
# Split dataset into features and labels
X = tuesday.iloc[:, :-1].values
y = tuesday.iloc[:, -1].values

In [88]:
# Split dataset into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, random_state = 0)

# Split X_test and y_test into X_test, X_test2 and y_test, y_test2
X_test, X_test2, y_test, y_test2 = train_test_split(X_test, y_test, test_size = 0.5, random_state = 0)

In [89]:
# Defining a class for the MAB problem
class MAB:
    # Initializing the class with the number of arms (classifiers) and the dataset
    def __init__(self, n_arms, X_train, y_train, X_test, y_test):
        self.n_arms = n_arms # Number of arms (classifiers)
        self.X_train = X_train # Training features
        self.y_train = y_train # Training labels
        self.X_test = X_test # Testing features
        self.y_test = y_test # Testing labels
        self.classifiers = [RandomForestClassifier(), GaussianNB(), SVC(), LogisticRegression()] # List of classifiers to choose from
        self.alpha = np.ones(n_arms) # Prior alpha parameter for Beta distribution (initially 1 for each arm)
        self.beta = np.ones(n_arms) # Prior beta parameter for Beta distribution (initially 1 for each arm)
        self.theta = np.zeros(n_arms) # Posterior mean reward for each arm (initially 0 for each arm)
    
    # A method to train the MAB with Thompson sampling
    def train(self):
        # Looping over each arm (classifier)
        for i in range(self.n_arms):
            # Sampling a theta value from the Beta distribution with current alpha and beta parameters
            self.theta[i] = np.random.beta(self.alpha[i], self.beta[i])
        
        # Choosing the arm with the highest theta value
        chosen_arm = np.argmax(self.theta)

        # Fitting the chosen classifier on the training data
        self.classifiers[chosen_arm].fit(self.X_train, self.y_train)

        # Predicting on the testing data
        y_pred = self.classifiers[chosen_arm].predict(self.X_test)

        # Calculating the accuracy score as the reward
        reward = np.mean(y_pred == self.y_test)

        # Updating the alpha and beta parameters based on the reward
        if reward == 1: # If correctly classified, increase alpha by 1
            self.alpha[chosen_arm] += 1 
        else: # If incorrectly classified, increase beta by 1
            self.beta[chosen_arm] += 1
    
    # A method to test the MAB on new data
    def test(self, X_test2, y_test2):
        # Choosing the arm with the highest posterior mean reward (theta value)
        chosen_arm = np.argmax(self.theta)

        # Predicting on the new testing data using the chosen classifier
        y_pred2 = self.classifiers[chosen_arm].predict(X_test2)

        # Calculating and returning the accuracy score on the new testing data
        accuracy = np.mean(y_pred2 == y_test2)
        return accuracy, self.classifiers[chosen_arm]

In [90]:
# Instantiate a ThompsonMABClassifier and fit it to the training data
#mab = MAB(n_arms=4, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
#mab.train()

In [91]:
# Test the MAB on the new testing data
#accuracy, chosen_arm = mab.test(X_test2, y_test2)
#print('Accuracy: ', accuracy, 'Chosen arm: ', chosen_arm)

In [105]:
class MultiArmedBanditThompsonSampling:
    
    def __init__(self, n_arms, n_clusters):
        self.n_arms = n_arms
        self.n_clusters = n_clusters
        self.arms = [RandomForestClassifier(), LogisticRegression(), GaussianNB()]
        self.cluster_centers = None
        self.cluster_assignments = None
        self.reward_sums = {}
        for cluster in range(n_clusters):
            self.reward_sums[cluster] = np.zeros(n_arms)
        self.alpha = np.ones(self.n_arms)
        self.beta = np.ones(self.n_arms)
        
    def train(self, X_train, y_train):
        kmeans = KMeans(n_clusters=self.n_clusters)
        self.cluster_assignments = kmeans.fit_predict(X_train)
        self.cluster_centers = kmeans.cluster_centers_
        # Print the number of samples in each cluster
        for i in range(self.n_clusters):
            print('Cluster {}: {}'.format(i, np.sum(self.cluster_assignments == i)))
        for i in range(self.n_clusters):
            cluster_mask = self.cluster_assignments == i
            cluster_X_train = X_train[cluster_mask]
            cluster_y_train = y_train[cluster_mask]
            for arm in range(self.n_arms):
                print('Training arm {} on cluster {}'.format(arm, i))
                arm_mask = cluster_y_train == arm
                arm_X_train = cluster_X_train[arm_mask]
                arm_y_train = cluster_y_train[arm_mask]
                if len(arm_X_train) > 0 and len(np.unique(arm_y_train)) > 1:
                    self.arms[arm].fit(arm_X_train, arm_y_train)
                else:
                    self.arms[arm].fit(X_train, y_train)

        # Set the arms rewards for each cluster
        for i in range(self.n_clusters):
            cluster_mask = self.cluster_assignments == i
            cluster_X_test = X_train[cluster_mask]
            cluster_y_test = y_train[cluster_mask]
            for arm in range(self.n_arms):
                print('Setting reward_sums arm {} on cluster {}'.format(arm, i))
                arm_mask = cluster_y_test == arm
                arm_X_test = cluster_X_test[arm_mask]
                arm_y_test = cluster_y_test[arm_mask]
                if len(arm_X_test) > 0:
                    arm_y_pred = self.arms[arm].predict(arm_X_test)
                    self.reward_sums[i][arm] = np.mean(arm_y_pred == arm_y_test)

                
    def select_arm(self, cluster):
        # Select the arm with the highest reward
        theta = np.zeros(self.n_arms)
        for arm in range(self.n_arms):
            theta[arm] = np.random.beta(self.alpha[arm] + self.reward_sums[cluster][arm], self.beta[arm] + 1 - self.reward_sums[cluster][arm])
        return np.argmax(theta)
    
    def predict(self, X_test):
        # Select the arm for each sample
        arms = np.zeros(len(X_test))
        for i in range(len(X_test)):
            cluster = np.argmin(np.linalg.norm(self.cluster_centers - X_test[i], axis=1))
            arms[i] = self.select_arm(cluster)
        # Predict using the selected arm
        y_pred = np.zeros(len(X_test))
        for arm in range(self.n_arms):
            arm_mask = arms == arm
            arm_X_test = X_test[arm_mask]
            if len(arm_X_test) > 0:
                y_pred[arm_mask] = self.arms[arm].predict(arm_X_test)
        return y_pred, arms


In [106]:
# Train the MAB
mab = MultiArmedBanditThompsonSampling(n_arms=3, n_clusters=2)
mab.train(X_train, y_train)



Cluster 0: 240747
Cluster 1: 26640
Training arm 0 on cluster 0
Training arm 1 on cluster 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training arm 2 on cluster 0
Training arm 0 on cluster 1
Training arm 1 on cluster 1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training arm 2 on cluster 1
Setting reward_sums arm 0 on cluster 0
Setting reward_sums arm 1 on cluster 0
Setting reward_sums arm 2 on cluster 0
Setting reward_sums arm 0 on cluster 1
Setting reward_sums arm 1 on cluster 1
Setting reward_sums arm 2 on cluster 1


In [107]:
# Test the MAB
y_pred, selected_arms = mab.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9480303829281155
              precision    recall  f1-score   support

           0       1.00      0.95      0.97     86422
           1       0.35      0.86      0.50      2707

    accuracy                           0.95     89129
   macro avg       0.67      0.91      0.74     89129
weighted avg       0.98      0.95      0.96     89129



In [108]:
# Test the MAB
y_pred2, selected_arms = mab.predict(X_test2)
print("Accuracy:", accuracy_score(y_test2, y_pred2))
print(classification_report(y_test2, y_pred2))

Accuracy: 0.9490401552805484
              precision    recall  f1-score   support

           0       1.00      0.95      0.97     86323
           1       0.37      0.86      0.52      2806

    accuracy                           0.95     89129
   macro avg       0.68      0.91      0.74     89129
weighted avg       0.98      0.95      0.96     89129



In [109]:
for i in range(y_pred.shape[0]):
    print("Selected arm:", selected_arms[i], "Predicted:", y_pred[i], "Actual:", y_test[i])

Selected arm: 2.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 1.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 1.0 Actual: 1
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 1.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 1.0 Predicted: 1.0 Actual: 0
Selected arm: 2.0 Predicted: 0.0 Actual: 0
Selected arm: 2.0 Predicted: 0.0 Actual: 0
Selected arm: 2.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 0.0 Actual: 0
Selected arm: 0.0 Predicted: 1.0 Actual: 1
Selected arm: 2.0 Predicted: 0.0 Actual: 0
Selected arm: 1.0 Predicted: 0.0 Actual: 0
Selected ar