In [1]:
"""
The code for testing the classifiers. Uses binary.csv and multiclass.csv

Loads feature lists and tuning ranges from pickles in CWD.

Change model types in models to tune on specific ones.

Writes output files.

Author: Wesley
"""

# Accelerates tuning of some classifiers
from sklearnex import patch_sklearn
patch_sklearn()

import pandas as pd
import numpy as np

from skopt import BayesSearchCV
from skopt.space import Categorical, Integer, Real

from sklearn.feature_selection import RFECV

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from sklearn.svm import LinearSVC, SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

import pickle

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    accuracy_score
)

from time import time

from sklearn.metrics import classification_report

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
multiclass = pd.read_csv("multiclass_train.csv")
multiclass_test = pd.read_csv("multiclass_test.csv")

Preprocessing (make labels numeric)

In [3]:
# Encode attack labels to int and save as array to be used later.
le = LabelEncoder()
multiclass[" Label"] = le.fit_transform(multiclass[" Label"].values)
multiclass_test[" Label"] = le.transform(multiclass_test[" Label"].values)

multiclass_labels = []
print("\nMulticlass Label Encodings (in order of digits 0 -> n): ")
for i in range(0, len(list(set(list(multiclass[' Label']))))):
    multiclass_labels.append(le.inverse_transform([i])[0])

print(multiclass_labels)


Multiclass Label Encodings (in order of digits 0 -> n): 
['DNS', 'LDAP', 'MSSQL', 'NTP', 'NetBIOS', 'Portmap', 'SNMP', 'SSDP', 'Syn', 'TFTP', 'UDP', 'UDP-lag']


Load feature sets and search spaces and enumerate their contents.

In [4]:
feature_sets = pickle.load(open("feature_sets.pickle", 'rb'))
search_spaces = pickle.load(open("hyperparameter_search_spaces.pickle", 'rb'))

print(f"Available Tuning Ranges: {search_spaces.keys()}")

print("Feature Sets for Binary Dataset:")
for key, value in feature_sets["Binary"].items():
    if key == "RFE Sets":
        print(value.keys())

    elif key == "PCA":
        print(f"{key}, suggested variance threshold is {value}")
        
    else:
        print(key)

print("Feature Sets for Multiclass Dataset:")
for key, value in feature_sets["Multiclass"].items():
    if key == "RFE Sets":
        print(value.keys())

    elif key == "PCA":
        print(f"{key}, suggested variance threshold is {value}")

    else:
        print(key)

Available Tuning Ranges: dict_keys(['XGBoost', 'Bagging SVM', 'SVC (RBF)', 'SVC (Poly)', 'Logistic Regression', 'Random Forest', 'KNN', 'Linear SVC', 'Naive Bayes', 'Decision Tree'])
Feature Sets for Binary Dataset:
All
Correlation
Mutual Information
dict_keys(['Decision Tree', 'Random Forest', 'XGBoost', 'Linear SVC', 'Logistic Regression'])
PCA, suggested variance threshold is 0.95
Feature Sets for Multiclass Dataset:
All
Correlation
Mutual Information
dict_keys(['Decision Tree', 'Random Forest', 'XGBoost', 'Logistic Regression'])
PCA, suggested variance threshold is 0.95


In [5]:
"""
This is a helper method to place our performance results in a DataFrame for future analysis.
"""
def format_results_multiclass(y_test, predicted_values, fold_index, fitTime):
    # get scores
    accuracy = accuracy_score(y_test,predicted_values)
    recall_pos = recall_score(y_test, predicted_values, average='macro')
    precision_pos = precision_score(y_test,predicted_values, average='macro')
    f1 = f1_score(y_test,predicted_values, average='macro')

    cols = ["Fitting Time", "accuracy", "Precision", "Recall", "F1 Score"]
    results = [fitTime, accuracy, precision_pos, recall_pos, f1]

    outFrame = pd.DataFrame([results], columns=cols, index=[fold_index])

    return outFrame

This is for tuning on the multiclass set

In [6]:
models = {
    #"Decision Tree": DecisionTreeClassifier(random_state=42),
    #"Random Forest": RandomForestClassifier(random_state=42),
    #"XGBoost": xgb.XGBClassifier(random_state=42, num_class=12, objective='multi:softmax'),
    #"Linear SVC": make_pipeline(StandardScaler(), LinearSVC(random_state=42)),
    "Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(random_state=42)),
    #"KNN": KNeighborsClassifier(),
    #"Naive Bayes": GaussianNB(),
    #"SVC (RBF)": make_pipeline(StandardScaler(), SVC(random_state=42, kernel='rbf')),
}

score_methods = ['accuracy']

feature_set = feature_sets["Multiclass"]

y = multiclass[" Label"].copy()
X = multiclass.drop([" Label"], axis=1)

y_test_f = multiclass[" Label"].copy()
X_test_f = multiclass.drop([" Label"], axis=1)

In [7]:
params = []

# This will hold all of our results.
runFrame = None

for name, model in models.items():
        for feature_key, feature_val in feature_set.items():

            # If we're on the RFE sets, check if we have one for this classifier. If not, skip it.
            if feature_key == "RFE Sets":
                if name in feature_val.keys():
                    feature_val = feature_val[name]
                else:
                    continue
            
            for score_method in score_methods:
                opt = BayesSearchCV(estimator=model,search_spaces=search_spaces[name],n_iter=50,scoring=score_method,cv=5,n_jobs=5)
                kf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

                counter = 0

                # Used to hold data for a single run (performance metric)
                perfFrame = None

                for train_index, test_index in kf.split(X, y):

                    counter += 1

                    # PCA requires different logic to create X.
                    if feature_key != "PCA":
                        current_X = X.loc[:, feature_val]
                        X_train, X_test = current_X.iloc[train_index,:], current_X.iloc[test_index,:]
                        Y_train, Y_test = y.iloc[train_index], y.iloc[test_index]

                        current_X_f = X_test_f.loc[:, feature_val]
                        
                    else:
                        pca_trans = PCA(n_components=feature_val, random_state=42)
                        current_X = X.loc[:, X.columns]
                        X_train, X_test = current_X.iloc[train_index,:], current_X.iloc[test_index,:]
                        Y_train, Y_test = y.iloc[train_index], y.iloc[test_index]

                        # Apply PCA to training set and use it to transform test set.
                        X_train = pca_trans.fit_transform(X_train)
                        X_test = pca_trans.transform(X_test)
                        current_X_f = pca_trans.transform(X_test_f)

                        # Convert back to DataFrames
                        pca_cols = ["PC"+str(i) for i in list(range(1, len(X_train[0])+1))]
                        X_train = pd.DataFrame(data=X_train, columns=pca_cols)
                        X_test = pd.DataFrame(data=X_test, columns=pca_cols)
                        current_X_f = pd.DataFrame(data=current_X_f, columns=pca_cols)

                    startTime = time()

                    opt.fit(X_train,Y_train)

                    endTime = time()
                    fitTime = endTime - startTime

                    predicted_values = opt.predict(X_test)

                    # get metrics for this fold.
                    foldFrame = format_results_multiclass(Y_test, predicted_values, counter, fitTime)

                    # Add them to our lists of metric.
                    if perfFrame is None:
                        perfFrame = foldFrame
                    else:
                        perfFrame = pd.concat([perfFrame, foldFrame])

                    # Print a classification report on the testing results.
                    print("Validation Results: ")
                    print(classification_report(Y_test, predicted_values, target_names=multiclass_labels, digits=6))

                    print("Testing Results: ")
                    # Print a classification report on the testing results.
                    pred_test = opt.predict(current_X_f)
                    print(classification_report(y_test_f, pred_test, target_names=multiclass_labels, digits=6))

                    # Add tuple with the best params as well as the related model/config
                    params.append((f"Multiclass {name} {feature_key} {score_method} Fold {counter}", opt.best_params_))

                # Create a new line in the results table that averages all the folds
                perfFrame.loc["fold average"] = perfFrame.mean()

                # Mark the results table with the chosen classifier and the current performance metric.
                perfFrame['metric'] = [score_method for j in range(0,6)]
                perfFrame['Classifier'] = [name for j in range(0,6)]
                perfFrame['Feature Set'] = [feature_key for j in range(0,6)]
                perfFrame['Dataset'] = ["multiclass" for j in range(0,6)]
                print(f"{name} with {feature_key} and {score_method} completed.")

                # Add this run to the table with all runs.
                if runFrame is None:
                    runFrame = perfFrame
                else:
                    runFrame = pd.concat([runFrame, perfFrame])

# Write output file, best parameters, and best models to be used later.
runFrame.to_csv(f"multiclass_results_LR_{time()}.csv")
pickle.dump(params, open(f"multiclass_params_LR_{time()}.pickle", "wb"))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.351789  0.154856  0.215048      2667
        LDAP   0.489086  0.680540  0.569144      2667
       MSSQL   0.580098  0.885596  0.701010      2666
         NTP   0.987914  0.950488  0.968840      2666
     NetBIOS   0.521708  0.274944  0.360108      2666
     Portmap   0.499277  0.777194  0.607981      2666
        SNMP   0.624379  0.706787  0.663032      2667
        SSDP   0.456349  0.172478  0.250340      2667
         Syn   0.989761  0.543682  0.701839      2667
        TFTP   0.997211  0.536183  0.697391      2667
         UDP   0.475989  0.802775  0.597627      2667
     UDP-lag   0.644286  0.845836  0.731430      2666

    accuracy                       0.610925     31999
   macro avg   0.634821  0.610947  0.588649     31999
weighted avg   0.634819  0.610925  0.588636     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.359823  0.158929  0.22

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.152623  0.036009  0.058270      2666
        LDAP   0.490780  0.698538  0.576512      2667
       MSSQL   0.572353  0.928732  0.708238      2666
         NTP   0.982080  0.945611  0.963501      2666
     NetBIOS   0.539694  0.277944  0.366923      2666
     Portmap   0.506015  0.788526  0.616444      2667
        SNMP   0.564456  0.737158  0.639350      2667
        SSDP   0.481457  0.535433  0.507012      2667
         Syn   0.984343  0.542182  0.699226      2667
        TFTP   0.998579  0.526809  0.689740      2667
         UDP   0.488131  0.424072  0.453852      2667
     UDP-lag   0.644729  0.848837  0.732837      2666

    accuracy                       0.607488     31999
   macro avg   0.617103  0.607488  0.584325     31999
weighted avg   0.617109  0.607488  0.584328     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.134075  0.030751  0.05

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.119281  0.027382  0.044539      2666
        LDAP   0.483542  0.694038  0.569977      2667
       MSSQL   0.569285  0.910386  0.700519      2667
         NTP   0.984344  0.943361  0.963417      2666
     NetBIOS   0.514585  0.257968  0.343656      2667
     Portmap   0.496670  0.782902  0.607772      2667
        SNMP   0.553930  0.724306  0.627763      2666
        SSDP   0.472984  0.439985  0.455888      2666
         Syn   0.988636  0.554556  0.710545      2667
        TFTP   0.994425  0.535058  0.695758      2667
         UDP   0.474894  0.503751  0.488897      2666
     UDP-lag   0.652462  0.849644  0.738111      2667

    accuracy                       0.601956     31999
   macro avg   0.608753  0.601945  0.578904     31999
weighted avg   0.608767  0.601956  0.578913     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.130689  0.030151  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.125407  0.028871  0.046937      2667
        LDAP   0.487395  0.696174  0.573370      2666
       MSSQL   0.516701  0.928009  0.663806      2667
         NTP   0.988942  0.938883  0.963262      2667
     NetBIOS   0.507660  0.273341  0.355350      2667
     Portmap   0.497069  0.763030  0.601982      2667
        SNMP   0.555873  0.729557  0.630981      2666
        SSDP   0.494382  0.066017  0.116479      2666
         Syn   0.993759  0.537509  0.697663      2666
        TFTP   0.992419  0.540135  0.699538      2666
         UDP   0.484343  0.806452  0.605208      2666
     UDP-lag   0.644049  0.854143  0.734365      2667

    accuracy                       0.596850     31998
   macro avg   0.607333  0.596843  0.557412     31998
weighted avg   0.607322  0.596850  0.557412     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.132435  0.030226  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.138103  0.031121  0.050796      2667
        LDAP   0.490269  0.689797  0.573165      2666
       MSSQL   0.579782  0.918260  0.710782      2667
         NTP   0.987558  0.952381  0.969651      2667
     NetBIOS   0.510161  0.272966  0.355642      2667
     Portmap   0.497802  0.764441  0.602959      2666
        SNMP   0.552550  0.735559  0.631054      2666
        SSDP   0.458867  0.376594  0.413679      2666
         Syn   0.990560  0.551013  0.708122      2666
        TFTP   0.996682  0.563391  0.719866      2666
         UDP   0.478247  0.556430  0.514385      2667
     UDP-lag   0.650186  0.853018  0.737918      2667

    accuracy                       0.605413     31998
   macro avg   0.610897  0.605414  0.582335     31998
weighted avg   0.610887  0.605413  0.582330     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.133485  0.030826  0.05

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.124786  0.027372  0.044895      2667
        LDAP   0.485806  0.680165  0.566786      2667
       MSSQL   0.576968  0.920855  0.709435      2666
         NTP   0.987064  0.944486  0.965306      2666
     NetBIOS   0.526890  0.271943  0.358733      2666
     Portmap   0.497967  0.780945  0.608150      2666
        SNMP   0.545148  0.726659  0.622951      2667
        SSDP   0.470787  0.416948  0.442235      2667
         Syn   0.983028  0.542932  0.699517      2667
        TFTP   0.993051  0.535808  0.696055      2667
         UDP   0.473719  0.530559  0.500531      2667
     UDP-lag   0.642959  0.844336  0.730015      2666

    accuracy                       0.601894     31999
   macro avg   0.609014  0.601917  0.578717     31999
weighted avg   0.609009  0.601894  0.578702     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.132846  0.030001  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.152733  0.035634  0.057786      2666
        LDAP   0.490909  0.698538  0.576602      2667
       MSSQL   0.568458  0.912603  0.700547      2666
         NTP   0.985838  0.939985  0.962366      2666
     NetBIOS   0.535994  0.282071  0.369624      2666
     Portmap   0.506427  0.782902  0.615022      2667
        SNMP   0.563610  0.737533  0.638948      2667
        SSDP   0.466179  0.382452  0.420185      2667
         Syn   0.986311  0.540307  0.698159      2667
        TFTP   0.994342  0.527184  0.689047      2667
         UDP   0.479962  0.570304  0.521247      2667
     UDP-lag   0.644077  0.848462  0.732276      2666

    accuracy                       0.604831     31999
   macro avg   0.614570  0.604831  0.581817     31999
weighted avg   0.614576  0.604831  0.581820     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.132387  0.030076  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.119935  0.027757  0.045081      2666
        LDAP   0.483542  0.694038  0.569977      2667
       MSSQL   0.568411  0.908136  0.699192      2667
         NTP   0.983587  0.944111  0.963445      2666
     NetBIOS   0.513196  0.262467  0.347308      2667
     Portmap   0.498682  0.780277  0.608480      2667
        SNMP   0.554566  0.724306  0.628172      2666
        SSDP   0.459276  0.380720  0.416325      2666
         Syn   0.989940  0.553431  0.709957      2667
        TFTP   0.991667  0.535433  0.695398      2667
         UDP   0.472204  0.548012  0.507292      2666
     UDP-lag   0.652586  0.851519  0.738897      2667

    accuracy                       0.600863     31999
   macro avg   0.607299  0.600851  0.577460     31999
weighted avg   0.607313  0.600863  0.577470     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.132124  0.030601  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.123613  0.029246  0.047301      2667
        LDAP   0.488164  0.696174  0.573902      2666
       MSSQL   0.572399  0.916010  0.704542      2667
         NTP   0.986967  0.937008  0.961339      2667
     NetBIOS   0.508772  0.271841  0.354350      2667
     Portmap   0.496343  0.763405  0.601566      2667
        SNMP   0.556033  0.727682  0.630382      2666
        SSDP   0.470369  0.425731  0.446938      2666
         Syn   0.991684  0.536759  0.696520      2666
        TFTP   0.992419  0.540135  0.699538      2666
         UDP   0.482616  0.525881  0.503321      2666
     UDP-lag   0.644331  0.854518  0.734687      2667

    accuracy                       0.602038     31998
   macro avg   0.609476  0.602033  0.579532     31998
weighted avg   0.609466  0.602038  0.579530     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.131942  0.030826  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.136738  0.031121  0.050703      2667
        LDAP   0.490133  0.689422  0.572943      2666
       MSSQL   0.574207  0.896513  0.700044      2667
         NTP   0.985676  0.954631  0.969905      2667
     NetBIOS   0.509790  0.263592  0.347504      2667
     Portmap   0.496863  0.772318  0.604699      2666
        SNMP   0.553642  0.735559  0.631765      2666
        SSDP   0.449164  0.372843  0.407461      2666
         Syn   0.992573  0.551388  0.708946      2666
        TFTP   0.996016  0.562641  0.719080      2666
         UDP   0.477717  0.558680  0.515036      2667
     UDP-lag   0.649573  0.854893  0.738222      2667

    accuracy                       0.603631     31998
   macro avg   0.609341  0.603633  0.580526     31998
weighted avg   0.609331  0.603631  0.580521     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.132732  0.030901  0.05

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.351789  0.154856  0.215048      2667
        LDAP   0.489344  0.680165  0.569187      2667
       MSSQL   0.580198  0.879220  0.699075      2666
         NTP   0.986667  0.943736  0.964724      2666
     NetBIOS   0.535682  0.222431  0.314339      2666
     Portmap   0.497069  0.827082  0.620952      2666
        SNMP   0.624379  0.706787  0.663032      2667
        SSDP   0.444544  0.559055  0.495267      2667
         Syn   0.966045  0.544057  0.696090      2667
        TFTP   0.991016  0.537683  0.697132      2667
         UDP   0.465729  0.384702  0.421355      2667
     UDP-lag   0.644220  0.836084  0.727718      2666

    accuracy                       0.606300     31999
   macro avg   0.631390  0.606321  0.590327     31999
weighted avg   0.631387  0.606300  0.590315     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.358414  0.158629  0.21

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.153605  0.036759  0.059322      2666
        LDAP   0.492604  0.699288  0.578026      2667
       MSSQL   0.504432  0.939235  0.656356      2666
         NTP   0.989007  0.944861  0.966430      2666
     NetBIOS   0.538799  0.276069  0.365079      2666
     Portmap   0.505897  0.788151  0.616242      2667
        SNMP   0.563808  0.737158  0.638934      2667
        SSDP   0.490026  0.211849  0.295812      2667
         Syn   0.995828  0.536933  0.697686      2667
        TFTP   0.992953  0.528309  0.689672      2667
         UDP   0.475960  0.627297  0.541249      2667
     UDP-lag   0.644293  0.853338  0.734226      2666

    accuracy                       0.598269     31999
   macro avg   0.612268  0.598271  0.569919     31999
weighted avg   0.612275  0.598269  0.569922     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.132600  0.030751  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.119281  0.027382  0.044539      2666
        LDAP   0.483542  0.694038  0.569977      2667
       MSSQL   0.566916  0.908511  0.698170      2667
         NTP   0.981712  0.946362  0.963713      2666
     NetBIOS   0.518072  0.257968  0.344431      2667
     Portmap   0.496905  0.782527  0.607835      2667
        SNMP   0.553771  0.724306  0.627661      2666
        SSDP   0.460020  0.507127  0.482426      2666
         Syn   0.988544  0.550056  0.706818      2667
        TFTP   0.991672  0.535808  0.695716      2667
         UDP   0.467968  0.413728  0.439180      2666
     UDP-lag   0.651337  0.849644  0.737390      2667

    accuracy                       0.599800     31999
   macro avg   0.606645  0.599788  0.576488     31999
weighted avg   0.606659  0.599800  0.576498     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.131062  0.030001  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.127036  0.029246  0.047546      2667
        LDAP   0.487786  0.696549  0.573768      2666
       MSSQL   0.586010  0.923510  0.717031      2667
         NTP   0.987392  0.939633  0.962920      2667
     NetBIOS   0.508162  0.268466  0.351325      2667
     Portmap   0.497565  0.766029  0.603278      2667
        SNMP   0.555556  0.729557  0.630777      2666
        SSDP   0.468913  0.438485  0.453189      2666
         Syn   0.991678  0.536384  0.696203      2666
        TFTP   0.994475  0.540135  0.700049      2666
         UDP   0.472346  0.515754  0.493097      2666
     UDP-lag   0.644073  0.851519  0.733409      2667

    accuracy                       0.602944     31998
   macro avg   0.610083  0.602939  0.580216     31998
weighted avg   0.610073  0.602944  0.580214     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.132474  0.030076  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.137417  0.031121  0.050749      2667
        LDAP   0.490395  0.689422  0.573121      2666
       MSSQL   0.595186  0.917885  0.722124      2667
         NTP   0.986041  0.953506  0.969501      2667
     NetBIOS   0.507671  0.272966  0.355035      2667
     Portmap   0.498407  0.762941  0.602935      2666
        SNMP   0.552862  0.735559  0.631257      2666
        SSDP   0.459447  0.373968  0.412324      2666
         Syn   0.993886  0.548762  0.707105      2666
        TFTP   0.996019  0.563016  0.719387      2666
         UDP   0.478799  0.580052  0.524585      2667
     UDP-lag   0.650342  0.856393  0.739278      2667

    accuracy                       0.607132     31998
   macro avg   0.612206  0.607133  0.583950     31998
weighted avg   0.612196  0.607132  0.583946     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.132277  0.030676  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.348739  0.155606  0.215193      2667
        LDAP   0.488643  0.677540  0.567793      2667
       MSSQL   0.590998  0.906227  0.715428      2666
         NTP   0.987124  0.948987  0.967680      2666
     NetBIOS   0.516810  0.247937  0.335108      2666
     Portmap   0.496009  0.792573  0.610165      2666
        SNMP   0.624379  0.706787  0.663032      2667
        SSDP   0.479717  0.381327  0.424901      2667
         Syn   0.986348  0.541807  0.699419      2667
        TFTP   0.988935  0.536183  0.695356      2667
         UDP   0.476641  0.604424  0.532981      2667
     UDP-lag   0.645457  0.844711  0.731763      2666

    accuracy                       0.611988     31999
   macro avg   0.635817  0.612009  0.596568     31999
weighted avg   0.635815  0.611988  0.596556     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.355668  0.158854  0.21

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.150943  0.036009  0.058147      2666
        LDAP   0.490134  0.698538  0.576067      2667
       MSSQL   0.509224  0.942236  0.661140      2666
         NTP   0.983871  0.938110  0.960445      2666
     NetBIOS   0.514866  0.266317  0.351051      2666
     Portmap   0.501087  0.778028  0.609577      2667
        SNMP   0.563096  0.731159  0.636215      2667
        SSDP   0.489189  0.203600  0.287530      2667
         Syn   0.988300  0.538433  0.697087      2667
        TFTP   0.995039  0.526434  0.688573      2667
         UDP   0.472637  0.641170  0.544153      2667
     UDP-lag   0.644362  0.848837  0.732600      2666

    accuracy                       0.595737     31999
   macro avg   0.608562  0.595739  0.566882     31999
weighted avg   0.608570  0.595737  0.566884     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.130297  0.030901  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.117021  0.028882  0.046330      2666
        LDAP   0.485047  0.693288  0.570767      2667
       MSSQL   0.573428  0.906262  0.702412      2667
         NTP   0.983896  0.939610  0.961243      2666
     NetBIOS   0.516546  0.239970  0.327701      2667
     Portmap   0.495216  0.795651  0.610472      2667
        SNMP   0.551256  0.716054  0.622940      2666
        SSDP   0.461178  0.516879  0.487443      2666
         Syn   0.985215  0.549681  0.705656      2667
        TFTP   0.992329  0.533558  0.693977      2667
         UDP   0.480017  0.432483  0.455012      2666
     UDP-lag   0.650620  0.846269  0.735658      2667

    accuracy                       0.599894     31999
   macro avg   0.607647  0.599882  0.576634     31999
weighted avg   0.607661  0.599894  0.576644     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.128528  0.031426  0.05

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.125198  0.029621  0.047908      2667
        LDAP   0.486366  0.695799  0.572531      2666
       MSSQL   0.507953  0.934008  0.658037      2667
         NTP   0.985674  0.928759  0.956371      2667
     NetBIOS   0.494437  0.266592  0.346407      2667
     Portmap   0.497421  0.759280  0.601069      2667
        SNMP   0.552230  0.719805  0.624980      2666
        SSDP   0.460934  0.440360  0.450412      2666
         Syn   0.992372  0.536759  0.696689      2666
        TFTP   0.991701  0.537884  0.697471      2666
         UDP   0.479545  0.395724  0.433621      2666
     UDP-lag   0.645721  0.851519  0.734476      2667

    accuracy                       0.591349     31998
   macro avg   0.601629  0.591343  0.568331     31998
weighted avg   0.601618  0.591349  0.568329     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.130952  0.030526  0.04

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.131200  0.030746  0.049818      2667
        LDAP   0.490783  0.689047  0.573256      2666
       MSSQL   0.565279  0.896138  0.693256      2667
         NTP   0.984369  0.944507  0.964026      2667
     NetBIOS   0.498532  0.254593  0.337056      2667
     Portmap   0.495419  0.770818  0.603170      2666
        SNMP   0.551224  0.726557  0.626861      2666
        SSDP   0.435637  0.498875  0.465116      2666
         Syn   0.986523  0.549137  0.705542      2666
        TFTP   0.994691  0.562266  0.718428      2666
         UDP   0.465615  0.398575  0.429495      2667
     UDP-lag   0.650630  0.851894  0.737782      2667

    accuracy                       0.597756     31998
   macro avg   0.604159  0.597763  0.575317     31998
weighted avg   0.604148  0.597756  0.575310     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.129203  0.031126  0.05

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.181818  0.004499  0.008782      2667
        LDAP   0.442535  0.714661  0.546602      2667
       MSSQL   0.323575  0.626032  0.426636      2666
         NTP   0.068841  0.007127  0.012916      2666
     NetBIOS   0.000000  0.000000  0.000000      2666
     Portmap   0.423365  0.784321  0.549901      2666
        SNMP   0.551013  0.591301  0.570447      2667
        SSDP   0.075000  0.003375  0.006459      2667
         Syn   0.240029  0.999625  0.387106      2667
        TFTP   0.000000  0.000000  0.000000      2667
         UDP   0.064457  0.013123  0.021807      2667
     UDP-lag   0.006923  0.006752  0.006836      2666

    accuracy                       0.312572     31999
   macro avg   0.198130  0.312568  0.211458     31999
weighted avg   0.198135  0.312572  0.211460     31999

Testing Results: 


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

         DNS   0.163399  0.003750  0.007332     13333
        LDAP   0.441640  0.725643  0.549092     13333
       MSSQL   0.328559  0.633166  0.432624     13333
         NTP   0.076197  0.008476  0.015255     13332
     NetBIOS   0.000000  0.000000  0.000000     13333
     Portmap   0.418479  0.779269  0.544535     13333
        SNMP   0.561378  0.597885  0.579056     13332
        SSDP   0.057377  0.002625  0.005021     13332
         Syn   0.241143  0.999025  0.388508     13333
        TFTP   0.000000  0.000000  0.000000     13333
         UDP   0.066086  0.013650  0.022627     13333
     UDP-lag   0.006149  0.005925  0.006035     13333

    accuracy                       0.314120    159993
   macro avg   0.196700  0.314118  0.212507    159993
weighted avg   0.196700  0.314120  0.212507    159993



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.156863  0.003001  0.005889      2666
        LDAP   0.441503  0.731534  0.550663      2667
       MSSQL   0.332299  0.642536  0.438051      2666
         NTP   0.089109  0.010128  0.018188      2666
     NetBIOS   0.000000  0.000000  0.000000      2666
     Portmap   0.423900  0.787402  0.551109      2667
        SNMP   0.568531  0.609674  0.588384      2667
        SSDP   0.049180  0.002250  0.004303      2667
         Syn   0.205835  0.806899  0.327999      2667
        TFTP   0.000000  0.000000  0.000000      2667
         UDP   0.073864  0.014623  0.024413      2667
     UDP-lag   0.004872  0.005626  0.005222      2666

    accuracy                       0.301166     31999
   macro avg   0.195496  0.301139  0.209518     31999
weighted avg   0.195509  0.301166  0.209537     31999

Testing Results: 


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

         DNS   0.153125  0.003675  0.007178     13333
        LDAP   0.441056  0.725643  0.548640     13333
       MSSQL   0.328773  0.634441  0.433106     13333
         NTP   0.068918  0.007501  0.013529     13332
     NetBIOS   0.000000  0.000000  0.000000     13333
     Portmap   0.422628  0.778594  0.547868     13333
        SNMP   0.563800  0.597885  0.580342     13332
        SSDP   0.057283  0.002625  0.005020     13332
         Syn   0.205034  0.803345  0.326689     13333
        TFTP   0.000000  0.000000  0.000000     13333
         UDP   0.066134  0.013650  0.022630     13333
     UDP-lag   0.005038  0.006000  0.005477     13333

    accuracy                       0.297782    159993
   macro avg   0.192649  0.297780  0.207540    159993
weighted avg   0.192648  0.297782  0.207540    159993



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.122449  0.002251  0.004420      2666
        LDAP   0.437866  0.729284  0.547194      2667
       MSSQL   0.325455  0.629921  0.429174      2667
         NTP   0.077193  0.008252  0.014910      2666
     NetBIOS   0.000000  0.000000  0.000000      2667
     Portmap   0.421318  0.784027  0.548100      2667
        SNMP   0.561311  0.597524  0.578852      2666
        SSDP   0.067164  0.003376  0.006429      2666
         Syn   0.241742  0.998875  0.389274      2667
        TFTP   0.000000  0.000000  0.000000      2667
         UDP   0.048193  0.009002  0.015171      2666
     UDP-lag   0.004275  0.004124  0.004198      2667

    accuracy                       0.313916     31999
   macro avg   0.192247  0.313886  0.211477     31999
weighted avg   0.192250  0.313916  0.211490     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.176259  0.003675  0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.125000  0.003375  0.006572      2667
        LDAP   0.439583  0.727307  0.547972      2666
       MSSQL   0.324525  0.621672  0.426440      2667
         NTP   0.064516  0.007499  0.013436      2667
     NetBIOS   0.000000  0.000000  0.000000      2667
     Portmap   0.410779  0.763030  0.534051      2667
        SNMP   0.563947  0.593773  0.578476      2666
        SSDP   0.042017  0.001875  0.003591      2666
         Syn   0.240691  0.998875  0.387910      2666
        TFTP   0.000000  0.000000  0.000000      2666
         UDP   0.068333  0.015379  0.025107      2666
     UDP-lag   0.005934  0.005624  0.005775      2667

    accuracy                       0.311519     31998
   macro avg   0.190444  0.311534  0.210777     31998
weighted avg   0.190437  0.311519  0.210769     31998

Testing Results: 


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

         DNS   0.160000  0.003900  0.007615     13333
        LDAP   0.440877  0.725118  0.548352     13333
       MSSQL   0.328689  0.633466  0.432806     13333
         NTP   0.075203  0.008326  0.014992     13332
     NetBIOS   0.000000  0.000000  0.000000     13333
     Portmap   0.417789  0.779269  0.543951     13333
        SNMP   0.563193  0.597960  0.580056     13332
        SSDP   0.057377  0.002625  0.005021     13332
         Syn   0.240663  0.999025  0.387886     13333
        TFTP   0.000000  0.000000  0.000000     13333
         UDP   0.065870  0.013650  0.022614     13333
     UDP-lag   0.006202  0.005925  0.006061     13333

    accuracy                       0.314107    159993
   macro avg   0.196322  0.314105  0.212446    159993
weighted avg   0.196321  0.314107  0.212446    159993



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Results: 
              precision    recall  f1-score   support

         DNS   0.227273  0.005624  0.010977      2667
        LDAP   0.443602  0.724306  0.550221      2666
       MSSQL   0.336850  0.646419  0.442903      2667
         NTP   0.065744  0.007124  0.012855      2667
     NetBIOS   0.000000  0.000000  0.000000      2667
     Portmap   0.438498  0.762191  0.556712      2666
        SNMP   0.565402  0.598275  0.581374      2666
        SSDP   0.052632  0.002251  0.004317      2666
         Syn   0.201697  0.998875  0.335623      2666
        TFTP   0.423780  0.052138  0.092852      2666
         UDP   0.070299  0.014998  0.024722      2667
     UDP-lag   0.000000  0.000000  0.000000      2667

    accuracy                       0.317645     31998
   macro avg   0.235481  0.317683  0.217713     31998
weighted avg   0.235459  0.317645  0.217688     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.166667  0.003900  0.00

  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
best = pickle.load(open("multiclass_params_LR_1670732570.800041.pickle", 'rb'))

In [8]:
final_feature_set = None
for key, value in feature_sets["Multiclass"].items():
    if key == "RFE Sets":
        final_feature_set = value["Logistic Regression"]
        break

print(final_feature_set)

X_fin = X.loc[:, final_feature_set]
x_test_fin = X_test_f.loc[:, final_feature_set]

[' Protocol', ' Flow Duration', 'Total Length of Fwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', ' Bwd Packet Length Mean', 'Flow Bytes/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', 'Fwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', ' ACK Flag Count', ' Average Packet Size', ' Avg Fwd Segment Size', ' Avg Bwd Segment Size', ' Subflow Fwd Bytes', 'Init_Win_bytes_forward', ' act_data_pkt_fwd', 'Idle Mean', ' Idle Std', ' Idle Max', ' Idle Min']


In [12]:
for i in range(16, 20):
    print(best[i])

('Multiclass Logistic Regression RFE Sets accuracy Fold 2', OrderedDict([('logisticregression__C', 1368.724871427079), ('logisticregression__max_iter', 279)]))
('Multiclass Logistic Regression RFE Sets accuracy Fold 3', OrderedDict([('logisticregression__C', 9956.802629520667), ('logisticregression__max_iter', 214)]))
('Multiclass Logistic Regression RFE Sets accuracy Fold 4', OrderedDict([('logisticregression__C', 4602.593009396766), ('logisticregression__max_iter', 240)]))
('Multiclass Logistic Regression RFE Sets accuracy Fold 5', OrderedDict([('logisticregression__C', 2358.2284838718524), ('logisticregression__max_iter', 224)]))


In [18]:
clf = make_pipeline(StandardScaler(), LogisticRegression(random_state=42, C = 4602.593009396766, max_iter = 240))
clf.fit(X_fin, y)
pred_test = clf.predict(x_test_fin)
print(classification_report(y_test_f, pred_test, target_names=multiclass_labels, digits=6))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


              precision    recall  f1-score   support

         DNS   0.127860  0.030601  0.049383     13333
        LDAP   0.488660  0.691667  0.572706     13333
       MSSQL   0.591760  0.902798  0.714914     13333
         NTP   0.984851  0.941119  0.962488     13332
     NetBIOS   0.507321  0.252081  0.336807     13333
     Portmap   0.496927  0.782270  0.607773     13333
        SNMP   0.552258  0.722922  0.626169     13332
        SSDP   0.463269  0.525053  0.492230     13332
         Syn   0.987230  0.545039  0.702329     13333
        TFTP   0.990220  0.539188  0.698198     13333
         UDP   0.480260  0.454361  0.466952     13333
     UDP-lag   0.648067  0.847596  0.734523     13333

    accuracy                       0.602889    159993
   macro avg   0.609890  0.602891  0.580373    159993
weighted avg   0.609889  0.602889  0.580371    159993

