In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,f1_score
from sklearn.model_selection import GridSearchCV,ParameterGrid
from sklearn.model_selection import validation_curve
from sklearn.model_selection import learning_curve

In [28]:
train = pd.read_csv('data_source/train.csv')
test = pd.read_csv('data_source/test.csv')

x_train = train.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)']]
y_train = train.loc[:,'Action']
x_test = test.loc[:,['Destination Port', 'NAT Source Port','Packets', 'Elapsed Time (sec)']]
y_test = test.loc[:,'Action']

In [29]:
scaler = StandardScaler()
x_train.loc[:,['Packets','Elapsed Time (sec)']] = scaler.fit_transform(x_train.loc[:,['Packets','Elapsed Time (sec)']])
x_test.loc[:,['Packets','Elapsed Time (sec)']] = scaler.fit_transform(x_test.loc[:,['Packets','Elapsed Time (sec)']])

In [30]:
logis = LogisticRegression()
logis.fit(x_train,y_train)

train_yhat = logis.predict(x_train)
train_f1 = f1_score(y_train,train_yhat,average='weighted')

test_yhat = logis.predict(x_test)
test_f1 = f1_score(y_test,test_yhat,average='weighted')
print('Train Score\n',classification_report(y_train,train_yhat))
print('Test Score\n',classification_report(y_test, test_yhat))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Score
               precision    recall  f1-score   support

       allow       1.00      0.98      0.99     26348
        deny       0.98      0.93      0.95     10491
        drop       0.90      1.00      0.95      8995
  reset-both       0.00      0.00      0.00        38

    accuracy                           0.97     45872
   macro avg       0.72      0.73      0.72     45872
weighted avg       0.97      0.97      0.97     45872

Test Score
               precision    recall  f1-score   support

       allow       1.00      0.98      0.99     11292
        deny       0.97      0.92      0.95      4496
        drop       0.89      1.00      0.94      3856
  reset-both       0.00      0.00      0.00        16

    accuracy                           0.97     19660
   macro avg       0.72      0.73      0.72     19660
weighted avg       0.97      0.97      0.97     19660



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
parameters = [{'penalty':['l2',None],'solver':['lbfgs','newton-cg','newton-cholesky','sag']},
              {'penalty':['l1','l2'],'solver':['liblinear']}
              ]

In [32]:
# logis_best = GridSearchCV(logis, parameters, cv=5)
# logis_best.fit(x_train,y_train)
# print(logis_best.best_params_)
# print(logis_best.best_estimator_)
# answer = logis_best.predict(x_test)
# print(classification_report(y_test, answer))

In [33]:
best_score = 0
best_grid = ''
for g in ParameterGrid(parameters):
    logis.set_params(**g)
    logis.fit(x_train,y_train)
    answer = logis.predict(x_test)
    f1 = f1_score(y_test,answer,average='weighted')

    if f1 > best_score:
        best_score = f1
        best_grid = g
print("F1: %0.5f" % best_score)
print("Grid:", best_grid)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Matrix is singular.
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Fur

F1: 0.97435
Grid: {'penalty': None, 'solver': 'newton-cg'}


In [34]:
logis = LogisticRegression(penalty=None,solver='newton-cg')
logis.fit(x_train,y_train)

train_yhat = logis.predict(x_train)
train_f1 = f1_score(y_train,train_yhat,average='weighted')

test_yhat = logis.predict(x_test)
test_f1 = f1_score(y_test,test_yhat,average='weighted')
print('Train Score\n',classification_report(y_train,train_yhat))
print('Test Score\n',classification_report(y_test, test_yhat))
print(f"AVG F1-Score Train: {train_f1}\nAVG F1-Score Test: {test_f1}")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Score
               precision    recall  f1-score   support

       allow       1.00      0.99      0.99     26348
        deny       0.99      0.93      0.96     10491
        drop       0.90      1.00      0.95      8995
  reset-both       0.00      0.00      0.00        38

    accuracy                           0.98     45872
   macro avg       0.72      0.73      0.73     45872
weighted avg       0.98      0.98      0.98     45872

Test Score
               precision    recall  f1-score   support

       allow       1.00      0.99      0.99     11292
        deny       0.99      0.93      0.96      4496
        drop       0.90      1.00      0.95      3856
  reset-both       0.00      0.00      0.00        16

    accuracy                           0.97     19660
   macro avg       0.72      0.73      0.72     19660
weighted avg       0.98      0.97      0.97     19660

AVG F1-Score Train: 0.9757744136856
AVG F1-Score Test: 0.9743460461438176


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
