In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
df = pd.read_csv("7-cyber_attack_data.csv")

In [4]:
df.head()

Unnamed: 0,src_packet_rate,dst_packet_rate,avg_payload_size,connection_duration,tcp_flag_count,avg_interarrival_time,failed_login_attempts,unusual_port_activity_score,session_entropy,avg_response_delay,attack_type
0,-1.286132,-0.648334,1.044115,-0.469715,0.789859,-0.083727,-1.647309,-1.316412,1.01191,-0.898063,2
1,-0.222224,2.083232,1.191114,-1.354527,-0.956992,1.696028,-1.070406,0.981403,-1.628798,1.377594,0
2,-0.431963,0.375745,-1.370334,0.819214,0.345243,1.389447,-1.90413,1.292602,0.925545,0.232705,0
3,-0.912633,0.986988,-0.690042,2.014628,-0.44226,0.590347,-1.819353,1.560938,0.823755,0.517762,0
4,-0.367056,1.667892,0.879172,2.214276,1.846338,-0.894047,1.543838,0.931103,-1.01521,1.061845,1


In [5]:
df.columns

Index(['src_packet_rate', 'dst_packet_rate', 'avg_payload_size',
       'connection_duration', 'tcp_flag_count', 'avg_interarrival_time',
       'failed_login_attempts', 'unusual_port_activity_score',
       'session_entropy', 'avg_response_delay', 'attack_type'],
      dtype='object')

In [6]:
# src_packet_rate -> Source-side packet transmission rate
# dst_packet_rate -> Destination-side packet reception rate
# avg_payload_size -> Average size of payload in packets
# connection_duration -> Duration of the connection (in seconds)
# tcp_flag_count -> Number of TCP flag occurrences
# avg_interarrival_time -> Time between packet arrivals
# failed_login_attempts -> Number of failed login attempts
# unusual_port_activity_score -> Score representing unusual port usage
# session_entropy -> Entropy of session behavior (for anomaly detection)
# avg_response_delay -> Average delay in server response (in ms)
# attack_type -> 0 = Normal, 1 = DDoS, 2 = Port Scan

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   src_packet_rate              1000 non-null   float64
 1   dst_packet_rate              1000 non-null   float64
 2   avg_payload_size             1000 non-null   float64
 3   connection_duration          1000 non-null   float64
 4   tcp_flag_count               1000 non-null   float64
 5   avg_interarrival_time        1000 non-null   float64
 6   failed_login_attempts        1000 non-null   float64
 7   unusual_port_activity_score  1000 non-null   float64
 8   session_entropy              1000 non-null   float64
 9   avg_response_delay           1000 non-null   float64
 10  attack_type                  1000 non-null   int64  
dtypes: float64(10), int64(1)
memory usage: 86.1 KB


In [8]:
df.describe()

Unnamed: 0,src_packet_rate,dst_packet_rate,avg_payload_size,connection_duration,tcp_flag_count,avg_interarrival_time,failed_login_attempts,unusual_port_activity_score,session_entropy,avg_response_delay,attack_type
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,-0.012296,0.239737,-0.031142,0.013329,0.0089,0.016681,0.026614,0.315347,-0.045636,0.17953,1.003
std,1.406123,1.635646,0.989677,1.001178,1.000905,1.027824,1.022217,1.364824,1.472316,0.960834,0.817104
min,-4.267039,-7.960328,-3.718638,-3.250031,-3.288725,-3.17879,-3.057529,-4.045045,-5.869039,-4.409592,0.0
25%,-1.007421,-0.87125,-0.657668,-0.672964,-0.654218,-0.697799,-0.653787,-0.712015,-1.074499,-0.591831,0.0
50%,0.085888,0.296278,-0.008968,0.045505,-0.032894,0.014639,0.049283,0.567003,0.12158,0.24892,1.0
75%,1.080743,1.518676,0.615381,0.678219,0.687831,0.703139,0.713809,1.23589,1.013049,0.983239,2.0
max,3.874738,7.168331,3.206344,2.741943,3.477044,3.600187,3.357941,4.020627,4.986178,4.112542,2.0


In [9]:
X = df.drop("attack_type", axis=1) #hedef değişkeni(dependent) drop ederiz geri kalan her şey X olur
y = df["attack_type"]

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=15)

In [13]:
from sklearn.linear_model import LogisticRegression

In [14]:
model = LogisticRegression()

In [15]:
model.fit(X_train,y_train)
y_pred = model.predict(X_test)

In [16]:
y_pred

array([2, 0, 0, 0, 2, 0, 1, 1, 2, 0, 2, 1, 2, 1, 0, 0, 2, 2, 0, 2, 0, 0,
       0, 1, 1, 1, 1, 2, 2, 2, 1, 0, 2, 2, 2, 2, 1, 2, 1, 0, 2, 1, 0, 2,
       1, 1, 2, 0, 0, 0, 2, 0, 1, 2, 0, 2, 1, 2, 2, 1, 0, 0, 0, 2, 1, 2,
       1, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 1, 1, 2, 1, 0, 1, 2, 2, 0, 2, 2,
       1, 0, 1, 0, 0, 0, 1, 1, 2, 1, 1, 0, 1, 1, 1, 2, 1, 1, 1, 0, 0, 1,
       2, 1, 2, 1, 0, 1, 2, 0, 0, 1, 0, 0, 2, 1, 0, 1, 0, 2, 2, 1, 0, 2,
       2, 2, 1, 2, 0, 2, 0, 2, 1, 0, 2, 0, 2, 2, 0, 1, 1, 2, 1, 2, 1, 1,
       0, 0, 1, 1, 1, 2, 2, 0, 0, 2, 2, 1, 2, 1, 1, 1, 0, 0, 1, 0, 1, 2,
       0, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 0, 1, 1, 2, 0, 2, 1, 2, 1, 0,
       1, 2, 0, 2, 1, 0, 1, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 2, 0,
       2, 0, 2, 1, 2, 1, 0, 0, 1, 1, 2, 1, 0, 2, 1, 0, 2, 1, 1, 2, 2, 1,
       1, 2, 2, 1, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 0, 1, 2, 1, 1, 2, 0,
       2, 0, 1, 0, 1, 2, 2, 1, 2, 2, 2, 2, 0, 2, 1, 0, 0, 0, 2, 1, 2, 2,
       2, 1, 0, 0, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2])

In [17]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [18]:
score = accuracy_score(y_pred,y_test)
print("Score:" ,score)
print(classification_report(y_pred,y_test))
print("Confusion Matrix: \n", confusion_matrix(y_pred,y_test))

Score: 0.7866666666666666
              precision    recall  f1-score   support

           0       0.78      0.91      0.84        90
           1       0.74      0.73      0.73        99
           2       0.84      0.74      0.78       111

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.79      0.79      0.79       300

Confusion Matrix: 
 [[82  2  6]
 [17 72 10]
 [ 6 23 82]]


In [19]:
penalty = ["l1","l2","elasticnet"]
c_values = [100,10,1,0.1,0.01] #negatif olamaz
solver = ["newton-cg","lbfgs","liblinear","sag","saga","newton-cholesky"] #default burda lbfg

In [20]:
params = dict(penalty=penalty, C=c_values, solver=solver)

In [21]:
params

{'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100, 10, 1, 0.1, 0.01],
 'solver': ['newton-cg',
  'lbfgs',
  'liblinear',
  'sag',
  'saga',
  'newton-cholesky']}

In [22]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold

In [23]:
cv = StratifiedKFold() #default 5
grid = GridSearchCV(estimator = model, param_grid = params, cv = cv, scoring = "accuracy", n_jobs = -1)
grid.fit(X_train,y_train)

250 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/opt/anaconda3/lib/python3.13/site-packages/sklearn/linear_model/_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/opt/anaconda3/lib/python3.13/site-package

In [24]:
grid.best_params_

{'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}

In [25]:
grid.best_score_

np.float64(0.7985714285714286)

In [26]:
y_pred = grid.predict(X_test)

In [27]:
score = accuracy_score(y_pred,y_test)
print("Score: ", score)
print(classification_report(y_pred,y_test))
print("Confusion matrix: \n",confusion_matrix(y_pred,y_test))

Score:  0.7866666666666666
              precision    recall  f1-score   support

           0       0.78      0.92      0.85        89
           1       0.71      0.75      0.73        92
           2       0.87      0.71      0.78       119

    accuracy                           0.79       300
   macro avg       0.79      0.80      0.79       300
weighted avg       0.79      0.79      0.79       300

Confusion matrix: 
 [[82  2  5]
 [15 69  8]
 [ 8 26 85]]


In [28]:
# one vs rest

In [29]:
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier

In [30]:
onevsonemodel = OneVsOneClassifier(LogisticRegression()) #içinde estimator istiyor

In [31]:
onevsrestmodel = OneVsRestClassifier(LogisticRegression()) #bunun içine de estimator gerekiyor

In [32]:
onevsonemodel.fit(X_train,y_train)
y_pred = onevsonemodel.predict(X_test)
score = accuracy_score(y_pred,y_test)
print("Score: ", score)
print(classification_report(y_pred,y_test))
print("Confusion matrix: \n",confusion_matrix(y_pred,y_test))

Score:  0.7966666666666666
              precision    recall  f1-score   support

           0       0.80      0.92      0.86        91
           1       0.74      0.76      0.75        95
           2       0.85      0.73      0.78       114

    accuracy                           0.80       300
   macro avg       0.80      0.80      0.80       300
weighted avg       0.80      0.80      0.80       300

Confusion matrix: 
 [[84  3  4]
 [12 72 11]
 [ 9 22 83]]


In [33]:
onevsrestmodel.fit(X_train,y_train)
y_pred = onevsrestmodel.predict(X_test)
score = accuracy_score(y_pred,y_test)
print("Score: ", score)
print(classification_report(y_pred,y_test))
print("Confusion matrix: \n",confusion_matrix(y_pred,y_test))

Score:  0.7833333333333333
              precision    recall  f1-score   support

           0       0.79      0.91      0.85        91
           1       0.71      0.73      0.72        94
           2       0.85      0.72      0.78       115

    accuracy                           0.78       300
   macro avg       0.78      0.79      0.78       300
weighted avg       0.79      0.78      0.78       300

Confusion matrix: 
 [[83  2  6]
 [16 69  9]
 [ 6 26 83]]
