In [2]:
'''
Purpose of this dataset:
    To facilitate research, analysis, and development of intrusion detection systems,
    anomaly detection algorithms, and other security mechanisms for substation networks using the Modbus protocol.
    
Attack scenarios: 
    The dataset covers attacks conducted in three different scenarios: 
    attacks from devices external to the network, attacks from compromised IEDs and attacks
    from compromised Human-Machine Interfaces (HMIs). Each scenario generated specific logs 
    capturing the corresponding attack activity.

Attacks:
    Reconnaissance,
    query flooding, 
    loading payloads, 
    delay response, 
    modify length parameters, 
    false data injection, 
    stacking Modbus frames, 
    brute force write,
    baseline replay
    (all based on the MITRE ICS ATT&CK framework)
    
Data Format:
    Network captures: 
        The network captures are stored in PCAP (Packet Capture) format. 
        The captures are chunked into 100MB files, named in sequential order and each file represents
        a portion of the overall network traffic.
    Logs: 
        The logs generated by the attack tools and the trust model are stored in CSV (Comma-Separated Values) format.
        The logs are grouped by dates, and each record within the log files is timestamped, providing a chronological 
        view of the captured events.

IPs of the devices:

Secure IEDs
IED1A  185.175.0.4
IED4C  185.175.0.8
Normal IEDs
IED1B  185.175.0.5
Secure SCADA HMI  185.175.0.2
Normal SCADA HMI  185.175.0.3
Central Agent  185.175.0.6
Attacker  185.175.0.7
'''
# NOTE(review): hard-coded absolute path to a single capture file on one
# machine; as the last expression of the cell it is only echoed, not assigned.
'C:/Users/gabri/OneDrive/Área de Trabalho/5_periodo/SI/Modbus Dataset/Modbus Dataset/attack/compromised-scada/ied1b/ied1b-network-captures/vethc76bd3f-0.pcap'


'C:/Users/gabri/OneDrive/Área de Trabalho/5_periodo/SI/Modbus Dataset/Modbus Dataset/attack/compromised-scada/ied1b/ied1b-network-captures/vethc76bd3f-0.pcap'

In [4]:
import pandas as pd

# Load the captured packets into a pandas DataFrame.
# The file is read with header=None, so the column labels are supplied
# explicitly, in file order, through `names`.
dfWideNetwork = pd.read_csv(
    'wideNetwork.csv',
    header=None,
    names=[
        'Timestamp',
        'PackLenght',
        'SourceAddress',
        'DestAddress',
        'Protocol',
        'TTL',
        'SourcePort',
        'DestPort',
        'WindowSize',
        'SequenceNumber',
        'Reserved',
        'Accurate',
        'CWR',
        'ECN_Echo',
        'Urgent',
        'Acknowledgement',
        'Push',
        'Reset',
        'Syn',
        'Attacked',
        'AttackType',
    ],
)


  dfWideNetwork = pd.read_csv('wideNetwork.csv', header = None, names = ['Timestamp', 'PackLenght', 'SourceAddress', 'DestAddress', 'Protocol', 'TTL', 'SourcePort', 'DestPort', 'WindowSize', 'SequenceNumber', 'Reserved', 'Accurate', 'CWR', 'ECN_Echo', 'Urgent', 'Acknowledgement', 'Push', 'Reset', 'Syn', 'Attacked', 'AttackType'])


In [6]:
# Preview the first rows of the loaded packet table as a quick sanity check.
dfWideNetwork.head()

Unnamed: 0,Timestamp,PackLenght,SourceAddress,DestAddress,Protocol,TTL,SourcePort,DestPort,WindowSize,SequenceNumber,...,Accurate,CWR,ECN_Echo,Urgent,Acknowledgement,Push,Reset,Syn,Attacked,AttackType
0,2023-02-01 03:49:36.443,66,185.175.0.3,185.175.0.4,6,64,53496,502,64256,1235332397,...,False,False,False,False,True,False,False,False,False,
1,2023-02-01 03:49:36.443,66,185.175.0.3,185.175.0.4,6,64,53496,502,64256,1235332397,...,False,False,False,False,True,False,False,False,False,
2,2023-02-01 03:49:36.459,66,185.175.0.4,185.175.0.3,6,64,502,53494,65152,1919784539,...,False,False,False,False,True,False,False,False,False,
3,2023-02-01 03:49:36.464,74,185.175.0.3,185.175.0.4,6,64,53498,502,64240,4126181854,...,False,False,False,False,False,False,False,True,False,
4,2023-02-01 03:49:36.464,74,185.175.0.4,185.175.0.3,6,64,502,53498,65160,3143922994,...,False,False,False,False,True,False,False,True,False,


In [7]:
# List the column labels; they should match the `names` passed to read_csv.
dfWideNetwork.columns

Index(['Timestamp', 'PackLenght', 'SourceAddress', 'DestAddress', 'Protocol',
       'TTL', 'SourcePort', 'DestPort', 'WindowSize', 'SequenceNumber',
       'Reserved', 'Accurate', 'CWR', 'ECN_Echo', 'Urgent', 'Acknowledgement',
       'Push', 'Reset', 'Syn', 'Attacked', 'AttackType'],
      dtype='object')

In [8]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
dfWideNetwork.describe()

Unnamed: 0,PackLenght,Protocol,TTL,SourcePort,DestPort,WindowSize,SequenceNumber
count,90942.0,90942.0,90942.0,90942.0,90942.0,90942.0,90942.0
mean,69.562996,6.0,64.0,29478.913296,23251.316751,61519.273724,2144228000.0
std,12.538257,0.0,0.0,26485.12869,26501.02213,13915.817057,1236916000.0
min,54.0,6.0,64.0,502.0,502.0,0.0,454541.0
25%,66.0,6.0,64.0,502.0,502.0,64256.0,1079583000.0
50%,66.0,6.0,64.0,34844.0,502.0,64256.0,2151377000.0
75%,74.0,6.0,64.0,56476.0,55464.0,65152.0,3210437000.0
max,342.0,6.0,64.0,60998.0,60998.0,65280.0,4294705000.0


In [14]:
 # Install numpy with the %pip magic so the package lands in the environment
 # of the running kernel; `!pip` uses whichever pip is first on the shell PATH.
 %pip install numpy




[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Train an XGBoost classifier that predicts the `Attacked` flag from the
# per-packet features and report its accuracy on a stratified 20% hold-out.

# Work on the frame loaded earlier in the notebook
df = dfWideNetwork

# Features: every column except the timestamp and the two label columns
X = df.drop(columns=['Timestamp', 'Attacked', 'AttackType'])
y = df['Attacked']

# XGBoost only accepts int, float, bool or category columns in a DataFrame,
# so string columns (e.g. the source/destination IP addresses) are replaced
# by integer category codes. Missing values are encoded as -1.
# `X` is a new frame returned by drop(), so dfWideNetwork is not modified.
non_numeric_cols = X.select_dtypes(exclude=['number', 'bool']).columns
for col in non_numeric_cols:
    X[col] = X[col].astype('category').cat.codes

# NOTE(review): in the attacked rows printed in this notebook one endpoint is
# always 185.175.0.7, so the address features may make the task trivial;
# confirm whether they should be kept as features.

# Split the dataset while keeping the same proportion of values in the target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Create and train the XGBoost model
# NOTE(review): `use_label_encoder` is deprecated in recent XGBoost releases;
# confirm the installed version and drop the argument if it is ignored.
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)

# Make predictions
y_pred = xgb_model.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')


ImportError: 

IMPORTANT: PLEASE READ THIS FOR ADVICE ON HOW TO SOLVE THIS ISSUE!

Importing the numpy C-extensions failed. This error can happen for
many reasons, often due to issues with your setup or how NumPy was
installed.

We have compiled some common reasons and troubleshooting tips at:

    https://numpy.org/devdocs/user/troubleshooting-importerror.html

Please note and check the following:

  * The Python version is: Python3.10 from "C:\Users\gabri\AppData\Local\Programs\Python\Python310\python.exe"
  * The NumPy version is: "1.23.4"

and make sure that they are the versions you expect.
Please carefully study the documentation linked above for further help.

Original error was: DLL load failed while importing _multiarray_umath: Não foi possível encontrar o módulo especificado.


ImportError: numpy._core.multiarray failed to import

In [5]:
# Keep only the packets whose `Attacked` value compares equal to True
attacked_rows = dfWideNetwork[dfWideNetwork['Attacked'].eq(True)]

# Show the selected rows
print(attacked_rows)

                     Timestamp  PackLenght SourceAddress  DestAddress  \
5065   2023-02-01 03:51:37.648          66   185.175.0.4  185.175.0.7   
5066   2023-02-01 03:51:37.648          78   185.175.0.7  185.175.0.4   
5067   2023-02-01 03:51:37.648          54   185.175.0.4  185.175.0.7   
6992   2023-02-01 03:52:46.776          78   185.175.0.7  185.175.0.4   
6993   2023-02-01 03:52:46.776          66   185.175.0.4  185.175.0.7   
6994   2023-02-01 03:52:46.776          66   185.175.0.7  185.175.0.4   
17213  2023-02-01 03:58:25.357          74   185.175.0.7  185.175.0.4   
17214  2023-02-01 03:58:25.357          74   185.175.0.4  185.175.0.7   
17215  2023-02-01 03:58:25.357          66   185.175.0.7  185.175.0.4   
17216  2023-02-01 03:58:25.357          78   185.175.0.7  185.175.0.4   
17217  2023-02-01 03:58:25.357          66   185.175.0.4  185.175.0.7   
17218  2023-02-01 03:58:25.358          66   185.175.0.7  185.175.0.4   
19525  2023-02-01 03:59:28.607          74   185.17

In [11]:
# Print every field of the row at integer position 6993.
# NOTE(review): the recorded output shows Attacked=False for this row, while
# index 6993 appears in the attacked-rows listing of another cell; the cells
# were presumably run against different loads — re-run top to bottom to confirm.
print(dfWideNetwork.iloc[6993])

Timestamp          2023-02-01 03:52:46.776
PackLenght                              66
SourceAddress                  185.175.0.4
DestAddress                    185.175.0.7
Protocol                                 6
TTL                                     64
SourcePort                             502
DestPort                             55038
WindowSize                           65152
SequenceNumber                  3316525133
Reserved                             False
Accurate                             False
CWR                                  False
ECN_Echo                             False
Urgent                               False
Acknowledgement                       True
Push                                 False
Reset                                False
Syn                                  False
Attacked                             False
AttackType                             NaN
Name: 6993, dtype: object


In [1]:
# Display the DataFrame.
# NOTE(review): the recorded run raised NameError because `dfWideNetwork` was
# not defined in that kernel session; the cell that loads it must run first.
dfWideNetwork

NameError: name 'dfWideNetwork' is not defined