In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from joblib import dump

# Load the labeled flow data and discard the timestamp column when it exists.
data = pd.read_csv('cleaned_data.csv').drop(columns=['Timestamp'], errors='ignore')

# Features are every column except 'label'; 'label' itself is the target.
X, y = data.drop(columns='label'), data['label']

# Hold out 30% of the rows for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a 100-tree random forest on the training split (fit() returns the estimator).
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split and report accuracy as a percentage.
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Random Forest Model Accuracy: {accuracy * 100:.2f}%')

# Persist the fitted model for later use.
dump(rf_model, 'random_forest_iot23_model.joblib')

# Persist the feature matrices of both splits (row index omitted).
for split_frame, split_csv in (
    (X_train, 'iot23_processed_train_data.csv'),
    (X_test, 'iot23_processed_test_data.csv'),
):
    split_frame.to_csv(split_csv, index=False)


Random Forest Model Accuracy: 81.43%


In [None]:
from google.colab import drive

# Directory under which Google Drive is mounted in the Colab runtime.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
from google.colab import files

# Trigger a browser download for the saved model and both feature CSVs, in order.
for artifact_name in (
    'random_forest_iot23_model.joblib',
    'iot23_processed_train_data.csv',
    'iot23_processed_test_data.csv',
):
    files.download(artifact_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd

# Re-score the held-out split with the fitted model.
# Relies on rf_model, X_test and y_test created by the training cell.
y_pred = rf_model.predict(X_test)

# Build a per-row results table: test features plus true and predicted labels.
results_df = X_test.copy()

# X_test keeps the original (shuffled) row labels from train_test_split, and
# pandas aligns a Series on index when it is assigned as a column. Assigning
# y_test re-indexed to 0..n-1 would therefore pair labels with the wrong rows
# (or produce NaN). y_test is in the same row order as X_test, so attach its
# values positionally instead.
results_df['True Label'] = y_test.to_numpy()
results_df['Predicted Label'] = y_pred  # ndarray: already assigned by position

# Save the results to a CSV file (row index omitted).
results_file_path = 'iot23_results.csv'
results_df.to_csv(results_file_path, index=False)

# Optional: print the first rows as a sanity check.
print(results_df.head())


       duration  orig_bytes  resp_bytes  missed_bytes  orig_pkts  \
42995  3.155947           0           0           0.0        3.0   
17812  2.999841           0           0           0.0        3.0   
12377  2.997684           0           0           0.0        3.0   
44381  0.000465          29          45           0.0        1.0   
19241  2.996074           0           0           0.0        3.0   

       orig_ip_bytes  resp_pkts  resp_ip_bytes  proto_icmp  proto_tcp  ...  \
42995          180.0        0.0            0.0           0          1  ...   
17812          180.0        0.0            0.0           0          1  ...   
12377          180.0        0.0            0.0           0          1  ...   
44381           57.0        1.0           73.0           0          0  ...   
19241          180.0        0.0            0.0           0          1  ...   

       conn_state_RSTRH  conn_state_S0  conn_state_S1  conn_state_S2  \
42995                 0              1            

In [None]:
from google.colab import files

# Trigger a browser download of the per-row results CSV.
results_csv_name = 'iot23_results.csv'
files.download(results_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from joblib import dump

# Step 1: Load the dataset
data = pd.read_csv('iot23_combined.csv')  # Ensure this is the correct path to your dataset

# Step 2: Drop unnecessary columns if any
data = data.drop(columns=['Timestamp'], errors='ignore')  # Modify based on dataset structure

# Separate features (X) and target labels (y)
X = data.drop('label', axis=1)  # Features
y = data['label']  # Target (Normal or Attack)

# Step 3: Identify categorical and numeric columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
# Columns of any other dtype are not listed in either transformer and are
# therefore dropped by ColumnTransformer's default remainder='drop'.
numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Step 4: Create a Column Transformer for preprocessing
# handle_unknown='ignore' is required here: the test split contains category
# values (e.g. IP addresses) never seen during fit, and the default
# handle_unknown='error' aborts predict() with
# "ValueError: Found unknown categories ... during transform".
# Unseen categories are encoded as an all-zero row for that feature.
# NOTE(review): one-hot encoding near-unique identifier columns (IPs, uids)
# creates very wide matrices — consider dropping such columns; confirm intent.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numeric_cols),  # Numeric features pass through
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)  # One-hot encode categoricals
    ])

# Step 5: Create a Pipeline that first transforms the data then fits the model
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Step 6: Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 7: Train the model
pipeline.fit(X_train, y_train)

# Step 8: Evaluate the model
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Random Forest Model Accuracy: {accuracy * 100:.2f}%')

# Step 9: Create a DataFrame for results with only important columns
important_columns = [
    'ts',
    'id.orig_h',
    'duration',
    'orig_bytes',
    'resp_bytes',
    'missed_bytes',
    'orig_pkts',
    'orig_ip_bytes',
    'resp_pkts',
    'resp_ip_bytes',
    'label',
    'proto_icmp',
    'proto_tcp',
    'proto_udp',
    'conn_state_SF',
    'conn_state_REJ',
    'conn_state_RSTO',
    'conn_state_S1',
    'conn_state_S0'
]

# 'label' was removed from X above (it is reported as 'True Label' instead), and
# other listed columns may not exist in this dataset. Selecting a missing column
# raises KeyError, so keep only the ones X_test actually has.
available_columns = [col for col in important_columns if col in X_test.columns]
missing_columns = [col for col in important_columns if col not in X_test.columns]
if missing_columns:
    print(f'Skipping columns not present in the features: {missing_columns}')

# Create the results DataFrame; both sides use a fresh 0..n-1 index so the
# column-wise concat below pairs rows by position.
results_df = pd.DataFrame({
    'True Label': y_test.reset_index(drop=True),
    'Predicted Label': y_pred
})

# Include the important features from X_test
results_df = pd.concat([X_test[available_columns].reset_index(drop=True), results_df], axis=1)

# Step 10: Save the results to a CSV file
results_file_path = 'important_results.csv'
results_df.to_csv(results_file_path, index=False)

# Step 11: Save the trained model (preprocessing + classifier) for later use
dump(pipeline, 'random_forest_iot23_model.joblib')

# Optional: Print some results to check
print(results_df.head())


ValueError: Found unknown categories ['223.32.52.214', '89.14.63.89', '45.202.33.75', '213.219.200.149', '64.111.83.105', '192.72.62.73', '24.165.148.231', '69.163.134.27', '200.56.101.169', '61.125.141.34', '38.64.137.227', '121.41.16.177', '191.130.209.104', '218.248.160.166', '173.244.96.250', '66.165.163.178', '45.112.246.17', '200.2.162.122', '193.200.241.216', '203.80.217.46', '82.232.137.29', '181.24.133.99', '37.236.121.123', '149.172.45.234', '203.249.77.1', '174.144.241.185', '178.192.31.3', '58.160.249.10', '42.56.109.169', '128.109.9.53', '130.81.21.240', '161.246.231.2', '216.218.222.230', '182.163.62.19', '101.29.12.13', '103.70.108.1', '211.175.185.110', '195.1.208.169', '213.131.110.26', '104.203.116.205', '115.2.199.242', '112.190.237.158', '190.19.196.169', '173.151.74.200', '37.158.210.116', '182.72.26.5', '67.129.123.118', '1.208.92.138', '198.202.27.3', '141.101.178.40', '78.110.192.158', '36.236.162.86', '200.24.33.150', '126.178.165.105', '213.248.86.150', '103.253.52.217', '197.35.63.164', '156.240.52.191', '130.243.128.19', '50.226.25.130', '86.43.12.86', '206.59.174.169', '112.215.81.77', '92.200.58.96', '61.237.123.66', '115.153.10.93', '60.246.245.210', '84.22.40.34', '183.233.35.38', '69.197.90.137', '181.176.254.57', '222.50.127.109', '206.214.33.2', '143.106.1.142', '46.249.120.8', '74.60.132.1', '204.115.170.252', '186.144.243.215', '103.104.3.188', '183.181.7.5', '197.234.155.1', '194.72.12.30', '100.89.121.18', '12.124.105.50', '95.84.218.150', '68.86.222.90', '179.84.62.224', '77.186.222.128', '66.17.93.34', '119.198.94.193', '78.35.234.86', '87.245.237.146', '80.169.160.38', '200.19.240.70', '194.141.252.62', '141.85.160.41', '104.246.172.254', '79.201.71.251', '60.251.221.229', '104.69.107.79', '37.126.147.182', '217.0.119.23', '206.126.16.198', '114.79.129.34', '59.127.169.65', '12.152.116.2', '194.184.131.101', '217.80.239.4', '59.18.32.110', '115.238.151.95', '96.239.110.16', '104.168.94.96', '200.24.33.82', '61.76.33.242', 
'144.223.20.74', '88.87.179.9', '170.82.219.242', '175.253.187.1', '172.81.130.1', '218.248.235.198', '211.170.34.170', '179.11.193.165', '84.135.145.176', '83.169.172.98', '60.139.169.118', '86.101.244.31', '212.174.62.144', '58.160.249.6', '61.233.15.197', '38.122.91.74', '77.217.131.211', '123.212.119.157', '104.151.16.140', '106.69.217.173', '101.171.215.147', '1.208.154.230', '112.119.176.77', '117.150.11.36', '58.160.252.6', '179.17.116.72', '175.234.221.52', '195.68.152.178', '45.61.10.17', '82.95.108.102', '120.43.120.194', '41.36.91.84', '116.105.55.243', 'fe80::ad:a9ad:1b82:2e67', '218.192.243.170', '88.4.252.2', '174.2.75.30', '69.74.153.241', '2.201.158.137', '188.110.164.151', '92.196.111.190', '121.119.97.21', '91.89.48.93', '49.77.97.11', '67.215.180.154', '46.46.160.90', '83.246.194.199', '68.85.215.122', '94.223.134.140', '208.72.75.58', '47.63.193.68', '69.51.129.12', '69.53.246.253', '45.35.43.174', '202.163.72.198', '31.167.39.16', '198.210.117.131', '141.28.15.25', '121.138.240.7', '221.225.160.3', '122.208.112.78', '74.58.193.197', '154.202.68.110', '84.21.1.233', '148.240.221.134', '41.210.243.218', '190.122.6.18', '167.61.120.196', '104.227.2.25', '210.227.11.70', '148.206.63.249', '187.218.193.82', '129.232.213.130', '82.56.210.160', '144.121.66.101', '210.148.39.102', '207.171.244.2', '185.96.133.56', '212.225.141.204', '188.121.240.10', '84.94.140.220', '106.44.35.36', '113.123.199.229', '76.224.255.254', '216.66.83.110', '69.39.239.187', '213.149.186.23', '154.73.189.235', '45.118.135.209', '91.37.217.223', '62.40.124.218', '212.68.66.179', '223.99.161.210', '220.170.255.240', '31.208.26.202', '173.225.52.211', '90.154.160.2', '124.171.17.99', '42.235.233.45', '45.200.86.95', '121.189.11.237', '183.128.201.3', '80.124.133.210', '125.19.76.37', '182.76.215.242', '23.89.54.156', '95.61.111.138', '196.22.54.46', '91.146.152.88', '189.8.89.38', '148.247.49.142', '94.177.32.222', '162.144.240.27', '23.45.3.70', '202.39.64.222', 
'178.5.94.180', '60.177.16.83', '112.65.132.205', '60.7.18.129', '1.189.59.106', '31.207.86.142', '41.207.178.101', '91.206.129.134', '154.204.108.250', '209.58.93.21', '188.43.5.189', '221.148.74.162', '100.72.251.10', '118.100.53.142', '189.124.37.224', '178.2.102.212', '172.199.253.2', '182.16.175.246', '176.61.137.48', '109.229.44.34', '123.141.220.218', '95.188.87.8', '186.248.100.14', '85.185.214.166', '23.225.225.37', '71.254.168.130', '183.133.67.38', '178.4.137.211', '119.193.241.30', '148.247.49.150', '154.70.140.12', '202.128.2.82', '189.209.209.146', '174.117.53.125', '69.193.68.1', '119.165.244.234', '41.191.99.198', '12.247.72.10', '193.112.46.74', '217.231.227.36', '4.14.28.214', '91.189.218.26', '112.192.8.148', '184.146.4.1', '174.93.40.1', '178.16.117.252', '203.144.128.80', '168.187.124.98', '221.180.20.178', '185.229.192.80', '198.255.31.242', '87.140.41.9', '24.164.105.214', '184.206.183.84', '50.233.82.98', '219.129.118.89', '69.61.18.166', '150.199.51.30', '77.230.144.129', '211.137.211.157', '114.94.91.231', '62.46.46.71', '81.173.152.54', '69.73.128.158', '61.237.5.49', '213.165.36.81', '141.45.176.112', '180.253.170.25', '178.7.89.93', '196.219.76.152', '66.252.65.54', '202.112.31.237', '89.228.14.214', '67.51.92.66', '118.67.64.140', '59.11.46.74', '130.237.0.4', '187.73.155.131', '178.5.186.50', '91.248.137.138', '114.205.89.194', '207.152.68.1', '191.170.244.197', '121.224.114.220', '185.87.159.28', '82.144.193.90', '101.169.109.138', '202.73.96.73', '38.104.245.6', '124.122.79.198', '141.53.2.3', '186.225.142.229', '59.93.34.49', '182.237.16.5', '196.11.63.30', '68.187.49.55', '179.127.144.18', '138.68.64.195', '141.138.138.26', '213.242.197.6', '121.98.208.1', '196.201.222.234', '61.19.23.17', '69.128.252.58', '69.35.220.57', '31.17.107.253', '149.6.150.166', '143.0.184.12', '195.149.237.62', '41.232.124.7', '101.228.218.23', '81.180.26.118', '196.30.1.23', '80.101.128.100', '69.76.239.204', '175.252.168.53', '111.226.192.113', 
'24.164.241.158', '77.27.96.49', '179.60.230.2', '91.249.40.172', '157.7.221.75', '149.11.45.114', '36.37.76.133', '31.16.93.140', '93.138.57.237', '31.150.56.47', '190.3.67.106', '76.169.226.159', '196.80.175.138', '218.237.28.214', '217.88.100.149', '60.30.145.10', '199.229.229.190', '5.9.134.116', '64.80.122.9', '191.169.199.177', '62.115.118.225', '82.41.134.33', '23.105.217.183', '124.197.128.70', '92.215.213.255', '159.148.168.225', '120.234.59.145', '119.5.84.91', '14.111.31.178', '199.167.206.250', '209.58.159.5', '142.147.56.121', '178.38.189.19', '138.201.202.153', '182.73.85.78', '59.179.245.34', '189.213.107.169', '120.209.247.138', '169.229.59.242', '178.10.217.154', '199.91.230.146', '87.20.40.162', '78.48.11.4', '134.245.100.110', '216.58.37.193', '45.79.96.186', '169.237.240.4', '42.56.80.70', '61.213.145.146', '196.19.65.60', '66.2.182.217', '182.180.150.66', '112.72.255.98', '208.66.159.166', '177.142.228.1', '197.245.241.8', '136.62.160.176', '179.147.99.202', '87.37.139.49', '101.108.91.248', '64.59.151.46', '103.199.244.62', '79.226.6.144', '156.227.94.40', '114.190.35.45', '190.119.252.169', '137.175.224.200', '60.139.25.252', '73.160.194.119', '187.33.226.138', '93.222.223.138', '143.208.31.83', '62.145.75.4', '141.101.206.2', '217.110.96.130', '68.119.96.145', '212.75.110.86', '42.97.222.91', '160.247.3.254', '211.73.14.94', '156.246.8.110', '177.53.223.198', '88.196.18.138', '61.82.195.142', '186.216.128.86', '177.234.0.1', '147.178.220.107', '150.214.220.218', '124.94.134.65', '104.118.152.60', '188.1.230.50', '178.119.177.166', '134.3.103.27', '209.226.236.1', '41.71.71.250', '112.188.46.226', '196.192.102.10', '216.11.11.218', '161.184.0.198', '151.41.192.75', '69.253.127.215', '210.92.20.17', '118.212.114.139', '109.236.95.111', '162.212.209.134', '2.115.243.69', '31.165.92.239', '177.101.55.50', '45.116.156.196', '181.191.57.52', '62.91.0.105', '68.87.239.174', '116.102.79.112', '63.235.176.114', '49.255.198.117', '31.169.104.5', 
'115.77.90.61', '106.120.25.66', '196.52.17.127', '68.38.220.6', '59.108.160.209', '216.194.115.30', '141.45.3.4', '213.165.80.79', '67.231.146.211', '79.132.136.210', '31.41.56.33', '86.16.131.84', '107.173.125.205', '119.95.114.0', '104.202.194.250', '94.53.1.143', '64.59.150.178', '14.210.109.75', '32.96.114.34', '187.73.67.239', '84.129.82.160', '187.9.156.30', '79.100.201.13', '46.19.15.130', '217.64.4.173', '211.131.73.91', '83.168.243.156', '90.149.110.185', '151.245.214.134', '193.160.35.19', '196.46.112.1', '112.226.121.44', '92.211.162.30', '114.202.116.45', '50.27.149.204', '197.49.101.54', '143.130.18.2', '121.99.20.1', '108.190.179.77', '193.93.216.95', '149.28.108.28', '5.54.251.78', '31.168.55.9', '129.143.60.117', '201.48.42.113', '124.65.239.50', '211.231.187.157', '178.209.249.238', '87.177.34.89', '69.27.177.8', '24.28.89.162', '217.170.253.202', '185.117.8.238', '91.137.142.211', '141.0.165.96', '196.234.238.15', '88.87.0.253', '37.209.69.199', '1.235.81.67', '147.155.253.4', '24.12.83.246', '212.73.200.150', '218.248.255.122', '138.121.73.134', '196.75.251.190', '31.204.253.204', '69.112.118.59', '115.72.111.72', '64.250.48.138', '88.133.133.58', '156.252.213.48', '196.207.210.182', '221.192.20.141', '217.253.96.230', '103.239.240.182', '104.155.173.15', '88.79.8.123', '54.183.157.195', '185.103.81.151', '216.234.104.182', '200.61.128.247', '31.31.51.135', '196.122.138.235', '192.168.69.241', '89.11.132.99', '67.227.104.194', '173.219.152.219', '5.167.137.137', '178.39.100.90', 'fe80::82e6:50ff:fe21:867c', '198.207.22.117', '93.49.28.2', '125.23.132.177', '161.184.255.98', '92.106.50.44', '173.242.255.118', '79.139.128.1', '185.109.81.1', '93.106.99.46', '1.208.83.174', '108.125.150.127', '36.75.246.44', '218.248.235.237', '105.22.64.210', '156.248.77.187', '96.20.176.90', '182.176.88.137', '73.12.128.134', '203.50.20.40', '38.104.251.78', '31.192.72.4', '121.182.168.238', '200.160.121.173', '174.45.39.127', '179.75.45.184', '193.239.73.14', 
'167.57.110.102', '173.122.135.151', '157.14.233.162', '99.243.17.241', '139.130.25.98', '12.208.131.2', '86.61.188.242', '173.197.17.45', '86.43.11.66', '83.181.222.153', '121.181.90.14', '92.228.144.90', '199.203.51.253', '69.35.96.28', '104.121.155.222', '88.74.180.94', '24.145.210.54', '190.199.162.129', '193.17.11.250', '192.168.69.136', '188.248.228.192', '162.225.210.49', '197.13.3.22', '115.6.159.17', '210.155.132.61', '69.159.184.1', '181.214.170.166', '220.167.88.189', '58.123.178.58', '106.68.158.230', '112.174.246.150', '69.144.26.178', '74.212.235.94', '220.211.6.224', '129.143.19.81', '220.141.5.42', '204.111.55.66', '84.96.161.17', '161.132.35.130', '39.77.179.209', '23.192.91.5', 'fe80::1c5c:213d:7cf8:890', '79.98.41.246', '121.229.251.73', '61.247.97.201', '218.159.189.206', '188.43.3.156', '178.2.69.7', '45.32.50.231', '104.113.87.147', '93.238.11.191', '97.72.252.90', '31.150.43.31', '177.152.48.142', '158.64.14.230', '148.66.65.147', '92.79.244.72', '210.216.6.118', '71.13.187.254', '92.198.62.28', '84.164.60.150', '119.212.97.14', '89.107.160.25', '181.188.30.70', '91.244.144.1', '91.248.36.13', '104.144.236.217', '61.86.16.86', '123.141.250.6', '23.224.135.58', '124.205.152.233', '37.114.44.151', '91.177.80.1', '200.123.36.245', '199.127.196.107', '196.22.48.17', '223.199.60.90', '23.213.59.231', '64.243.159.134', '159.226.254.62', '206.172.233.1', '124.72.158.89', '67.14.162.90', '211.179.140.46', '184.64.220.213', '115.192.27.218', '69.74.152.126', '60.94.146.43', '223.32.212.176', '211.206.84.54', '182.71.180.214', '69.80.186.2', '191.37.68.118', '177.63.152.126', '162.144.240.23', '79.96.144.70', '31.150.195.142', '173.219.225.246', '80.53.97.241', '218.248.235.141', '191.53.228.186', '59.180.210.202', '205.207.26.1', '120.69.94.77', '58.229.22.114', '81.18.220.195', '123.141.250.170', '89.221.43.201', '163.209.188.110', '148.212.16.2', '123.195.58.59', '148.231.239.111', '109.201.100.5', '148.243.122.250', '139.201.151.42', 
'185.16.37.46', '147.255.143.10', '60.128.203.156', '69.119.84.8', '66.86.218.47', '62.80.222.101', '41.34.175.159', '169.130.82.221', '60.172.68.98', '162.127.1.158', '23.235.132.146', '61.72.35.138', '121.97.139.1', '120.80.234.182', '69.206.38.125', '197.92.8.15', '126.77.131.14', '91.41.175.77', '185.192.59.11', '210.241.31.233', '77.48.21.161', '178.32.106.89', '154.207.11.212', '31.17.137.122', '31.13.228.201', '210.7.38.46', '118.113.168.131', '46.218.103.17', '196.114.70.251', '107.172.224.88', '216.55.50.34', '138.0.40.157', '112.25.137.70', '218.21.155.109', '220.82.125.10', '221.147.67.154', '61.207.32.101', '121.228.140.249', '1.58.71.50', '200.201.173.72', '87.234.10.54', '221.120.249.1', '24.187.131.112', '103.54.221.150', '118.23.25.214', '91.96.8.231', '107.174.166.23', '210.36.6.125', '181.129.40.189', '195.219.241.26', '182.50.241.202', '141.45.2.145', '188.154.95.62', '197.220.196.200', '2.200.171.61', '175.230.26.184', '61.147.100.69', '87.37.138.114', '42.6.51.71', '181.6.15.34', '156.249.235.13', '101.4.115.250', '87.245.212.24', '91.214.82.194', '218.248.235.129', '192.144.99.119', '80.88.97.1', '217.69.72.137', '218.207.189.85', '179.145.226.226', '101.171.45.238', '66.43.35.5', '80.136.100.201', '201.7.77.122', '171.25.220.30', '176.119.240.2', '69.139.195.2', '47.21.13.92', '38.104.215.250', '62.117.4.13', '123.224.167.16', '212.3.235.9', '139.5.17.114', '121.150.199.78', '81.43.37.241', '87.176.193.101', '149.233.165.23', '122.7.37.147', '82.202.133.165', '32.96.114.37', '179.63.252.233', '181.163.88.46', '211.142.176.202', '138.36.2.229', '196.46.40.89', '89.138.90.138', '185.19.19.73', '31.16.67.18', '194.85.224.3', '176.122.159.211', '77.75.101.145', '68.85.209.254', '219.251.93.42', '100.64.8.34', '81.201.176.50', '27.68.242.49', '193.136.134.150', '171.102.250.49', '187.87.128.1', '109.193.139.104', '194.190.254.162', '98.158.106.155', '207.164.139.166', '118.139.178.118', '92.209.117.245', '112.174.173.14', '31.42.160.10', 
'202.104.198.33', '12.153.11.233', '203.233.12.234', '189.200.240.1', '220.134.171.27', '194.8.80.254', '125.161.155.32', '111.162.91.147', '172.252.171.67', '41.206.192.227', '84.199.193.14', '196.245.185.15', '162.144.240.14', '27.85.208.133', '126.177.252.96', '219.141.138.22', '219.118.80.4', '68.26.69.87', '103.224.5.19', '179.193.232.132', '204.88.159.246', '179.127.160.238', '221.182.68.227', '46.240.134.254', '154.73.227.118', '122.99.240.18', '196.162.35.241', '175.224.239.6', '222.227.1.190', '71.149.39.91', '41.162.22.1', '79.115.47.95', '179.125.172.218', '24.96.153.186', '94.75.98.50', '207.232.88.193', '185.6.202.12', '203.222.39.162', '96.21.144.21', '98.159.217.42', '103.3.177.201', '70.6.52.195', '71.0.159.225', '24.10.185.219', '62.87.208.141', '197.48.0.179', '107.175.173.163', '185.74.25.254', '58.222.33.8', '165.231.172.15', '183.101.71.174', '161.24.200.2', '24.30.175.34', '196.6.103.26', '190.146.228.84', '135.26.224.37', '221.141.211.13', '213.248.7.141', '152.179.0.198', '187.23.64.3', '149.14.101.138', '85.20.255.97', '139.81.161.154', '62.214.104.206', '217.71.103.226', '96.127.249.130', '172.250.3.158', '24.199.189.145', '89.13.78.214', '191.37.79.1', '193.60.90.66', '220.190.233.160', '64.59.170.186', '168.224.170.93', '200.61.128.226', '183.6.195.234', '89.221.34.55', '31.215.102.132', '218.248.255.2', '124.194.112.9', '138.122.14.187', '23.108.56.7', '117.239.39.73', '161.45.254.253', '195.222.19.122', '98.30.201.53', '91.2.150.208', '92.213.170.97', '69.120.168.204', '193.41.156.23', '92.192.81.48', '38.142.208.9', '217.87.121.81', '218.248.255.21', '193.51.105.150', '176.46.242.174', '79.135.128.35', '124.106.70.226', '1.220.167.90', '197.230.98.188', '80.122.109.102', '5.56.18.54', '150.99.189.238', '138.36.180.1', '178.194.164.8', '109.167.211.6', '18.4.7.65', '47.63.91.18', '39.119.164.18', '122.236.39.82', '183.186.23.197', '77.23.92.96', '111.119.24.30', '38.142.186.75', '139.175.58.46', '86.103.132.164', '60.41.29.241', 
'195.3.66.134', '46.27.35.47', '212.248.55.21', '179.75.86.2', '211.140.38.114', '87.178.143.33', '218.61.43.93', '210.211.95.106', '210.186.12.233', '31.150.176.118', '67.199.160.40', '176.192.225.242', '118.241.64.208', '154.24.3.142', '41.57.65.3', '154.208.94.207', '124.120.81.142', '80.144.173.189', '221.131.235.17', '217.111.28.78', '218.69.22.74', '155.159.0.3', '122.1.239.132', '169.130.168.233', '12.180.141.130', '68.244.199.86', '210.233.169.249', '148.251.64.182', '41.75.114.169', '103.26.247.49', '201.54.174.9', '74.213.161.121', '87.144.226.45', '218.147.74.142', '24.179.248.213', '68.86.149.186', '154.149.24.157', '208.69.43.177', '31.150.6.15', '63.218.56.10', '219.101.152.54', '121.140.47.250', '177.251.229.3', '1.208.88.150', '135.19.45.146', '124.229.157.191', '121.101.64.2'] in column 0 during transform

In [None]:
# Show the first ten physical lines of the labeled conn.log, numbered from 1.
# readline() keeps the trailing newline, so each printed entry is followed by a blank line.
conn_log_path = "/content/drive/MyDrive/dataset iot/CTU-IoT-Malware-Capture-1-1/bro/conn.log.labeled"
with open(conn_log_path, "r") as log_handle:
    for line_number in range(1, 11):  # Adjust the upper bound to show more lines
        current_line = log_handle.readline()
        print(f"Line {line_number}: {current_line}")

Line 1: #separator \x09

Line 2: #set_separator	,

Line 3: #empty_field	(empty)

Line 4: #unset_field	-

Line 5: #path	conn

Line 6: #open	2018-05-21-21-03-43

Line 7: #fields	ts	uid	id.orig_h	id.orig_p	id.resp_h	id.resp_p	proto	service	duration	orig_bytes	resp_bytes	conn_state	local_orig	local_resp	missed_bytes	history	orig_pkts	orig_ip_bytes	resp_pkts	resp_ip_bytes	tunnel_parents   label   detailed-label

Line 8: #types	time	string	addr	port	addr	port	enum	string	interval	count	count	string	bool	bool	count	string	count	count	count	count	set[string]   string   string

Line 9: 1525879831.015811	CUmrqr4svHuSXJy5z7	192.168.100.103	51524	65.127.233.163	23	tcp	-	2.999051	0	0	S0	-	-	0	S	3	180	0	0	(empty)   Malicious   PartOfAHorizontalPortScan

Line 10: 1525879831.025055	CH98aB3s1kJeq6SFOc	192.168.100.103	56305	63.150.16.171	23	tcp	-	-	-	-	S0	-	-	0	S	1	60	0	0	(empty)   Malicious   PartOfAHorizontalPortScan



In [None]:
import pandas as pd

# Read the labeled conn.log file into a DataFrame
file_path = "/content/drive/MyDrive/dataset iot/CTU-IoT-Malware-Capture-1-1/bro/conn.log.labeled"

# Column names live on the '#fields' metadata line. That line begins with the
# literal token '#fields', so using it directly as the CSV header shifts every
# column name one position to the right (e.g. 'id.orig_h' ends up holding ports).
# Strip the leading token and pass the names explicitly instead.
field_names = None
with open(file_path, "r") as log_file:
    for raw_line in log_file:
        if raw_line.startswith("#fields"):
            field_names = raw_line.rstrip("\r\n").split("\t")[1:]
            break
if not field_names:
    raise ValueError(f"No '#fields' header line found in {file_path}")

df = pd.read_csv(
    file_path,
    sep="\t",
    comment="#",        # skip '#separator', '#types', ... metadata lines
    header=None,
    names=field_names,
    na_values=["-"],    # '-' is the log's declared unset-field marker
    low_memory=False,   # infer each column's dtype from the whole file
)

# In this labeled log the trailing fields ('tunnel_parents', 'label',
# 'detailed-label') are separated by spaces rather than tabs, so they arrive as
# one packed column. Split it back into one column per field name.
packed_name = field_names[-1]
packed_parts = packed_name.split()
if len(packed_parts) > 1:
    unpacked = (
        df[packed_name]
        .astype("string")
        .str.split(n=len(packed_parts) - 1, expand=True)
        .reindex(columns=range(len(packed_parts)))
    )
    unpacked.columns = packed_parts
    # Inside the packed field '-' still means "unset"; normalise it to missing.
    df = pd.concat([df.drop(columns=[packed_name]), unpacked.replace("-", pd.NA)], axis=1)

# Print the first few rows of the DataFrame
print(df.head())

# Access specific columns
print(df["id.orig_h"])  # Access the 'id.orig_h' column

# Convert 'duration' column to numeric, coercing errors to NaN
df['duration'] = pd.to_numeric(df['duration'], errors='coerce')

# Drop rows with NaN in the 'duration' column
df = df.dropna(subset=['duration'])

# Filter rows based on conditions
filtered_df = df[df['duration'] > 10]  # Connections longer than 10 seconds

# Group and aggregate data
grouped_df = df.groupby("id.orig_h").agg({"duration": "mean"})  # Calculate average duration per source IP

# Save the filtered data to a CSV file
filtered_df.to_csv("/content/filtered_data.csv", index=False)

# Save the grouped data to a CSV file (index kept: it holds the source IP)
grouped_df.to_csv("/content/grouped_data.csv")

print("Data saved successfully!")

  df = pd.read_csv(file_path, sep="\t", skiprows=6)


             #fields                  ts              uid id.orig_h  \
0             #types                time           string      addr   
1  1525879831.015811  CUmrqr4svHuSXJy5z7  192.168.100.103     51524   
2  1525879831.025055  CH98aB3s1kJeq6SFOc  192.168.100.103     56305   
3  1525879831.045045   C3GBTkINvXNjVGtN5  192.168.100.103     41101   
4  1525879832.016240   CDe43c1PtgynajGI6  192.168.100.103     60905   

         id.orig_p id.resp_h id.resp_p proto   service  duration  ...  \
0             port      addr      port  enum    string  interval  ...   
1   65.127.233.163        23       tcp     -  2.999051         0  ...   
2    63.150.16.171        23       tcp     -         -         -  ...   
3     111.40.23.49        23       tcp     -         -         -  ...   
4  131.174.215.147        23       tcp     -  2.998796         0  ...   

  conn_state local_orig local_resp missed_bytes history orig_pkts  \
0     string       bool       bool        count  string     count