In [1]:
import pandas as pd 
from datetime import datetime
import numpy as np

# Prep data

In [2]:
# Load the honeynet log export (path is relative to the notebook's working directory).
df = pd.read_csv('synthetic_honeynet_logs.csv')

In [3]:
df.head()

Unnamed: 0,timestamp,log_type,client_ip,event
0,2025-03-29T12:19:00Z,auth,192.168.1.11,Accepted password for root from 192.168.1.11 p...
1,2025-03-29T10:46:00Z,access,192.168.1.11,GET /config.php HTTP/1.1 - 403
2,2025-03-29T12:13:00Z,cron,203.0.113.5,CRON job started: wget malicious.sh
3,2025-03-29T12:38:00Z,cron,203.0.113.5,CRON executed reverse_shell.py
4,2025-03-29T12:38:00Z,access,192.168.1.10,GET /admin HTTP/1.1 - 403


In [4]:
# Convert the raw timestamp strings into pandas datetimes so the .dt accessor works later.
df['timestamp'] = df['timestamp'].pipe(pd.to_datetime)

In [5]:
df.head()

Unnamed: 0,timestamp,log_type,client_ip,event
0,2025-03-29 12:19:00+00:00,auth,192.168.1.11,Accepted password for root from 192.168.1.11 p...
1,2025-03-29 10:46:00+00:00,access,192.168.1.11,GET /config.php HTTP/1.1 - 403
2,2025-03-29 12:13:00+00:00,cron,203.0.113.5,CRON job started: wget malicious.sh
3,2025-03-29 12:38:00+00:00,cron,203.0.113.5,CRON executed reverse_shell.py
4,2025-03-29 12:38:00+00:00,access,192.168.1.10,GET /admin HTTP/1.1 - 403


In [6]:
# Derive separate calendar-date and time-of-day columns from the parsed timestamp.
df['date'], df['time'] = df['timestamp'].dt.date, df['timestamp'].dt.time

In [7]:
df.head()

Unnamed: 0,timestamp,log_type,client_ip,event,date,time
0,2025-03-29 12:19:00+00:00,auth,192.168.1.11,Accepted password for root from 192.168.1.11 p...,2025-03-29,12:19:00
1,2025-03-29 10:46:00+00:00,access,192.168.1.11,GET /config.php HTTP/1.1 - 403,2025-03-29,10:46:00
2,2025-03-29 12:13:00+00:00,cron,203.0.113.5,CRON job started: wget malicious.sh,2025-03-29,12:13:00
3,2025-03-29 12:38:00+00:00,cron,203.0.113.5,CRON executed reverse_shell.py,2025-03-29,12:38:00
4,2025-03-29 12:38:00+00:00,access,192.168.1.10,GET /admin HTTP/1.1 - 403,2025-03-29,12:38:00


# Checking Information

In [8]:
# Distinct log sources present in the data, in order of first appearance.
pd.unique(df['log_type'])

array(['auth', 'access', 'cron'], dtype=object)

In [9]:
# Distinct event messages, in order of first appearance.
pd.unique(df['event'])

array(['Accepted password for root from 192.168.1.11 port 54321 ssh2',
       'GET /config.php HTTP/1.1 - 403',
       'CRON job started: wget malicious.sh',
       'CRON executed reverse_shell.py', 'GET /admin HTTP/1.1 - 403',
       'POST /login.php HTTP/1.1 - 200',
       'new user: name=attacker, UID=1001, GID=1001',
       'Failed password for invalid user admin from 203.0.113.5 port 45678 ssh2',
       'CRON job added by attacker', 'GET /index.html HTTP/1.1 - 200',
       'Accepted password for root from 203.0.113.5 port 54321 ssh2',
       'Failed password for invalid user admin from 192.168.1.10 port 45678 ssh2',
       'Accepted password for root from 192.168.1.10 port 54321 ssh2',
       'Failed password for invalid user admin from 192.168.1.11 port 45678 ssh2',
       'GET /phpmyadmin/ HTTP/1.1 - 404',
       'Failed password for invalid user admin from 10.0.0.8 port 45678 ssh2'],
      dtype=object)

In [10]:
# Distinct client IPs seen in the logs, in order of first appearance.
pd.unique(df['client_ip'])

array(['192.168.1.11', '203.0.113.5', '192.168.1.10', '10.0.0.8'],
      dtype=object)

In [11]:
# Number of distinct calendar days covered by the logs (missing values are not counted).
df['date'].nunique(dropna=True)

1

In [12]:
# Count events per (date, client_ip) pair. Named aggregation labels the result
# column directly, and as_index=False keeps the group keys as ordinary columns.
group_df = df.groupby(['date', 'client_ip'], as_index=False).agg(
    event_count=('event', 'count')
)

In [13]:
group_df

Unnamed: 0,date,client_ip,event_count
0,2025-03-29,10.0.0.8,21
1,2025-03-29,192.168.1.10,30
2,2025-03-29,192.168.1.11,22
3,2025-03-29,203.0.113.5,27


# IP 192.168.1.11 Logs

In [14]:
# Auth events recorded for client 192.168.1.11, in file order.
df.loc[
    df['client_ip'].eq('192.168.1.11') & df['log_type'].eq('auth'),
    'event',
].tolist()

['Accepted password for root from 192.168.1.11 port 54321 ssh2',
 'Failed password for invalid user admin from 192.168.1.11 port 45678 ssh2',
 'Failed password for invalid user admin from 192.168.1.11 port 45678 ssh2',
 'Accepted password for root from 192.168.1.11 port 54321 ssh2',
 'new user: name=attacker, UID=1001, GID=1001',
 'Accepted password for root from 192.168.1.11 port 54321 ssh2',
 'new user: name=attacker, UID=1001, GID=1001']

In [15]:
# Web access events recorded for client 192.168.1.11, in file order.
df.loc[
    df['client_ip'].eq('192.168.1.11') & df['log_type'].eq('access'),
    'event',
].tolist()

['GET /config.php HTTP/1.1 - 403',
 'GET /index.html HTTP/1.1 - 200',
 'GET /config.php HTTP/1.1 - 403',
 'GET /admin HTTP/1.1 - 403',
 'GET /phpmyadmin/ HTTP/1.1 - 404',
 'GET /index.html HTTP/1.1 - 200',
 'GET /phpmyadmin/ HTTP/1.1 - 404']

In [16]:
# Cron events recorded for client 192.168.1.11, in file order.
df.loc[
    df['client_ip'].eq('192.168.1.11') & df['log_type'].eq('cron'),
    'event',
].tolist()

['CRON job added by attacker',
 'CRON executed reverse_shell.py',
 'CRON job added by attacker',
 'CRON executed reverse_shell.py',
 'CRON job started: wget malicious.sh',
 'CRON job started: wget malicious.sh',
 'CRON executed reverse_shell.py',
 'CRON job started: wget malicious.sh']

# IP 203.0.113.5 Logs

In [17]:
# Auth events recorded for client 203.0.113.5, in file order.
df.loc[
    df['client_ip'].eq('203.0.113.5') & df['log_type'].eq('auth'),
    'event',
].tolist()

['Failed password for invalid user admin from 203.0.113.5 port 45678 ssh2',
 'Accepted password for root from 203.0.113.5 port 54321 ssh2',
 'new user: name=attacker, UID=1001, GID=1001',
 'Failed password for invalid user admin from 203.0.113.5 port 45678 ssh2',
 'new user: name=attacker, UID=1001, GID=1001',
 'Accepted password for root from 203.0.113.5 port 54321 ssh2',
 'Failed password for invalid user admin from 203.0.113.5 port 45678 ssh2',
 'Failed password for invalid user admin from 203.0.113.5 port 45678 ssh2',
 'Failed password for invalid user admin from 203.0.113.5 port 45678 ssh2',
 'Accepted password for root from 203.0.113.5 port 54321 ssh2']

# IP 192.168.1.10 Logs

In [18]:
# Full cron rows for client 192.168.1.10 (all columns kept so timestamps are visible).
df.loc[df['client_ip'].eq('192.168.1.10') & df['log_type'].eq('cron')]

Unnamed: 0,timestamp,log_type,client_ip,event,date,time
13,2025-03-29 11:35:00+00:00,cron,192.168.1.10,CRON job added by attacker,2025-03-29,11:35:00
22,2025-03-29 12:47:00+00:00,cron,192.168.1.10,CRON job started: wget malicious.sh,2025-03-29,12:47:00
23,2025-03-29 10:00:00+00:00,cron,192.168.1.10,CRON job started: wget malicious.sh,2025-03-29,10:00:00
54,2025-03-29 10:02:00+00:00,cron,192.168.1.10,CRON job added by attacker,2025-03-29,10:02:00
57,2025-03-29 11:29:00+00:00,cron,192.168.1.10,CRON executed reverse_shell.py,2025-03-29,11:29:00
72,2025-03-29 10:37:00+00:00,cron,192.168.1.10,CRON job started: wget malicious.sh,2025-03-29,10:37:00


# IP 10.0.0.8 Logs

In [19]:
# Full cron rows for client 10.0.0.8 (all columns kept so timestamps are visible).
df.loc[df['client_ip'].eq('10.0.0.8') & df['log_type'].eq('cron')]

Unnamed: 0,timestamp,log_type,client_ip,event,date,time
26,2025-03-29 10:59:00+00:00,cron,10.0.0.8,CRON job started: wget malicious.sh,2025-03-29,10:59:00
36,2025-03-29 10:14:00+00:00,cron,10.0.0.8,CRON job added by attacker,2025-03-29,10:14:00
43,2025-03-29 10:15:00+00:00,cron,10.0.0.8,CRON executed reverse_shell.py,2025-03-29,10:15:00
52,2025-03-29 11:35:00+00:00,cron,10.0.0.8,CRON executed reverse_shell.py,2025-03-29,11:35:00
55,2025-03-29 12:45:00+00:00,cron,10.0.0.8,CRON job added by attacker,2025-03-29,12:45:00
60,2025-03-29 12:12:00+00:00,cron,10.0.0.8,CRON job added by attacker,2025-03-29,12:12:00
76,2025-03-29 10:05:00+00:00,cron,10.0.0.8,CRON job started: wget malicious.sh,2025-03-29,10:05:00
