# Analyze and Plot Traffic Logs produced by `iptraf`

In [19]:
import pandas as pd
from datetime import datetime

## Open and parse log file

In [3]:
log_file_path = "./iptraf_log.txt"

In [14]:
iptraf_df = pd.read_csv(log_file_path, 
                        sep=';',
                        header=0,  # Override first row
                        names=['timestamp', 'type', 'interface', 'num_bytes', 'src/dst'],
                        on_bad_lines='skip'
                       ).iloc[:-1] # Ignore last row

In [16]:
iptraf_df

Unnamed: 0,timestamp,type,interface,num_bytes,src/dst
0,Thu Nov 17 19:04:57 2022,UDP,h1-eth0,1278 bytes,from 10.0.0.1:54903 to 10.0.0.2:6121
1,Thu Nov 17 19:04:57 2022,UDP,h1-eth0,1278 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
2,Thu Nov 17 19:04:57 2022,UDP,h1-eth0,1278 bytes,from 10.0.0.1:54903 to 10.0.0.2:6121
3,Thu Nov 17 19:04:57 2022,UDP,h1-eth0,520 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
4,Thu Nov 17 19:04:57 2022,UDP,h1-eth0,193 bytes,from 10.0.0.1:54903 to 10.0.0.2:6121
...,...,...,...,...,...
9404,Thu Nov 17 19:05:03 2022,UDP,h1-eth0,1278 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
9405,Thu Nov 17 19:05:03 2022,UDP,h1-eth0,310 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
9406,Thu Nov 17 19:05:03 2022,UDP,h1-eth0,54 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
9407,Thu Nov 17 19:05:03 2022,UDP,h1-eth0,62 bytes,from 10.0.0.1:54903 to 10.0.0.2:6121


## Preprocess

### Parse datetime

In [32]:
datetime_format = '%a %b %d %H:%M:%S %Y'

iptraf_datetime_df = iptraf_df.copy(deep=True)
iptraf_datetime_df['timestamp'] = iptraf_datetime_df['timestamp'].apply(lambda time_str: datetime.strptime(time_str, datetime_format))
iptraf_datetime_df

Unnamed: 0,timestamp,type,interface,num_bytes,src/dst
0,2022-11-17 19:04:57,UDP,h1-eth0,1278 bytes,from 10.0.0.1:54903 to 10.0.0.2:6121
1,2022-11-17 19:04:57,UDP,h1-eth0,1278 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
2,2022-11-17 19:04:57,UDP,h1-eth0,1278 bytes,from 10.0.0.1:54903 to 10.0.0.2:6121
3,2022-11-17 19:04:57,UDP,h1-eth0,520 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
4,2022-11-17 19:04:57,UDP,h1-eth0,193 bytes,from 10.0.0.1:54903 to 10.0.0.2:6121
...,...,...,...,...,...
9404,2022-11-17 19:05:03,UDP,h1-eth0,1278 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
9405,2022-11-17 19:05:03,UDP,h1-eth0,310 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
9406,2022-11-17 19:05:03,UDP,h1-eth0,54 bytes,from 10.0.0.2:6121 to 10.0.0.1:54903
9407,2022-11-17 19:05:03,UDP,h1-eth0,62 bytes,from 10.0.0.1:54903 to 10.0.0.2:6121


### Parse source and destination

In [34]:
test_srcdst = iptraf_datetime_df.iloc[0]['src/dst']
test_srcdst

' from 10.0.0.1:54903 to 10.0.0.2:6121'

In [36]:
test_srcdst.strip().split(' ')

['from', '10.0.0.1:54903', 'to', '10.0.0.2:6121']

In [47]:
def parse_src_dst(src_dst):
    fileds = src_dst.strip().split(' ')
    src, dst = fileds[1], fileds[3]
    src_ip, src_port = src.split(':')
    dst_ip, dst_port = dst.split(':')
    return pd.Series([src_ip, src_port, dst_ip, dst_port])

In [48]:
iptraf_datetime_df['src/dst'].apply(parse_src_dst)

Unnamed: 0,0,1,2,3
0,10.0.0.1,54903,10.0.0.2,6121
1,10.0.0.2,6121,10.0.0.1,54903
2,10.0.0.1,54903,10.0.0.2,6121
3,10.0.0.2,6121,10.0.0.1,54903
4,10.0.0.1,54903,10.0.0.2,6121
...,...,...,...,...
9404,10.0.0.2,6121,10.0.0.1,54903
9405,10.0.0.2,6121,10.0.0.1,54903
9406,10.0.0.2,6121,10.0.0.1,54903
9407,10.0.0.1,54903,10.0.0.2,6121


In [53]:
iptraf_srcdst_df = iptraf_datetime_df.copy(deep=True)
iptraf_srcdst_df = iptraf_datetime_df['src/dst'].apply(parse_src_dst)
iptraf_srcdst_df.columns = ['src_ip', 'src_port', 'dst_ip', 'dst_port']

iptraf_srcdst_df = pd.concat([iptraf_datetime_df.drop(labels='src/dst', axis=1),
                              iptraf_srcdst_df], axis=1)
iptraf_srcdst_df

Unnamed: 0,timestamp,type,interface,num_bytes,src_ip,src_port,dst_ip,dst_port
0,2022-11-17 19:04:57,UDP,h1-eth0,1278 bytes,10.0.0.1,54903,10.0.0.2,6121
1,2022-11-17 19:04:57,UDP,h1-eth0,1278 bytes,10.0.0.2,6121,10.0.0.1,54903
2,2022-11-17 19:04:57,UDP,h1-eth0,1278 bytes,10.0.0.1,54903,10.0.0.2,6121
3,2022-11-17 19:04:57,UDP,h1-eth0,520 bytes,10.0.0.2,6121,10.0.0.1,54903
4,2022-11-17 19:04:57,UDP,h1-eth0,193 bytes,10.0.0.1,54903,10.0.0.2,6121
...,...,...,...,...,...,...,...,...
9404,2022-11-17 19:05:03,UDP,h1-eth0,1278 bytes,10.0.0.2,6121,10.0.0.1,54903
9405,2022-11-17 19:05:03,UDP,h1-eth0,310 bytes,10.0.0.2,6121,10.0.0.1,54903
9406,2022-11-17 19:05:03,UDP,h1-eth0,54 bytes,10.0.0.2,6121,10.0.0.1,54903
9407,2022-11-17 19:05:03,UDP,h1-eth0,62 bytes,10.0.0.1,54903,10.0.0.2,6121


In [65]:
### Parse num bytes
iptraf_num_bytes_df = iptraf_srcdst_df.copy(deep=True)

iptraf_num_bytes_df['num_bytes'] = iptraf_num_bytes_df['num_bytes'].apply(
    lambda byte_str: int(byte_str.strip().split(' ')[0])
)
iptraf_num_bytes_df

Unnamed: 0,timestamp,type,interface,num_bytes,src_ip,src_port,dst_ip,dst_port
0,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.1,54903,10.0.0.2,6121
1,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
2,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.1,54903,10.0.0.2,6121
3,2022-11-17 19:04:57,UDP,h1-eth0,520,10.0.0.2,6121,10.0.0.1,54903
4,2022-11-17 19:04:57,UDP,h1-eth0,193,10.0.0.1,54903,10.0.0.2,6121
...,...,...,...,...,...,...,...,...
9404,2022-11-17 19:05:03,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
9405,2022-11-17 19:05:03,UDP,h1-eth0,310,10.0.0.2,6121,10.0.0.1,54903
9406,2022-11-17 19:05:03,UDP,h1-eth0,54,10.0.0.2,6121,10.0.0.1,54903
9407,2022-11-17 19:05:03,UDP,h1-eth0,62,10.0.0.1,54903,10.0.0.2,6121


## Plot data of an example group

In [66]:
processed_df = iptraf_num_bytes_df

In [60]:
src_ip = "10.0.0.2"
src_port = "6121"
dst_ip = "10.0.0.1"
dst_port = "54903"

# dst_ip = "10.0.0.2"
# dst_port = "6121"
# src_ip = "10.0.0.1"
# src_port = "54903"

In [69]:
group_df = processed_df.loc[
    (iptraf_srcdst_df['src_ip'] == src_ip) &
    (iptraf_srcdst_df['src_port'] == src_port) &
    (iptraf_srcdst_df['dst_ip'] == dst_ip) &
    (iptraf_srcdst_df['dst_port'] == dst_port)
]

group_df

Unnamed: 0,timestamp,type,interface,num_bytes,src_ip,src_port,dst_ip,dst_port
1,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
3,2022-11-17 19:04:57,UDP,h1-eth0,520,10.0.0.2,6121,10.0.0.1,54903
5,2022-11-17 19:04:57,UDP,h1-eth0,484,10.0.0.2,6121,10.0.0.1,54903
6,2022-11-17 19:04:57,UDP,h1-eth0,115,10.0.0.2,6121,10.0.0.1,54903
9,2022-11-17 19:04:57,UDP,h1-eth0,1274,10.0.0.2,6121,10.0.0.1,54903
...,...,...,...,...,...,...,...,...
9401,2022-11-17 19:05:03,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
9402,2022-11-17 19:05:03,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
9404,2022-11-17 19:05:03,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
9405,2022-11-17 19:05:03,UDP,h1-eth0,310,10.0.0.2,6121,10.0.0.1,54903


In [70]:
# Sort dataframe in order of time
group_sorted_df = group_df.sort_values(by='timestamp', axis=0)

group_sorted_df

Unnamed: 0,timestamp,type,interface,num_bytes,src_ip,src_port,dst_ip,dst_port
1,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
299,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
298,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
296,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
295,2022-11-17 19:04:57,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
...,...,...,...,...,...,...,...,...
8540,2022-11-17 19:05:03,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
8539,2022-11-17 19:05:03,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
8538,2022-11-17 19:05:03,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903
8544,2022-11-17 19:05:03,UDP,h1-eth0,1278,10.0.0.2,6121,10.0.0.1,54903


In [73]:
# Groupby timestamp
group_sorted_df.groupby(by='timestamp').mean(numeric_only=True)

Unnamed: 0_level_0,num_bytes
timestamp,Unnamed: 1_level_1
2022-11-17 19:04:57,1269.955621
2022-11-17 19:04:58,1278.0
2022-11-17 19:04:59,1278.0
2022-11-17 19:05:00,1278.0
2022-11-17 19:05:01,1278.0
2022-11-17 19:05:02,1278.0
2022-11-17 19:05:03,1276.070423
