# Analysis of /var/log/ufw.log

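Data collected from April 6, 2020 to April 25, 2020 on a DigitalOcean droplet running Ubuntu 18.04. The syslog timestamps in the file omit the year, so 2020 is assumed when dates are parsed below.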



In [1]:
import pandas
import re # https://docs.python.org/3/library/re.html
from collections import Counter
import ipaddress # https://docs.python.org/3/library/ipaddress.html
import time
import sys

In [2]:
# Load the entire firewall log into memory: one string per line, trailing newlines kept
with open('ufw.log') as log_file:
    lines = list(log_file)

In [5]:
# Number of raw lines read from ufw.log
len(lines)

9517

In [3]:
# Show the first raw line to see the record layout that the parser has to handle
lines[0]

'Apr  6 18:58:09 django-s-1vcpu-1gb-sfo2-01 kernel: [   21.950901] [UFW BLOCK] IN=eth0 OUT= MAC=42:27:1c:b8:86:9e:30:7c:5e:91:9c:30:08:00 SRC=104.155.153.206 DST=138.68.10.91 LEN=48 TOS=0x00 PREC=0x00 TTL=120 ID=2506 DF PROTO=TCP SPT=51695 DPT=3128 WINDOW=8192 RES=0x00 SYN URGP=0 \n'

## Convert the list of lines into a list of dicts suitable for a pandas DataFrame

In [48]:
# key=value fields copied out of each UFW message into their own columns
UFW_FIELDS = ('IN', 'MAC', 'SRC', 'DST')
# message tags this notebook knows about; anything else is reported and gets no 'msg type'
UFW_MSG_TYPES = ('[UFW BLOCK]', '[UFW LIMIT BLOCK]')


def parse_ufw_line(line):
    """Parse one raw line of ufw.log into a flat dict.

    Expected layout (see `lines[0]`):
        'Apr  6 18:58:09 <server> <service>: [<elapsed>] [UFW ...] IN=... MAC=... SRC=... DST=... ...'

    Parameters:
        line: one raw line of the log, with or without the trailing newline.

    Returns:
        A dict with the keys 'date time', 'server name', 'service name',
        'elapsed time' and 'message'; plus 'msg type' when the message carries
        a known UFW tag, and 'IN' / 'MAC' / 'SRC' / 'DST' when the message
        contains an ' IN=' field.
        None when the line has too few tokens to hold those leading fields
        (e.g. a blank or truncated line).
    """
    line = line.replace('\x00', '')  # there was a bad character on `lines[6615]`

    # The timestamp is fixed width ('Apr  6 18:58:09'); everything after it is
    # space separated. Dropping empty strings collapses runs of spaces in one pass.
    tokens = [tok for tok in line[16:].strip().split(' ') if tok]
    if len(tokens) < 3:
        return None

    record = {
        'date time': line[0:15],
        'server name': tokens[0].strip(),
        'service name': tokens[1].replace(':', ''),
    }

    # The kernel pads short uptimes, e.g. '[   21.950901]', which splits into
    # '[' and '21.950901]'; longer uptimes arrive as a single '[12345.678901]' token.
    if tokens[2] == '[':
        if len(tokens) < 4:
            return None
        record['elapsed time'] = tokens[3].replace(']', '')
        message_tokens = tokens[4:]
    else:
        record['elapsed time'] = tokens[2].replace('[', '').replace(']', '')
        message_tokens = tokens[3:]

    message = ' '.join(message_tokens)
    record['message'] = message

    for msg_type in UFW_MSG_TYPES:
        if msg_type in message:
            record['msg type'] = msg_type
            break
    else:
        print('unmatched msg type detected:', message)

    if ' IN=' in message:
        for entry in message_tokens:
            # Match on the exact key so that e.g. 'PHYSIN=eth1' is not mistaken for 'IN='
            key, separator, value = entry.partition('=')
            if separator and key in UFW_FIELDS:
                record[key] = value

    return record


list_of_dicts = []
for indx, line in enumerate(lines):
    line_as_dict = parse_ufw_line(line)
    if line_as_dict is None:
        # report and skip instead of aborting the whole cell with an IndexError
        print('skipping malformed line', indx, ':', repr(line))
        continue
    list_of_dicts.append(line_as_dict)

In [49]:
# One row per parsed log line; a key missing from a line's dict becomes NaN in that row
df = pandas.DataFrame(list_of_dicts)
# (rows, columns) sanity check: the row count should match the number of parsed lines
df.shape

(9517, 10)

In [50]:
# Syslog timestamps ('Apr  6 18:58:09') carry no year, so it has to be supplied;
# this log was collected in 2020.
LOG_YEAR = 2020

# Guard on the source column so that re-running this cell is a no-op instead of
# a KeyError (the column is removed once it has been converted).
if 'date time' in df.columns:
    # https://strftime.org/
    df['datetime'] = pandas.to_datetime(str(LOG_YEAR) + ' ' + df['date time'], format='%Y %b %d %H:%M:%S')
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html
    df = df.drop(columns='date time')

In [51]:
# Full (untruncated) message text of the first row
df.loc[0, 'message']

'[UFW BLOCK] IN=eth0 OUT= MAC=42:27:1c:b8:86:9e:30:7c:5e:91:9c:30:08:00 SRC=104.155.153.206 DST=138.68.10.91 LEN=48 TOS=0x00 PREC=0x00 TTL=120 ID=2506 DF PROTO=TCP SPT=51695 DPT=3128 WINDOW=8192 RES=0x00 SYN URGP=0'

In [52]:
# Preview the first rows of the parsed frame
df.head()

Unnamed: 0,DST,IN,MAC,SRC,elapsed time,message,msg type,server name,service name,datetime
0,138.68.10.91,eth0,42:27:1c:b8:86:9e:30:7c:5e:91:9c:30:08:00,104.155.153.206,21.950901,[UFW BLOCK] IN=eth0 OUT= MAC=42:27:1c:b8:86:9e...,[UFW BLOCK],django-s-1vcpu-1gb-sfo2-01,kernel,2020-04-06 18:58:09
1,138.68.10.91,eth0,42:27:1c:b8:86:9e:30:7c:5e:93:1c:70:08:00,71.234.0.96,22.096127,[UFW BLOCK] IN=eth0 OUT= MAC=42:27:1c:b8:86:9e...,[UFW BLOCK],django-s-1vcpu-1gb-sfo2-01,kernel,2020-04-06 18:58:09
2,138.68.10.91,eth0,42:27:1c:b8:86:9e:30:7c:5e:91:9c:30:08:00,159.69.34.106,22.104043,[UFW BLOCK] IN=eth0 OUT= MAC=42:27:1c:b8:86:9e...,[UFW BLOCK],django-s-1vcpu-1gb-sfo2-01,kernel,2020-04-06 18:58:09
3,138.68.10.91,eth0,42:27:1c:b8:86:9e:30:7c:5e:93:1c:70:08:00,159.69.34.106,22.170431,[UFW BLOCK] IN=eth0 OUT= MAC=42:27:1c:b8:86:9e...,[UFW BLOCK],django-s-1vcpu-1gb-sfo2-01,kernel,2020-04-06 18:58:09
4,138.68.10.91,eth0,42:27:1c:b8:86:9e:30:7c:5e:93:1c:70:08:00,159.69.34.106,22.174797,[UFW BLOCK] IN=eth0 OUT= MAC=42:27:1c:b8:86:9e...,[UFW BLOCK],django-s-1vcpu-1gb-sfo2-01,kernel,2020-04-06 18:58:09


In [54]:
# Number of log entries per destination address (DST), most frequent first
df['DST'].value_counts()

167.172.250.37    7984
138.68.10.91      1533
Name: DST, dtype: int64

In [56]:
# The 20 source addresses (SRC) with the most log entries, most frequent first
df['SRC'].value_counts().head(20)

213.166.71.30      2115
185.176.27.246      270
104.155.153.206     186
45.231.154.1        169
45.231.152.1        152
104.211.140.12      108
134.209.151.197     102
194.26.29.210        93
194.26.29.114        90
194.127.179.142      88
194.26.29.213        86
212.32.229.34        81
194.26.29.212        79
89.248.167.141       63
78.128.113.42        61
87.251.74.60         60
185.156.73.65        57
149.56.254.151       56
13.80.131.151        55
185.176.27.86        46
Name: SRC, dtype: int64