# Analyze the UFW log

## Part 1
`a` and `b` are example lines from the UFW log file. The main difference between them is the whitespace padding inside the bracketed CPU uptime field (`[156505.664718]` vs. `[   87.722287]`).

In [2]:
# Sample UFW log lines used to develop the regexes in this notebook.
# a: uptime value fills the brackets with no padding; the string ends with a newline.
# b: uptime value is left-padded with spaces inside the brackets; also carries SPT/DPT port fields.
a = """Aug  1 00:00:55 jupiter kernel: [156505.664718] [UFW BLOCK] IN=wlp6s0 OUT= MAC=01:00:5e:00:00:fb:00:80:92:9c:02:06:08:00 SRC=192.168.1.3 DST=224.0.0.251 LEN=32 TOS=0x0C PREC=0x20 TTL=1 ID=31272 PROTO=2 
"""
b = """Aug 12 04:07:15 jupiter kernel: [   87.722287] [UFW BLOCK] IN=wlp6s0 OUT= MAC=2c:fd:a1:60:e5:d5:00:80:92:9c:02:06:08:00 SRC=192.168.1.3 DST=192.168.1.10 LEN=1285 TOS=0x00 PREC=0x00 TTL=64 ID=19681 PROTO=UDP SPT=3702 DPT=36175 LEN=1265 """

In [3]:
import re

In [4]:
# Full-line pattern for a UFW kernel log entry. Captured groups, in order:
#   1 month, 2 day, 3 time, 4 hostname, 5 bracketed uptime, 6 UFW action, 7 rest of line.
# Small uptime values are left-padded with spaces, hence the \s* inside the brackets.
p = re.compile(r"""(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+   # month
        (\d{1,2})\s+                    # day of month
        (\d{2}:\d{2}:\d{2})\s+          # timestamp
        (jupiter|otherhost)\s+          # hostname
        kernel:\s+                      # message source (not captured)
        (\[\s*\d+\.\d+\])\s+            # cputime; the dot is escaped so only a literal dot matches
        \[UFW\s(BLOCK|ALLOW|LOG)\]\s+   # ufw action: block, allow or log
        (.+)                            # rest of line
        """, re.VERBOSE)
# Apply the full-line pattern to sample line a; m is None if the line does not match.
m = p.match(a)

In [5]:
# Display the groups captured from sample line a.
m.groups()

('Aug',
 '1',
 '00:00:55',
 'jupiter',
 '[156505.664718]',
 'BLOCK',
 'IN=wlp6s0 OUT= MAC=01:00:5e:00:00:fb:00:80:92:9c:02:06:08:00 SRC=192.168.1.3 DST=224.0.0.251 LEN=32 TOS=0x0C PREC=0x20 TTL=1 ID=31272 PROTO=2 ')

In [6]:
# Same pattern against sample line b, whose uptime field is space-padded inside the brackets.
m2 = p.match(b)
m2.groups()

('Aug',
 '12',
 '04:07:15',
 'jupiter',
 '[   87.722287]',
 'BLOCK',
 'IN=wlp6s0 OUT= MAC=2c:fd:a1:60:e5:d5:00:80:92:9c:02:06:08:00 SRC=192.168.1.3 DST=192.168.1.10 LEN=1285 TOS=0x00 PREC=0x00 TTL=64 ID=19681 PROTO=UDP SPT=3702 DPT=36175 LEN=1265 ')

Simpler regex that splits the line into 3 parts: 
1. the timestamp text
2. block/allow/log indicator
3. the rest of the log entry with the packet details

In [23]:
# Coarse three-way split of a log line around the "[UFW <action>]" marker:
#   group 1 = everything before the marker (keeps its trailing space),
#   group 2 = the action word (BLOCK, ALLOW or LOG),
#   group 3 = everything after the marker up to the end of the line.
p2 = re.compile(r"""
        (.+)
        \[UFW\s(BLOCK|ALLOW|LOG)\]\s
        (.+)
        """, re.VERBOSE)
# Match both sample lines with the simpler pattern.
n = p2.match(a)
n2 = p2.match(b)

In [27]:
# Display the three parts captured from sample line a.
n.groups()

('Aug  1 00:00:55 jupiter kernel: [156505.664718] ',
 'BLOCK',
 'IN=wlp6s0 OUT= MAC=01:00:5e:00:00:fb:00:80:92:9c:02:06:08:00 SRC=192.168.1.3 DST=224.0.0.251 LEN=32 TOS=0x0C PREC=0x20 TTL=1 ID=31272 PROTO=2 ')

In [26]:
# Display the three parts captured from sample line b.
n2.groups()

('Aug 12 04:07:15 jupiter kernel: [   87.722287] ',
 'BLOCK',
 'IN=wlp6s0 OUT= MAC=2c:fd:a1:60:e5:d5:00:80:92:9c:02:06:08:00 SRC=192.168.1.3 DST=192.168.1.10 LEN=1285 TOS=0x00 PREC=0x00 TTL=64 ID=19681 PROTO=UDP SPT=3702 DPT=36175 LEN=1265 ')

In [9]:
# Report how many groups the simpler pattern captured for line a.
print('There are ', len(n.groups()), ' items in n')

There are  3  items in n


In [31]:
# Print every captured group of n together with its 1-based group number.
# enumerate(..., start=1) supplies the group number directly, so no manual
# counter or second n.group(i) lookup is needed.
for i, item in enumerate(n.groups(), start=1):
    print(f'item "{i}" is "{item}"')

item "1" is "Aug  1 00:00:55 jupiter kernel: [156505.664718] "
item "2" is "BLOCK"
item "3" is "IN=wlp6s0 OUT= MAC=01:00:5e:00:00:fb:00:80:92:9c:02:06:08:00 SRC=192.168.1.3 DST=224.0.0.251 LEN=32 TOS=0x0C PREC=0x20 TTL=1 ID=31272 PROTO=2 "


In [11]:
# Pattern for the leading part of a UFW log line (the text before "[UFW ...]").
# Captures 5 groups: month, day, time, hostname and the bracketed cpu uptime.
date_re = re.compile(r"""
                    (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) # month
                    \s+
                    (\d+) # day
                    \s+
                    (\d{2}:\d{2}:\d{2}) # time
                    \s+
                    (\w+) # hostname
                    \s+
                    \w+\: # message source (discard)
                    \s+
                    (\[\s*\d+\.\d+\]) # cpu uptime; dot escaped so only a literal dot matches
                    \s*.*
                    """, flags=re.VERBOSE)


In [32]:
result = date_re.match(n.group(1))

# date_re captures 5 fields: month, day, time, hostname, cpu uptime.
# If result is None, the regex didn't match.

# Bind timestamp before branching so the final print still works when the
# match fails, instead of raising NameError or showing a value left over
# from an earlier run of this cell.
timestamp = ''

if result and len(result.groups()) == 5:
    print('result is not none')
    # Unpack by position; the hostname (4th group) is deliberately left out.
    month, day, clock, _hostname, uptime = result.groups()
    # Every kept field is followed by one space, so the value ends with a space.
    timestamp = ''.join(f'{field} ' for field in (month, day, clock, uptime))
else:
    print('result is none')

print(f'*{timestamp}*')

result is not none
*Aug 1 00:00:55 [156505.664718] *


## Part 2

Now get into the details of what was blocked. Fields are generally written as key=value pairs separated by whitespace. Some entries are bare flags (for example `DF`) that do not follow the key=value format.

Start with a split on whitespace

In [13]:
# Sample log line that contains a bare flag (DF) in addition to key=value fields.
c = """Aug  1 00:02:51 jupiter kernel: [156621.587697] [UFW BLOCK] IN=wlp6s0 OUT= MAC=01:00:5e:00:00:01:6c:b0:ce:be:5e:33:08:00 SRC=0.0.0.0 DST=224.0.0.1 LEN=32 TOS=0x00 PREC=0xC0 TTL=1 ID=0 DF PROTO=2"""

In [14]:
# Apply the full-line pattern p to sample line c.
m3 = p.match(c) 

In [15]:
# Display the match object to confirm that the pattern matched line c.
m3

<re.Match object; span=(0, 194), match='Aug  1 00:02:51 jupiter kernel: [156621.587697] [>

In [16]:
# Display all groups captured from line c.
m3.groups()

('Aug',
 '1',
 '00:02:51',
 'jupiter',
 '[156621.587697]',
 'BLOCK',
 'IN=wlp6s0 OUT= MAC=01:00:5e:00:00:01:6c:b0:ce:be:5e:33:08:00 SRC=0.0.0.0 DST=224.0.0.1 LEN=32 TOS=0x00 PREC=0xC0 TTL=1 ID=0 DF PROTO=2')

In [17]:
# Group 7 is the "rest of line" capture: the packet details after "[UFW ...]".
m3.group(7)

'IN=wlp6s0 OUT= MAC=01:00:5e:00:00:01:6c:b0:ce:be:5e:33:08:00 SRC=0.0.0.0 DST=224.0.0.1 LEN=32 TOS=0x00 PREC=0xC0 TTL=1 ID=0 DF PROTO=2'

In [18]:
# Split the packet details on whitespace into individual tokens.
m3.group(7).split()

['IN=wlp6s0',
 'OUT=',
 'MAC=01:00:5e:00:00:01:6c:b0:ce:be:5e:33:08:00',
 'SRC=0.0.0.0',
 'DST=224.0.0.1',
 'LEN=32',
 'TOS=0x00',
 'PREC=0xC0',
 'TTL=1',
 'ID=0',
 'DF',
 'PROTO=2']

### Breakdown:
The tokens in the rest of the line follow 3 different patterns:
1. KEY=value
2. KEY=(nothing)
3. FLAG


In [19]:
# Classify every whitespace-separated token of the packet details.
# The patterns are raw strings: in a plain string literal '\w' is an invalid
# escape sequence that Python warns about.
# Branch order matters because re.match only anchors at the start of the
# token: "KEY=value" must be tested before "KEY=", and "KEY=" before a bare word.
for x in m3.group(7).split():
    if re.match(r'\w+=.+', x):  # best case - a key and value pair
        print('type 1 entry, a key/value pair: ', x)
    elif re.match(r'\w+=', x):  # don't store these since there's no value
        print('type 2 entry, a key without a value: ', x)
    elif re.match(r'\w+', x):  # concatenate these into a flags field
        print('type 3 entry, a flags entry: ', x)
    else:
        print('no match: ', x)
        

type 1 entry, a key/value pair:  IN=wlp6s0
type 2 entry, a key without a value:  OUT=
type 1 entry, a key/value pair:  MAC=01:00:5e:00:00:01:6c:b0:ce:be:5e:33:08:00
type 1 entry, a key/value pair:  SRC=0.0.0.0
type 1 entry, a key/value pair:  DST=224.0.0.1
type 1 entry, a key/value pair:  LEN=32
type 1 entry, a key/value pair:  TOS=0x00
type 1 entry, a key/value pair:  PREC=0xC0
type 1 entry, a key/value pair:  TTL=1
type 1 entry, a key/value pair:  ID=0
type 3 entry, a flags entry:  DF
type 1 entry, a key/value pair:  PROTO=2
