In [None]:
from scapy.all import *
from hashlib import md5
import arrow
import pandas as pd
from parse_rssi import parse_rssi

In [None]:
import glob
# glob returns matches in arbitrary, OS-dependent order; sort them so the
# capture files are processed in the same order on every run.
capture_files = sorted(glob.glob("captured_packets/*.cap"))

In [None]:
import re

def parse_station_id(file_name):
    """Extract the station's MAC address from a capture file name.

    The first run of twelve hexadecimal digits found in ``file_name`` --
    written bare (``b827eb001122``) or with optional ``:`` separators
    (``b8:27:eb:00:11:22``) -- is returned exactly as it appears.

    Args:
        file_name: Path or name of a capture file.

    Returns:
        The matched MAC address text.

    Raises:
        IndexError: If ``file_name`` contains no MAC address.
    """
    # The last hex digit is matched outside the repeated group so that a ':'
    # which merely follows the address (e.g. 'aa:..:ff:1.cap') is not captured.
    match = re.search(r'(?:[0-9a-fA-F]:?){11}[0-9a-fA-F]', file_name)
    if match is None:
        # Same exception type that `findall(...)[0]` raised, but with context.
        raise IndexError(f'no MAC address found in file name {file_name!r}')
    return match.group(0)

## Get packets from all available collects
### Probe requests are management frames of type 0x00 with subtype 0x04

In [None]:
# Collect the probe requests of every capture file as a list of
# (station_id, packets) tuples, one entry per file.
collects = []
for capture_file in capture_files:
    packets = []
    pcap_reader = PcapReader(capture_file)
    try:
        for pkt in pcap_reader:
            try:
                # Probe request: management frame (type 0) with subtype 0x04.
                if pkt.type == 0 and pkt.subtype == 0x04:
                    packets.append(pkt)
            except AttributeError:
                # A packet lacking a `type`/`subtype` field cannot be a
                # probe request; skip it instead of hiding every error.
                continue
    finally:
        # Release the file handle even if reading the capture fails midway.
        pcap_reader.close()
    collects.append((parse_station_id(capture_file), packets))

# Print out the start and end times of the collected packets

In [None]:
# Report the time span covered by each station's capture.
for collect in collects:
    station_id, packets = collect[0], collect[1]

    print(f'Station: {station_id}')
    if not packets:
        # Nothing was captured for this station, so there is no span to report.
        print('No probe requests captured\n')
        continue

    # The first and last list entries delimit the capture; index 0 (not 1)
    # is the first packet, and it also exists for single-packet captures.
    pkt_start = packets[0]
    pkt_end = packets[-1]
    print(f'Start: {arrow.get(pkt_start.time)}')
    print(f'End  : {arrow.get(pkt_end.time)}\n')

## Find the MAC address of each packet, its associated RSSI (signal strength) and the time of capture

In [None]:
# Reduce every packet to a (MAC, capture time, RSSI) reading and store the
# list of readings as the third element of the station's tuple.
for num, collect in enumerate(collects):
    packets = collect[1]
    near = [(pkt.addr2, arrow.get(pkt.time), parse_rssi(pkt)) for pkt in packets]
    # Rebuild from the first two elements only, so re-running this cell
    # replaces the readings instead of appending another list to the tuple.
    collects[num] = collect[:2] + (near,)

## Save the semi-processed data to a CSV file

In [None]:
### TODO: Use a DataFrame for this instead

import csv
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate '\n' on write (e.g. Windows) end up with a blank
# line between rows.
with open('parsed_packet_data.csv', 'w', newline='') as f:
    wr = csv.writer(f, dialect='excel')
    # Header row
    wr.writerow(('MAC', 'time', 'RSSI', 'station'))

    # Write one collect at a time
    for collect in collects:
        station_id = collect[0]
        # Add the station (ID) to each proximity reading of the current collect
        wr.writerows(reading + (station_id,) for reading in collect[2])

## Prepare the data as a pandas DataFrame, so we can group and filter it

In [None]:
# Flatten every station's readings into [MAC, time, RSSI, station] rows,
# tagging each reading with the ID of the station that captured it.
proximity_rows = [
    list(reading + (entry[0],))
    for entry in collects
    for reading in entry[2]
]

# A single table holding the readings of all stations, ordered by capture time.
proximity_table = pd.DataFrame(
    proximity_rows,
    columns=['MAC', 'time', 'RSSI', 'station'],
).sort_values('time')

## Define a proximity threshold: any signal stronger than it will be considered a person close to the station

In [None]:
# Readings whose RSSI exceeds this value are treated as "close to the station".
# NOTE(review): the scale/unit depends on what parse_rssi returns -- confirm.
proximity_threshold = 96

is_close = proximity_table['RSSI'] > proximity_threshold
close_ones = proximity_table[is_close]

# Each group is one (station, MAC) combination among the close readings.
nearby_pairs = close_ones.groupby(['station', 'MAC'])
print(f"Found {len(nearby_pairs)} diferent devices near the wi-fi collect stations.")

In [None]:
# Derive movements: for every close reading, look for the same MAC showing up
# later at a different station and record the first such sighting.
# Rows are gathered in a plain list and converted to a DataFrame once, because
# DataFrame.append was removed in pandas 2.0 (and copied the frame on each call).
movement_records = []

for close_one in close_ones.itertuples(index=False):
    # Positional access; close_ones is expected to hold
    # (MAC, time, RSSI, station) columns in that order.
    current_mac = close_one[0]
    current_time = close_one[1]
    current_station = close_one[3]

    # Find this same MAC at another station in the future
    movement_filter = (close_ones['MAC'] == current_mac) \
                       & (close_ones['station'] != current_station) \
                       & (close_ones['time'] > current_time)

    movement_found = close_ones[movement_filter]

    if not movement_found.empty:
        # First matching row in close_ones' existing row order.
        next_sighting = movement_found.iloc[0]
        movement_records.append({
            'personal_token': current_mac,
            'time': next_sighting['time'],
            'origin': current_station,
            'destination': next_sighting['station'],
        })

# Passing `columns` keeps the expected schema even when no movement was found.
movements = pd.DataFrame(
    movement_records,
    columns=['personal_token', 'time', 'origin', 'destination'],
)

In [None]:
### TODO: Remove duplicated movements

## Save the data about movements in a CSV

In [None]:
# Persist the detected movements; with to_csv's defaults the DataFrame index
# is written as the first column.
movements.to_csv(path_or_buf='movements.csv')