Setup globals

In [1]:
import datetime
import gmaps
import gmaps.datasets
import math
import numpy
import os.path
import pandas as pd
import sqlite3
import time
from IPython.display import display, Markdown

# Durations expressed in seconds; a "month" here is a flat 30 days
day_of_seconds = 24 * 60 * 60
week_of_seconds = day_of_seconds * 7
month_of_seconds = day_of_seconds * 30

# Unix timestamp of local midnight at the start of the current day
today = int(
    time.mktime(
        datetime.date.today().timetuple()
    )
)

The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed',)).History will not be written to the database.


The following cell values are safe to modify as needed

In [2]:
# Input files
sqlite_file = 'RaMBLE.sqlite'
gmaps_apikey_filename = 'gmaps_api.key'

# Value handed to pandas' display.max_rows option
max_rows = 500

# Used when filtering out devices whose names resemble known ones but add a
# serial code or personally identifying information
prefix_length = 4
suffix_length = -5

# How far back to look.
# Normally only the past week is examined; increase the multiplier to look at
# previous weeks instead.
wayback = 1 * week_of_seconds

# Duration of the period being examined
timespan = week_of_seconds

Pull in database

In [3]:
# Let pandas render up to `max_rows` rows when a table is displayed
pd.set_option('display.max_rows', max_rows)

# Read the complete `devices` table into memory
db = sqlite3.connect(sqlite_file)
device_table = pd.read_sql_query("SELECT * from devices", db)

# The reporting window is anchored to the newest sighting in the database
lastest_activity = device_table.last_seen.max()
window_start = lastest_activity - wayback
window_end = window_start + timespan

# Devices first seen before the window closes, split at the window start:
#   new_devices - first seen at or after window_start
#   old_devices - first seen before window_start
device_range = device_table[device_table.first_seen < window_end]
new_devices = device_range[device_range.first_seen >= window_start]
old_devices = device_range[device_range.first_seen < window_start]

The database has two important tables: `devices` and `locations`

devices
* id (int)
* address (string: OUI)
* adv_flags (float)
* device_name (string)
* device_type (string)
* first_seen (int)
* last_seen (int)
* msd_key (float, Manufacturer Data: ID)
* msd_field (hex string, Manufacturer Data: Data)
* service_uuids (string uuid)
* service_data (string hex data)
* days_seen (int)
* raw_adv_data (bitstring)

locations
* id (int)
* device_id (int)
* timestamp (int)
* rssi (int)
* tx_power (float)
* latitude (float)
* longitude (float)
* accuracy (float)
* packets_received (int)

In [4]:
# Lookup tables used to translate address prefixes and manufacturer IDs into names.
#   oui_table: tab-separated file without a header; columns are named "oui" and "vendor"
#   cid_table: CSV whose first row is the header
#   msd_table: CSV with default parsing
#
# Malformed lines in the prefix file are skipped with a warning. The keyword for
# that changed across pandas releases: `error_bad_lines` was deprecated in 1.3 and
# removed in 2.0 in favour of `on_bad_lines`. Try the current spelling first and
# fall back so the cell runs on either side of the change.
try:
    oui_table = pd.read_csv('data/nmap-mac-prefixes', sep='\t', on_bad_lines='warn', names=["oui", "vendor"])
except TypeError:
    # Older pandas does not know `on_bad_lines`
    oui_table = pd.read_csv('data/nmap-mac-prefixes', sep='\t', error_bad_lines=False, names=["oui", "vendor"])
cid_table = pd.read_csv('data/cid.csv',  header=0)
msd_table = pd.read_csv('data/bt_msdid.csv')

def grab_org(address, msd):
    """Look up the organisation behind an address prefix and a manufacturer ID.

    Parameters
    ----------
    address : str
        Colon-separated hexadecimal address; only the first three octets are used.
    msd : float or None
        Manufacturer-data key. NaN or None means the device advertised none.

    Returns
    -------
    tuple
        ``(org_name, mfg_name)``.
        ``org_name`` is the matching entry from ``oui_table`` (globally assigned
        prefixes) or ``cid_table`` (prefixes with the locally-administered bit
        set). When no entry matches it is a label of the form ``oui/XX:XX:XX``
        or ``cid/XX:XX:XX``.
        ``mfg_name`` is the matching ``Company`` from ``msd_table``, the integer
        ID itself when the ID is unknown, or None when ``msd`` is missing.
    """
    # Normalise to upper case up front so the lookup key and the fallback label
    # use one consistent spelling
    oui_octets = [octet.upper() for octet in address.split(':')[:3]]

    # turn off the multicast bit
    first_octet = int(oui_octets[0], 16) & 0xfe
    oui_octets[0] = '{:02X}'.format(first_octet)
    oui = ''.join(oui_octets)

    # check to see if this is a global oui (locally-administered bit clear)
    if first_octet & 0x2 == 0:
        mac_type = 'oui'
        org_matches = oui_table[oui_table.oui == oui].vendor.values
    else:
        mac_type = 'cid'
        org_matches = cid_table[cid_table.Assignment == oui]['Organization Name'].values

    if len(org_matches) > 0:
        org_name = org_matches[0]
    else:
        # Unknown prefix: report the (masked) prefix itself, tagged with its kind
        org_name = mac_type + '/' + ':'.join(oui_octets)

    mfg_name = None

    # A missing manufacturer key may arrive as NaN or as None
    if msd is not None and not math.isnan(msd):
        mfg_matches = msd_table[msd_table.Decimal == int(msd)].Company.values

        if len(mfg_matches) > 0:
            mfg_name = mfg_matches[0]
        else:
            # Unknown ID: keep the number so it can still be looked up by hand
            mfg_name = int(msd)

    return org_name, mfg_name

def device_data(device_id, devices=None):
    """Build one report row for a device.

    Parameters
    ----------
    device_id
        Index label of the device row to report.
    devices : pandas.DataFrame, optional
        Frame to read the row from. When omitted, the module-level
        ``new_device`` frame is used, which is how existing callers work.
        Passing the frame explicitly avoids relying on that global being
        current.

    Returns
    -------
    list
        ``[device_name, device_type, address, service_uuids, org name,
        manufacturer, days_seen]`` - the order of ``context_columns``.
    """
    if devices is None:
        devices = new_device

    # extract_columns is ordered: name, type, address, uuids, days_seen, msd_key
    (device_name, device_type, address,
     service_uuids, days_seen, msd_key) = devices.loc[device_id, extract_columns].values.tolist()

    org_name, mfg_name = grab_org(address, msd_key)

    # msd_key is replaced by the resolved names; days_seen moves to the end
    return [device_name, device_type, address, service_uuids, org_name, mfg_name, days_seen]

In [5]:
# Names that were already present before the reporting window
old_names = old_devices['device_name'].dropna(how='any').unique()

# Find devices with device names never seen before
new_names = new_devices.dropna(subset=['device_name'])
new_names = new_names[~new_names.device_name.isin(old_names)].copy()

bad_names = list()

# Find devices with very similar names to old devices that have the same services and manufacturer
for _, candidate in new_names.iterrows():
    new_name = candidate.device_name
    candidate_uuids = candidate.service_uuids
    candidate_msd = candidate.msd_key

    # Old devices advertising the same services (a missing value matches missing)
    if pd.isna(candidate_uuids):
        old_uuid_devices = old_devices[old_devices.service_uuids.isna()]
    else:
        old_uuid_devices = old_devices[old_devices.service_uuids == candidate_uuids]

    # ...and the same manufacturer key (again, missing matches missing)
    if pd.isna(candidate_msd):
        old_msd_devices = old_uuid_devices[old_uuid_devices.msd_key.isna()]
    else:
        old_msd_devices = old_uuid_devices[old_uuid_devices.msd_key == candidate_msd]

    for old_name in old_msd_devices.device_name:
        # Unnamed old devices cannot be compared; names of different length are
        # not treated as variants of each other
        if not isinstance(old_name, str) or len(old_name) != len(new_name):
            continue

        # Use the lengths from the configuration cell. suffix_length is negative,
        # so name[suffix_length:] selects the trailing characters.
        same_prefix = new_name[:prefix_length] == old_name[:prefix_length]
        same_suffix = new_name[suffix_length:] == old_name[suffix_length:]

        if same_prefix or same_suffix:
            bad_names.append(new_name)
            break

# Drop the names that only look new
new_names = new_names[~new_names.device_name.isin(bad_names)]

# format count table
name_count = new_names.device_name.value_counts()
name_count.name = ''
name_count = pd.DataFrame(name_count).sort_index()
name_count.index.names = ["device_name"]

# Columns read from the devices table, and the columns of the finished report
# (msd_key is replaced by "org name" and "manufacturer"; days_seen goes last)
extract_columns = ["device_name", "device_type", "address", "service_uuids", "days_seen", "msd_key"]
context_columns = ["device_name", "device_type", "address", "service_uuids", "org name", "manufacturer", "days_seen"]

device_report = list()

# grab contextual data on devices and add them to our device report
for new_name in name_count.index:
    # device_data() reads this frame through the global name `new_device`
    new_device = new_devices[new_devices.device_name == new_name]

    # Add devices to our report list
    for device_id in new_device.index:
        device_report.append(device_data(device_id))

In [6]:
# Find devices with device types never seen before

# Every device type that already occurred before the reporting window
old_types = old_devices['device_type'].dropna(how='any').sort_values().unique()

# New devices whose type is not among them
new_types = new_devices.copy()
new_types = new_types[~new_types.device_type.isin(list(old_types))]

# format count table
type_tally = new_types.device_type.value_counts()
type_tally.name = ''
type_count = type_tally.to_frame().sort_index()
type_count.index.names = ["device_type"]

# Add any of these devices to our device list if we haven't already
for new_type in type_count.index:
    # device_data() reads this frame through the global name `new_device`
    new_device = new_devices[new_devices.device_type == new_type]

    for device_id in new_device.index:
        # Devices whose name is in the name table were reported there already
        reported_by_name = new_device.loc[device_id, "device_name"] in name_count.index

        if not reported_by_name:
            device_report.append(device_data(device_id))

In [7]:
# Find devices with Service UUIDs never seen before

# Every service UUID string that already occurred before the reporting window
old_uuids = old_devices['service_uuids'].dropna(how='any').unique()

# New devices whose service UUIDs are not among them
new_uuids = new_devices.copy()
new_uuids = new_uuids[~new_uuids.service_uuids.isin(old_uuids)]

# format count table
uuid_tally = new_uuids.service_uuids.value_counts()
uuid_tally.name = ''
uuid_count = uuid_tally.to_frame().sort_index()
uuid_count.index.names = ["service_uuids"]

# Add any of these devices to our device list if we haven't already
for new_uuid in uuid_count.index:
    # device_data() reads this frame through the global name `new_device`
    new_device = new_devices[new_devices.service_uuids == new_uuid]

    for device_id in new_device.index:
        # Skip devices already reported because of their name or their type
        reported_by_name = new_device.loc[device_id, "device_name"] in name_count.index
        if reported_by_name:
            continue

        reported_by_type = new_device.loc[device_id, "device_type"] in type_count.index
        if reported_by_type:
            continue

        device_report.append(device_data(device_id))

In [8]:
# Convert the collected rows into a DataFrame labelled with the report columns
if len(device_report) == 0:
    # Nothing was collected: still produce a frame with the expected columns
    device_report = pd.DataFrame(columns = context_columns)
else:
    report_frame = pd.DataFrame(device_report)
    report_frame.columns = context_columns
    device_report = report_frame

In [9]:
# Write the finished report to a CSV file in the working directory
device_report.to_csv(path_or_buf='interesting_devices.csv')

In [10]:
def _first_seen_summary(devices):
    """Describe how many devices a frame holds and the span of their first sightings.

    Returns "<n> devices from <earliest> to <latest>" using local time, or just
    "<n> devices" when the frame is empty (there is no span to report then).
    The span comes from the minimum and maximum of `first_seen`, so it does not
    depend on the order of the rows.
    """
    count = devices.id.count()

    if len(devices) == 0:
        return f"{count} devices"

    stamp_format = '%Y-%m-%d %H:%M:%S'
    earliest = time.strftime(stamp_format, time.localtime(int(devices.first_seen.min())))
    latest = time.strftime(stamp_format, time.localtime(int(devices.first_seen.max())))

    return f"{count} devices from {earliest} to {latest}"


display(Markdown("# New Devices of Interest"))
display(Markdown(_first_seen_summary(old_devices)))
display(Markdown(_first_seen_summary(new_devices)))

# New Devices of Interest

43531 devices from 2018-11-20 15:12:37 to 2020-12-26 12:06:22

2232 devices from 2020-12-26 17:32:46 to 2021-01-02 16:07:02

In [11]:
# Show the count table of device names built from `new_names`
name_count

device_name,Unnamed: 1
9454935E17D0FB50100,1
Elmo,1
ID216,1
Jabra Elite Active 65t,1
LA1-0007-AA54,1
LE-Sparkle Storm,1
LE-maximusxvi,1
S. Elias Ross,1
Sb11008f9072cd95dC,1


In [12]:
# Show the count table of device types built from `new_types`
type_count

device_type,Unnamed: 1
Axon Body Camera,1
"Fitbit Charge HR, Device Information, Fitbit",1


In [13]:
# Show the count table of service UUIDs built from `new_uuids`
uuid_count

service_uuids,Unnamed: 1
0003cbbb-0000-1000-8000-00805f9bfff0,1
"180f,1814,180a",1
4a9eeee4-160c-4e38-9693-aa52db9b004e,1
dab91435-b5a1-e29c-b041-bcd562613bdf,1


In [14]:
# With a Google Maps API key on disk: list the single-day devices, then draw one
# heatmap per device seen on several days. Without a key: show the plain report.
if os.path.exists(gmaps_apikey_filename):
    with open(gmaps_apikey_filename, 'r') as file:
        # Drop every whitespace character so a trailing newline, '\r\n' or stray
        # spaces cannot end up inside the key
        gmaps_api = ''.join(file.read().split())

    gmaps.configure(api_key=gmaps_api)

    locations = pd.read_sql_query("SELECT * from locations", db)
    stationary_devices = device_report[device_report['days_seen'] > 1]
    print("Devices only seen one day")
    # days_seen is the last report column; it is always 1 here, so leave it out
    display(device_report[device_report['days_seen'] == 1].sort_values(by="address")[context_columns[:-1]])

    print("Devices seen multiple days")

    for x in range(len(stationary_devices.index)):
        display(stationary_devices.iloc[[x]])

        # Map the report row back to a device id through its address; only the
        # first matching id is used
        device_ids = new_devices[new_devices.address == stationary_devices.iloc[[x]].address.values[0]].id

        # Filter the sightings once and discard rows that cannot be plotted
        sightings = locations[locations['device_id'] == device_ids.values[0]]
        sightings = sightings.dropna(subset=['latitude', 'longitude', 'rssi'])

        if sightings.empty:
            print("No usable location data for this device")
            continue

        device_locations = sightings[['latitude', 'longitude']]

        # Shift RSSI by 101 so stronger signals weigh more, clamped at zero so
        # no weight is negative. These were computed before but never handed
        # to the layer.
        weights = (sightings['rssi'] + 101).clip(lower=0).tolist()

        fig = gmaps.figure()
        fig.add_layer(gmaps.heatmap_layer(device_locations, weights=weights))
        display(fig)
else:
    print()
    display(Markdown("**Location mapping not enabled** To enable heatmaps of stationary devices, save your Google Maps Javascript SDK API Key in the file `gmaps_api.key`"))
    display(device_report.sort_values(by="address"))




**Location mapping not enabled** To enable heatmaps of stationary devices, save your Google Maps Javascript SDK API Key in the file `gmaps_api.key`

Unnamed: 0,device_name,device_type,address,service_uuids,org name,manufacturer,days_seen
9,,Axon Body Camera,00:25:DF:22:12:CD,9ec5d2b8-8f51-4dea-9cd3-f3dea220b5e0,Private,,1
11,BLEDIM,,00:A0:50:9A:E4:55,0003cbbb-0000-1000-8000-00805f9bfff0,Cypress Semiconductor,,1
7,S. Elias Ross,"Apple, Nearby",48:CA:DB:29:2F:47,,oui/48:CA:DB,"Apple, Inc.",1
5,LE-Sparkle Storm,Bose,4C:87:5D:22:94:5E,febe,Bose,2561,1
6,LE-maximusxvi,Bose,60:AB:D2:19:BC:2B,febe,Bose,2561,1
3,Jabra Elite Active 65t,"Battery Service, Running Speed and Cadence",74:5C:4B:CD:C2:C7,"180f,1814,180a",GN Audio,,1
12,,,7E:2C:22:13:54:35,4a9eeee4-160c-4e38-9693-aa52db9b004e,cid/7e:2C:22,,1
0,9454935E17D0FB50100,Battery Service,94:54:93:5E:17:D0,180f,Rigado,,1
8,Sb11008f9072cd95dC,iBeacon,98:93:D8:78:B6:99,1122,oui/98:93:D8,"Apple, Inc.",1
4,LA1-0007-AA54,CSR,C1:C2:0C:2E:83:4B,fef1,oui/c0:C2:0C,,1
