Setup globals

In [1]:
import datetime
import pandas as pd
import sqlite3
import time

# Handy time spans, expressed in seconds
day_of_seconds = 24 * 60 * 60
week_of_seconds = day_of_seconds * 7
month_of_seconds = day_of_seconds * 30  # a "month" is approximated as 30 days

# Unix timestamp of local midnight at the start of the current day
today = int(
    time.mktime(
        datetime.date.today().timetuple()
    )
)

The following cell values are safe to modify as needed

In [2]:
# Database file to analyse and the row cap handed to pandas for display
sqlite_file = 'RaMBLE.sqlite'
max_rows = 500

# Cutoff timestamp separating "new" devices from "old" ones;
# by default it reaches back one week from the start of today
wayback = today - week_of_seconds

Pull in database

In [3]:
# Let pandas render up to `max_rows` rows before truncating a displayed frame
pd.set_option('display.max_rows', max_rows)

# The connection is left open on purpose: the Historical Data cell queries it again
db = sqlite3.connect(sqlite_file)

# Split the devices table at the `wayback` cutoff. The cutoff is bound as a
# query parameter instead of being formatted into the SQL text; int() keeps
# the integer coercion that the previous "%d" formatting applied.
new_devices = pd.read_sql_query(
    "SELECT * from devices WHERE first_seen >= ?", db, params=(int(wayback),)
)
old_devices = pd.read_sql_query(
    "SELECT * from devices WHERE first_seen < ?", db, params=(int(wayback),)
)

The database has two important tables: `devices` and `locations`, but this notebook will only be focusing on `devices`.

devices
* id (int)
* address (string: OUI)
* adv_flags (float)
* device_name (string)
* device_type (string)
* first_seen (int)
* last_seen (int)
* msd_key (float, Manufacturer Data: ID)
* msd_field (hex string, Manufacturer Data: Data)
* service_uuids (string uuid)
* service_data (string hex data)
* days_seen (int)
* raw_adv_data (bitstring)

locations
* id (int)
* device_id (int)
* timestamp (int)
* rssi (int)
* tx_power (float)
* latitude (float)
* logitude (float)
* accuracy (float)
* packets_received (int)

# New Devices of Interest

In [19]:
def _first_sightings(devices, column, known_values):
    """Pick out the rows of `devices` whose `column` value is not in `known_values`.

    Returns a ``(rows, counts)`` pair. `rows` is the filtered frame (rows
    whose value is missing are retained). `counts` is a one-column frame
    tallying each remaining non-null value, sorted by value, with its index
    named after `column`.
    """
    rows = devices[~devices[column].isin(known_values)]
    counts = rows[column].value_counts()
    counts.name = ''
    counts = pd.DataFrame(counts).sort_index()
    counts.index.names = [column]
    return rows, counts


def _report_rows(devices, column, counts, columns, reported_elsewhere=()):
    """Collect report rows for each device whose `column` value is listed in `counts`.

    `reported_elsewhere` holds ``(column, counts)`` pairs from earlier checks;
    a device whose value in one of those columns appears in the matching
    counts index is skipped so it is not reported twice. Rows are returned
    grouped by value, in the order of `counts.index`.
    """
    rows = []
    for value in counts.index:
        matches = devices[devices[column] == value]
        duplicate = pd.Series(False, index=matches.index)
        for other_column, other_counts in reported_elsewhere:
            duplicate |= matches[other_column].isin(other_counts.index)
        rows.extend(matches.loc[~duplicate, columns].values)
    return rows


interesting_columns = ["device_name", "device_type", "address", "service_uuids"]

# Find devices with device names never seen before
old_names = old_devices['device_name'].dropna(how='any').unique()
new_names, name_count = _first_sightings(new_devices, 'device_name', old_names)

# Find devices with device types never seen before
old_types = old_devices['device_type'].dropna(how='any').sort_values().unique()
new_types, type_count = _first_sightings(new_devices, 'device_type', old_types)

# Find devices with Service UUIDs never seen before
old_uuids = old_devices['service_uuids'].dropna(how='any').unique()
new_uuids, uuid_count = _first_sightings(new_devices, 'service_uuids', old_uuids)

# Build the report: names first, then types and UUIDs, each step skipping
# devices that an earlier step would already have reported
report_rows = _report_rows(new_devices, 'device_name', name_count, interesting_columns)
report_rows += _report_rows(
    new_devices, 'device_type', type_count, interesting_columns,
    reported_elsewhere=[('device_name', name_count)],
)
report_rows += _report_rows(
    new_devices, 'service_uuids', uuid_count, interesting_columns,
    reported_elsewhere=[('device_name', name_count), ('device_type', type_count)],
)

# Turn our list into a dataframe (an empty list still yields the four columns)
device_report = pd.DataFrame(report_rows, columns=interesting_columns)

# Dump report to csv
device_report.to_csv('interesting_devices.csv')

In [20]:
def _first_seen_summary(devices):
    """Describe how many devices a frame holds and the span of their `first_seen` times.

    The span is taken from the minimum and maximum `first_seen` values, so
    it does not depend on the order in which the rows were returned. When
    the frame has no usable `first_seen` value only the count is reported,
    instead of failing on an empty frame.
    """
    total = devices.id.count()
    first_seen = devices.first_seen.dropna()
    if first_seen.empty:
        return f"{total} devices"

    stamp = '%Y-%m-%d %H:%M:%S'
    earliest = time.strftime(stamp, time.localtime(int(first_seen.min())))
    latest = time.strftime(stamp, time.localtime(int(first_seen.max())))
    return f"{total} devices from {earliest} to {latest}"


print(_first_seen_summary(old_devices))
print(_first_seen_summary(new_devices))

11310 devices from 2018-11-20 15:12:37 to 2020-09-01 22:37:15
391 devices from 2020-09-03 08:42:01 to 2020-09-05 13:31:39


In [21]:
# Tally of device names found among the new devices but not among the old ones
name_count

device_name,Unnamed: 1
ALAM (24:F7:2C)�,1
BLE_92EB,1
CCJB618380306,1
DV7200-4.0-382C,1
Dispatch TV,1
Forerunner 25�,1
G-519C9C,1
Galaxy Watch (656E) LE,1
LCIRemoteNwBKfKvbV,1
LG SH4(72),1


In [22]:
# Tally of device types found among the new devices but not among the old ones
type_count

device_type,Unnamed: 1
CSR,1
"Device Information, Fitbit",1
Eddystone URL beacon - http://mrefer.com/4153232615,1
Exposure Notification,1
"Fitbit Charge, Device Information, Fitbit",1
Lippert Components,1


In [23]:
# Tally of service UUIDs found among the new devices but not among the old ones
uuid_count

service_uuids,Unnamed: 1
00000000-0200-a58e-e411-afe28044e62c,1
47726f74-6547-4c4f-5353-414253000000,1
fd6f,1


In [24]:
# Show the combined report ordered by hardware address
device_report.sort_values("address")

Unnamed: 0,device_name,device_type,address,service_uuids
6,G-519C9C,,00:05:C2:51:9C:9C,47726f74-6547-4c4f-5353-414253000000
9,LG SH4(72),LG Electronics,08:EF:3B:D9:D9:72,
15,,Exposure Notification,35:BE:AB:BF:94:69,fd6f
11,VM-207-BLE,Human Interface Device,71:72:59:16:C4:C8,1812
4,Dispatch TV,Google,74:21:CB:E3:6D:62,fea0
10,QHM-0CAE,,AE:0B:00:00:0C:AE,
0,ALAM (24:F7:2C)�,,B8:3A:9D:24:F7:2D,
3,DV7200-4.0-382C,,C0:42:47:03:98:2C,a032
12,,CSR,C1:E9:B6:00:1C:DB,
14,,Eddystone URL beacon - http://mrefer.com/41532...,C7:4E:77:81:86:0F,feaa


## New Vendor Information

In [10]:
def _global_oui(address, skip_multicast=False):
    """Return the OUI (first three octets) of `address`, or None to ignore it.

    Addresses whose first octet has the 0x02 (locally administered) bit set
    are ignored. Addresses with the 0x01 (multicast) bit set are ignored
    when `skip_multicast` is true; otherwise that bit is cleared. The result
    is upper-cased so every OUI is rendered in one consistent case.
    """
    octets = address.split(':')[:3]
    first_octet = int(octets[0], 16)
    if first_octet & 0x2:
        return None
    if skip_multicast and first_octet & 0x1:
        return None
    # turn off the multicast bit
    octets[0] = '{:02X}'.format(first_octet & 0xfe)
    return ':'.join(octets).upper()


# Global OUIs already observed before the cutoff
old_ouis = [
    oui
    for oui in map(_global_oui, old_devices['address'].dropna())
    if oui is not None
]
known_ouis = set(old_ouis)  # set membership keeps the lookup below cheap

new_ouis = list()
new_addresses = list()

# NOTE(review): unlike the pass over old devices, new addresses with the
# multicast bit set are skipped outright rather than normalised. This is
# kept exactly as before; confirm the asymmetry is intended.
for new_address in new_devices['address'].dropna():
    new_oui = _global_oui(new_address, skip_multicast=True)
    if new_oui is None or new_oui in known_ouis:
        continue
    new_ouis.append(new_oui)
    new_addresses.append(new_address)

# Select the matching devices in one pass; this also yields a correctly
# shaped (empty) report when no new vendor shows up
oui_report_columns = ["device_name", "device_type", "address", "msd_key"]
device_oui_report = (
    new_devices.loc[new_devices['address'].isin(new_addresses), oui_report_columns]
    .reset_index(drop=True)
    .rename(columns={"msd_key": "Manufacturer ID (raw)"})
)

In [11]:
# Number of new devices observed per newly seen OUI
pd.Series(new_ouis).value_counts().to_frame()

Unnamed: 0,0
20:EA:CE,1
6c:A6:57,1
60:D1:FD,1
44:0D:33,1
70:B5:39,1
40:23:43,1
4c:12:67,1
58:BC:11,1
44:72:D4,1
30:5B:BE,1


In [12]:
# Show the new-vendor report ordered by hardware address
device_oui_report.sort_values("address")

Unnamed: 0,device_name,device_type,address,Manufacturer ID (raw)
28,G-519C9C,,00:05:C2:51:9C:9C,
66,,Microsoft,00:26:9D:67:3A:1F,6.0
2,,Microsoft,08:B3:D8:50:91:21,6.0
52,,Microsoft,0C:5E:4E:7A:0E:FB,6.0
67,,Microsoft,0C:E7:C9:BE:65:02,6.0
48,,Microsoft,10:C0:D2:68:7D:EA,6.0
3,,Microsoft,18:2D:37:5B:4B:DD,6.0
31,,,18:EB:77:A2:61:71,19456.0
4,,Microsoft,20:EA:CE:0E:72:CC,6.0
7,,Microsoft,24:9E:D3:4C:C9:60,6.0


## New Manufacturer IDs

In [13]:
# Manufacturer IDs already observed before the cutoff
old_msds = old_devices['msd_key'].dropna(how='any').sort_values().unique()

# Rows of new devices whose manufacturer ID was never seen before
# (rows without a manufacturer ID are retained here; value_counts drops them)
new_msds = new_devices[~new_devices.msd_key.isin(old_msds)]

msd_table = pd.DataFrame(new_msds.msd_key.value_counts())

# Convert msd_key into hex. The tally column is read by position because its
# label differs between pandas versions ("msd_key" before 2.0, "count" after).
msd_counts = pd.DataFrame(
    [
        ['0x{:02x}'.format(int(float(msd_key))), msd_key, count]
        for msd_key, count in msd_table.iloc[:, 0].items()
    ],
    columns=["Manufacturer ID (hex)", "Manufacturer ID (raw)", "Count"],
)

# Gather the devices behind each new manufacturer ID, grouped in the same
# order as `msd_counts`
msd_report_columns = ["device_name", "device_type", "address", "msd_key"]
msd_frames = [
    new_devices.loc[new_devices.msd_key == new_msd, msd_report_columns]
    for new_msd in msd_counts["Manufacturer ID (raw)"]
]

if msd_frames:
    device_msd_report = pd.concat(msd_frames, ignore_index=True)
else:
    # Nothing new: still produce a frame with the expected columns
    device_msd_report = pd.DataFrame(columns=msd_report_columns)

device_msd_report.columns = ["device_name", "device_type", "address", "Manufacturer ID (raw)"]

In [14]:
# Manufacturer IDs found among the new devices but not among the old ones, with counts
msd_counts

Unnamed: 0,Manufacturer ID (hex),Manufacturer ID (raw),Count
0,0x5c7,1479.0,1


In [15]:
# Show the new-manufacturer report ordered by hardware address
device_msd_report.sort_values("address")

Unnamed: 0,device_name,device_type,address,Manufacturer ID (raw)
0,LCIRemoteNwBKfKvbV,Lippert Components,F3:FE:46:53:CA:8F,1479.0


# Historical Data

In [16]:
# Load every device, regardless of when it was first seen
total_devices = pd.read_sql_query("SELECT * from devices", db)

# Occurrences of each device name, exported before the index is relabelled for display
device_names = pd.DataFrame(total_devices.device_name.value_counts()).sort_index()
device_names.columns = ["Count"]
device_names.to_csv('device_names.csv', index_label='Device Name')
device_names.index.names = ["Top Device Names"]

# Occurrences of each device type, exported the same way
device_types = pd.DataFrame(total_devices.device_type.value_counts()).sort_index()
device_types.columns = ["Count"]
device_types.to_csv('device_types.csv', index_label='Device Type')
device_types.index.names = ["Top Device Types"]

# len() gives the number of distinct values without positional indexing into
# a label-indexed Series; the second message now names device types correctly
print(f"Exported {len(device_names)} device names to `device_names.csv`")
print(f"Exported {len(device_types)} device types to `device_types.csv`")

Exported 537 device names to `device_names.csv`
Exported 122 device names to `device_types.csv`


## Count of all Device Names

In [17]:
# Five most frequent device names
device_names.sort_values("Count", ascending=False).head(5)

Unnamed: 0_level_0,Count
Top Device Names,Unnamed: 1_level_1
Tile,104
Charge 2,32
Versa,23
H,16
Apple Pencil�������������,15


## Count of all Device Types

In [18]:
# Five most frequent device types
device_types.sort_values("Count", ascending=False).head(5)

Unnamed: 0_level_0,Count
Top Device Types,Unnamed: 1_level_1
"Apple, Nearby",6043
Microsoft,1716
iBeacon,1120
"Apple, Handoff",575
Google,348
