In [None]:
!unzip zenlove.zip

## Locate all location files

In [3]:
import os

In [4]:
zendata_filepaths = []
for dirname in os.listdir('./zenlove'):
  if dirname.startswith('.'):
    continue
  for filename in os.listdir('./zenlove/' + dirname):
    if filename.startswith('__MACOSX'):
      continue
    zendata_filepaths.append('./zenlove/' + dirname + '/' + filename)

zendata_filepaths[:3]

['./zenlove/01/2023-01-18.html',
 './zenlove/01/2023-01-08.html',
 './zenlove/01/2023-01-24.html']

## Extract all location data from HTMLs

In [5]:
from bs4 import BeautifulSoup, NavigableString

In [6]:
# parse any file to get the header data
soup = BeautifulSoup(open(zendata_filepaths[0]), 'html.parser')
header_row = soup.find_all("table")[0].find_all("thead")[0].find_all("tr")[1]

header_data = []
for r in header_row:
  if isinstance(r, NavigableString):
    continue
  header_data.append(r.get_text())

header_data[0]='Time'
header_data

['Time',
 'Latitude',
 'Longitude',
 'Altitude',
 'Bearing',
 'Speed',
 'Charging',
 'Level',
 'CC',
 'Name',
 'Foreground',
 'State',
 'Scanning',
 'Connected',
 'BSSID',
 'RSSI',
 'Band',
 'Channel']

In [None]:
geo_data = []

i = 0
for fp in zendata_filepaths:
  print(f'Parsing file {i}/{len(zendata_filepaths)}: {fp}')
  soup = BeautifulSoup(open(fp), 'html.parser')

  geo_rows = soup.find_all("table")[0].find_all("tbody")[0].find_all("tr")
  for r in geo_rows:
    if r is None or isinstance(r, NavigableString):
      continue

    tds = r.find_all("td")  
    row_data = list(map(lambda td: td.get_text(), tds))
    
    geo_data.append(row_data)
  i += 1

print(f'geo data rows: {len(geo_data)}')

## Map pandas df onto open street map

In [11]:
import plotly.express as px
import pandas as pd

In [12]:
df = pd.DataFrame(geo_data, columns=header_data) 
df.head()

Unnamed: 0,Time,Latitude,Longitude,Altitude,Bearing,Speed,Charging,Level,CC,Name,Foreground,State,Scanning,Connected,BSSID,RSSI,Band,Channel
0,05:03:26,42.880 ± 27.02 m,74.618 ± 16.15 m,748.278,-1.0,-1.0,No,15.0,--,--,No,,No,No,--,--,--,--
1,05:13:27,42.880 ± 27.02 m,74.618 ± 20.75 m,748.034,-1.0,-1.0,No,12.0,--,--,No,,No,No,--,--,--,--
2,05:23:58,42.880 ± 27.02 m,74.618 ± 20.43 m,748.34,-1.0,-1.0,No,10.0,--,--,No,,No,No,--,--,--,--
3,05:33:45,42.880 ± 27.02 m,74.618 ± 15.83 m,748.441,-1.0,-1.0,No,8.0,--,--,No,,No,No,--,--,--,--
4,05:43:29,42.880 ± 27.02 m,74.618 ± 20.16 m,748.417,-1.0,-1.0,No,6.0,--,--,No,,No,No,--,--,--,--


In [13]:
trunc_coordinates = lambda it: 0.0 if str(it).startswith('0.0 ') else float(str(it)[:6])
df['Latitude'] = df['Latitude'].apply(trunc_coordinates)
df['Longitude'] = df['Longitude'].apply(trunc_coordinates)
df.head()

Unnamed: 0,Time,Latitude,Longitude,Altitude,Bearing,Speed,Charging,Level,CC,Name,Foreground,State,Scanning,Connected,BSSID,RSSI,Band,Channel
0,05:03:26,42.88,74.618,748.278,-1.0,-1.0,No,15.0,--,--,No,,No,No,--,--,--,--
1,05:13:27,42.88,74.618,748.034,-1.0,-1.0,No,12.0,--,--,No,,No,No,--,--,--,--
2,05:23:58,42.88,74.618,748.34,-1.0,-1.0,No,10.0,--,--,No,,No,No,--,--,--,--
3,05:33:45,42.88,74.618,748.441,-1.0,-1.0,No,8.0,--,--,No,,No,No,--,--,--,--
4,05:43:29,42.88,74.618,748.417,-1.0,-1.0,No,6.0,--,--,No,,No,No,--,--,--,--


In [None]:
fig = px.scatter_mapbox(df, 
                       lat="Latitude", 
                       lon="Longitude", 
                       hover_name="Time", 
                       hover_data=["Time", "Speed"],
                       color_discrete_sequence=['red'],
                       zoom=8, 
                       height=800,
                       width=1000)

fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()