In [1]:
import os
import dotenv
# Load variables from ".env.local" so the os.environ lookups in this notebook can find them
# (presumably SUPABASE_KEY and MAPBOX_TOKEN are defined there — confirm against the file).
dotenv.load_dotenv(".env.local")

from supabase import create_client, Client
import time
from copy import deepcopy

import re
import json
from urllib.request import urlopen
import requests
import pandas as pd
import plotly.express as px
# Register the Mapbox token with Plotly Express; raises KeyError if MAPBOX_TOKEN is not set.
px.set_mapbox_access_token(os.environ["MAPBOX_TOKEN"])

In [2]:
# Connect to the Supabase project; the API key is read from the environment.
SUPABASE_URL = "https://wvklzyidkkrskdgquocj.supabase.co"
client = create_client(SUPABASE_URL, os.environ["SUPABASE_KEY"])

# Fetch every column of every row in the "visits" table.
visits_query = client.table("visits").select("*")
response = visits_query.execute()

2024-02-28 18:32:22,203:INFO - HTTP Request: GET https://wvklzyidkkrskdgquocj.supabase.co/rest/v1/visits?select=%2A "HTTP/1.1 200 OK"


In [3]:
# A gap longer than this (in seconds) between two consecutive visits from the
# same IP address starts a new session.
SESSION_GAP_SECONDS = 60 * 10

# Work on a copy so the rows held by `response` keep their original string
# timestamps and this cell can be re-run safely.
visits = deepcopy(response.data)

# Group visits by IP address, converting each "created_at" to epoch seconds.
visits_by_visitor = {}
for visit in visits:
    # pd.Timestamp parses the ISO-8601 string (e.g. 2024-02-27T04:33:45.574632+00:00),
    # honours its UTC offset, keeps the fractional seconds and also accepts values
    # without a fractional part. The previous time.mktime(time.strptime(...)) pair
    # interpreted the parsed fields as *local* time, ignored the offset and dropped
    # sub-second precision.
    visit["created_at"] = pd.Timestamp(visit["created_at"]).timestamp()
    visits_by_visitor.setdefault(visit["ip_address"], []).append(visit)

# Order each visitor's visits chronologically (stable sort, done in place).
for visitor_visits in visits_by_visitor.values():
    visitor_visits.sort(key=lambda v: v["created_at"])

# Split each visitor's ordered visits into sessions: a visit joins the current
# session unless it comes more than SESSION_GAP_SECONDS after the previous visit.
sessions_by_visitor = {}
for visitor, visitor_visits in visits_by_visitor.items():
    sessions = []
    for visit in visitor_visits:
        starts_new_session = (
            not sessions
            or visit["created_at"] - sessions[-1][-1]["created_at"] > SESSION_GAP_SECONDS
        )
        if starts_new_session:
            sessions.append([])
        sessions[-1].append(visit)
    sessions_by_visitor[visitor] = sessions

In [4]:
# Report how many sessions were detected for each visitor IP address.
for visitor_ip, visitor_sessions in sessions_by_visitor.items():
    print(visitor_ip, ":", len(visitor_sessions))

121.0.0.1 : 2
64.23.187.48 : 1
172.69.23.187 : 1
76.99.220.36 : 3
172.69.22.42 : 1
172.69.23.220 : 1
140.99.83.129 : 1
192.71.10.105 : 1
89.248.174.42 : 1
178.254.12.183 : 1
203.2.64.59 : 1
72.13.46.5 : 1
34.235.141.218 : 1
114.119.134.208 : 1
159.223.154.247 : 1
179.33.147.80 : 1
104.28.79.177 : 1
108.174.2.214 : 1
195.191.219.131 : 1
139.190.224.16 : 1
104.234.133.157 : 1
45.94.31.129 : 1
17.241.219.109 : 1
66.249.79.134 : 2
66.249.79.132 : 3
35.195.93.98 : 1
3.224.220.101 : 1
23.22.35.162 : 1
216.244.66.228 : 1
212.227.216.232 : 2
212.227.216.160 : 1
212.227.216.240 : 1
212.227.216.200 : 1
212.227.216.168 : 1
212.227.216.192 : 1
212.227.216.136 : 1
212.227.216.208 : 1
212.227.216.224 : 1
212.227.216.176 : 1
66.249.79.133 : 1
43.163.3.58 : 1
47.128.58.54 : 1


In [27]:
# Probe ipinfo.io with one known visitor IP to see which fields the API returns.
# The result gets its own name so the Supabase `response` (read via
# `response.data` when building `visits`) is not overwritten, and the HTTP
# response is closed as soon as the JSON body has been parsed.
# Note: in the payload shown below the country code is under "country";
# there is no "country_name" key.
with urlopen("https://ipinfo.io/108.174.2.214/json") as ip_info_http:
    sample_ip_info = json.load(ip_info_http)
sample_ip_info

{'ip': '108.174.2.214',
 'hostname': '108-174-2-214.fwd.linkedin.com',
 'city': 'Centreville',
 'region': 'Virginia',
 'country': 'US',
 'loc': '38.8404,-77.4289',
 'org': 'AS55163 LinkedIn Corporation',
 'postal': '20120',
 'timezone': 'America/New_York',
 'readme': 'https://ipinfo.io/missingauth'}

In [5]:
SESSION_COLUMNS = [
    "visitor", "start_time", "end_time", "city", "region", "country", "lat", "lon"
]


def lookup_ip_location(ip_address):
    """Return city/region/country/lat/lon for an IP address using ipinfo.io.

    Fields missing from the ipinfo payload come back as None, including lat/lon
    when the payload has no "loc" entry. Network and HTTP errors raised by
    urlopen are not caught and propagate to the caller.
    """
    with urlopen(f"https://ipinfo.io/{ip_address}/json") as ip_info_http:
        ip_info = json.load(ip_info_http)

    lat = lon = None
    loc = ip_info.get("loc")  # "lat,lon" string, e.g. "38.8404,-77.4289"
    if loc:
        lat_text, _, lon_text = loc.partition(",")
        lat, lon = float(lat_text), float(lon_text)

    return {
        "city": ip_info.get("city"),
        "region": ip_info.get("region"),
        # ipinfo returns the country code under "country"; the previously used
        # "country_name" key is absent, which left this column entirely empty.
        "country": ip_info.get("country"),
        "lat": lat,
        "lon": lon,
    }


# One row per session: visitor IP, first/last visit time and the IP's location.
session_records = []
for visitor, sessions in sessions_by_visitor.items():
    if not sessions:
        continue
    # Location depends only on the IP, so look it up once per visitor
    # instead of once per session.
    location_data = lookup_ip_location(visitor)
    for session in sessions:
        session_records.append({
            "visitor": visitor,
            "start_time": session[0]["created_at"],
            "end_time": session[-1]["created_at"],
            **location_data,
        })

# Passing the column list keeps the column order and yields the expected
# columns even when there are no sessions at all.
session_df = pd.DataFrame(session_records, columns=SESSION_COLUMNS)

In [6]:
# Centre the map on the median session coordinates and give every marker the same size.
map_center = {"lat": session_df["lat"].median(), "lon": session_df["lon"].median()}
marker_sizes = [25] * len(session_df)

# One marker per session, coloured by the session's end time and labelled with the city.
fig = px.scatter_mapbox(
    session_df,
    lat="lat",
    lon="lon",
    color="end_time",
    text="city",
    size=marker_sizes,
    center=map_center,
    zoom=1,
)
fig.show()