In [89]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as g
import polars as pl
from pygments.lexers import go

from data_proc import access_log_to_df

In [12]:
# Read every access log and stack the results into a single frame.
# NOTE(review): the last three paths are identical, so the same file is
# loaded and appended three times — confirm whether distinct log files
# were intended here.
log_paths = (
    "../access_logs/access_log",
    "../access_logs/access_log-20250608",
    "../access_logs/access_log-20250608",
    "../access_logs/access_log-20250608",
)

df = None
for log_number, log_path in enumerate(log_paths, start=1):
    print(f"loading log {log_number}...")
    loaded = access_log_to_df(log_path)
    # The first log becomes the frame; each later one is appended below it.
    df = loaded if df is None else pl.concat([df, loaded], how="vertical")
    print("done")

# Distinct values of the "date" and "ip" columns across everything loaded
dates = set(df["date"])
ips = set(df["ip"])

loading log 1...
done
loading log 2...
done
loading log 3...
done
loading log 4...
done


In [53]:
# Count how many rows each IP has on each date.
# Result shape: {date: {ip: number_of_rows}}
#
# Start with one empty bucket per entry of `dates`, so every date the later
# cells iterate over has a key and the outer key order follows `dates`.
ip_access_by_date = {date: {} for date in dates}

# One pass over the (date, ip) pairs instead of re-filtering the whole frame
# once per date. Rows are visited in frame order, so within each date the IPs
# are inserted in order of first appearance.
for date, ip in zip(df["date"], df["ip"]):
    # setdefault keeps this cell working even if `dates` is stale relative to df
    per_ip_counts = ip_access_by_date.setdefault(date, {})
    per_ip_counts[ip] = per_ip_counts.get(ip, 0) + 1

In [90]:
# Write one HTML chart per date: per-IP access counts, sorted ascending,
# drawn as a line on a log-scaled y axis.
output_dir = Path("./access_by_date_plots")
# Make sure the target directory exists before any figure is written into it.
output_dir.mkdir(parents=True, exist_ok=True)

for date in dates:
    counts_for_date = ip_access_by_date[date]

    # Materialise the dict views as lists before handing them to pandas, and
    # sort into a new frame rather than mutating in place.
    access_df = pd.DataFrame(
        {
            "ips": list(counts_for_date.keys()),
            "accesses": list(counts_for_date.values()),
        }
    ).sort_values(by="accesses")

    fig = g.Figure()
    fig.add_trace(g.Scatter(x=access_df["ips"], y=access_df["accesses"], mode="lines"))
    fig.update_layout(
        yaxis_type="log",
        title=f"Accesses per IP on {date}",
        xaxis_title="IP",
        yaxis_title="Accesses (log scale)",
    )

    # "/" in the date would be read as a path separator, so swap it for "-".
    # Computed outside the f-string: reusing the same quote character inside
    # an f-string expression only parses on Python 3.12+.
    file_stem = date.replace("/", "-")
    fig.write_html(str(output_dir / f"{file_stem}.html"))


