In [33]:
# aggregated_analytics.ipynb

import sys
sys.path.append('..')  # add parent directory to system path
import json
import pandas as pd
import plotly.express as px

from scripts.fetch_data_plausible import fetch_data_aggregate_many, fetch_data_timeseries_many

# Plausible site ids to report on, one per line so additions and removals
# show up as single-line diffs.
site_ids = [
    'apps.sismo.io',
    'demo.apps.sismo.io',
    'sismo.io',
    'build.sismo.io',
    'case-studies.sismo.io',
    'dev.vault-beta.sismo.io',
    'vault-beta.sismo.io',
    'docs.sismo.io',
    'factory.sismo.io',
    'resources.sismo.io',
]

# Log how many sites are about to be queried.
site_count = len(site_ids)
print(f"Fetching data for {site_count} sites")

Fetching data for 10 sites


In [36]:
# Fetch the aggregate metrics for every site.
data = fetch_data_aggregate_many(site_ids)

# Example of the payload shape this cell was written against (one entry per site):
# [
#     {
#         "results": {
#             "pageviews": {"value": 19},
#             "visitors": {"value": 18},
#             "visits": {"value": 18}
#         },
#         "site_id": "apps.sismo.io"
#     },
#     ...
# ]


def _flatten_aggregate_entry(entry):
    """Turn one per-site payload entry into a flat row dict.

    Every key other than 'results' is copied through unchanged. Each metric
    under 'results' becomes its own column, with the {'value': n} wrapper
    removed; a metric that is not wrapped that way is kept as-is.
    """
    row = {key: value for key, value in entry.items() if key != 'results'}
    for metric, payload in entry['results'].items():
        is_wrapped = isinstance(payload, dict) and 'value' in payload
        row[metric] = payload['value'] if is_wrapped else payload
    return row


# Build the DataFrame in a single pass. This replaces
# `df['results'].apply(pd.Series)` plus one lambda per metric, and a metric
# missing for one site now ends up as NaN instead of raising in the lambda.
df = pd.DataFrame([_flatten_aggregate_entry(entry) for entry in data])

# Show the data in a table. A bare `df` is only rendered when it is the last
# expression of a cell, so display it explicitly (`display` is provided by
# the IPython kernel).
display(df)

# Plot the pageviews per site as a bar chart.
fig = px.bar(df, x='site_id', y='pageviews', title='Plausible Analytics pageviews last day')
fig.update_layout(yaxis_title='Count', autosize=False, width=900, height=500, xaxis_tickangle=-45)
fig.show()

In [35]:
# Fetch the timeseries data for every site.
data_timeseries = fetch_data_timeseries_many(site_ids)

# Flatten to one row per (site, data point). Each row is a new dict, so the
# fetched payload in `data_timeseries` is left untouched rather than having a
# 'site_id' key written into its entries.
flat_data = [
    {**result, 'site_id': site_data['site_id']}
    for site_data in data_timeseries
    for result in site_data['results']
]

# Create a DataFrame.
# NOTE: this rebinds `df`, which the aggregate cell also assigns; the name is
# kept so any cell relying on it keeps working, but the value depends on
# which of the two cells ran last.
df = pd.DataFrame(flat_data)

# Ensure the date column is in datetime format
df['date'] = pd.to_datetime(df['date'])

# Preview the flattened rows. A bare `df` in the middle of a cell renders
# nothing, so display explicitly (`display` is provided by the IPython kernel).
display(df.head())

# Pivot to wide form: one row per date, one column per site, values are pageviews.
pivot_df = df.pivot(index='date', columns='site_id', values='pageviews')

# Plot the data for all sites in a single plot, separate lines for each site
fig1 = px.line(pivot_df, title='Plausible Analytics pageviews last 30 days')
fig1.update_layout(yaxis_title='Count', autosize=False, width=900, height=500)
fig1.show()

# Plot the pageviews of all sites as an area chart in a single plot
fig2 = px.area(pivot_df, title='Plausible Analytics pageviews last 30 days')
fig2.update_layout(yaxis_title='Count', autosize=False, width=900, height=500)
fig2.show()




