# Analyze the usage of the Tool

In [None]:
# Install the third-party packages this notebook imports (-q keeps pip's output quiet).
%pip install -q pandas matplotlib seaborn

In [98]:
import sqlite3
from pathlib import Path

import matplotlib.pyplot as plt # diagrams
import pandas as pd             #db querys with visualization
import seaborn as sns           # Heatmaps etc.

## Open Database Connection

In [43]:
# Location of the SQLite analytics database.
# Check that this path matches the one in your config file.
# In theory we could also load it from config.py (maybe in a later version).
DB_PATH = Path("analytics.db")

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which would only surface later as a confusing "no such table"
# error. Fail early with a clear message instead.
if not DB_PATH.is_file():
    raise FileNotFoundError(f"Analytics database not found: {DB_PATH.resolve()}")

connection = sqlite3.connect(DB_PATH)
cursor = connection.cursor()

## Analyze sources of the Queries

In [None]:
# Basic pattern for reading the raw rows through the cursor
# (useful for exporting, e.g. saving to CSV). Uncomment to use:
# cursor.execute("select * from tblSessions")
# rows = cursor.fetchall()
# rows

First read all the data into memory and then analyze it. This is faster than ad-hoc queries, but ad-hoc queries might be the better choice if the server is running in parallel.

In [None]:
# Load the complete sessions table into a DataFrame for in-memory analysis.
query = "select * from tblSessions"
df_sessions = pd.read_sql_query(sql=query, con=connection)
# Preview the first rows as the cell output.
df_sessions.head(n=5)

Filter by date if it is useful for your analysis (optional!)

In [None]:
# Optional: reload df_sessions restricted to a time window.
# Format: Year-Month-Day Hours:Minutes:Seconds
start_date = "2025-01-01 00:00:00"
end_date = "2025-01-23 18:25:00"
# Hand the bounds to the driver as bound parameters (sqlite3 uses the "?"
# placeholder style) instead of formatting them into the SQL text, so that
# quoting is handled by the driver and the values cannot alter the statement.
query = "select * from tblSessions where Timestamp between ? and ?"
df_sessions = pd.read_sql_query(query, connection, params=(start_date, end_date))
df_sessions.head()

Enhancement of the data with calculated values used later

In [92]:
# Timestamp is stored as a string; parse it into a real datetime column so it
# can be used for filtering and date arithmetic.
df_sessions["Datetime"] = df_sessions["Timestamp"].pipe(pd.to_datetime)
# The weekday name of every entry is needed for the analysis further down.
df_sessions["Day"] = df_sessions["Datetime"].dt.day_name()

### Where do the users come from

Sample of an ad-hoc query that returns the same content as the in-memory analysis below.
Use it only if you need special data or relations, or if your amount of data is very large.

In [None]:
# Let the database count the sessions per continent / country / city.
query = (
    "select Continent, Country, City, count(1) as SessionCount "
    "from tblSessions "
    "group by Continent, Country, City"
)
df_where = pd.read_sql_query(sql=query, con=connection)
df_where.head(n=5)

Analytics with in-memory data

In [None]:
# Count the sessions per continent / country / city from the in-memory frame.
df_grouped_location = (
    df_sessions
    .groupby(by=["Continent", "Country", "City"])
    .size()
    .reset_index(name="SessionCount")
)
df_grouped_location.head(n=5)

In [None]:
# Aggregate the per-city counts up to country level, largest first.
df_country = (
    df_grouped_location.groupby("Country")["SessionCount"]
    .sum()
    .reset_index()
    .sort_values(by="SessionCount", ascending=False)
)

fig, ax = plt.subplots()
ax.bar(df_country["Country"], df_country["SessionCount"], color="skyblue")
# Label through the Axes object: writing `plt.xlabel = "..."` only rebinds the
# pyplot function to a string and leaves the plot without axis labels.
ax.set_xlabel("Country")
ax.set_ylabel("Number of Sessions")
ax.set_title("Number of Sessions by Country")
# Optional tweaks depending on your data:
# fig.set_size_inches(10, 6)
# ax.tick_params(axis="x", rotation=90)
plt.show()

Select a country to get more details about the cities

In [None]:
# Country whose cities should be broken down in detail.
selected_country = "The Netherlands"
# Keep only the cities of the selected country, largest first.
df_citys = (
    df_grouped_location[df_grouped_location["Country"] == selected_country]
    .sort_values(by="SessionCount", ascending=False)
)

fig, ax = plt.subplots()
ax.bar(df_citys["City"], df_citys["SessionCount"])
# Label through the Axes object: writing `plt.xlabel = "..."` only rebinds the
# pyplot function to a string and leaves the plot without axis labels.
ax.set_xlabel("City")
ax.set_ylabel("Number of Sessions")
ax.set_title(f"Number of Sessions by City in {selected_country}")
plt.show()

### Distribution of users over time

In [None]:
# Ad-hoc query: number of sessions per calendar day, in chronological order.
query = """
select strftime('%Y-%m-%d', Timestamp) as Date, Count(*) as SessionCount
from tblSessions
Group by Date
Order by Date
"""
df_time_distribution = pd.read_sql_query(sql=query, con=connection)
# Preview the first rows as the cell output.
df_time_distribution.head(n=5)

Day of Week and Time

In [None]:
# Add the hour of day (taken from the parsed Datetime) as a column on df_sessions.
df_sessions["Hour"] = df_sessions["Datetime"].dt.hour
# Count the sessions for every weekday / hour combination.
df_time_distribution = (
    df_sessions
    .groupby(by=["Day", "Hour"])
    .size()
    .reset_index(name="SessionCount")
)
df_time_distribution.head(n=5)

In [None]:
# Pivot to a weekday x hour matrix of session counts (0 where nothing was recorded).
heatmap = df_time_distribution.pivot_table(
    index="Day", columns="Hour", values="SessionCount", aggfunc="sum", fill_value=0
)
# Put the weekdays into calendar order. fill_value=0 keeps weekdays without any
# session as rows of zeros instead of NaN rows, consistent with the pivot above.
ordered_days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
heatmap = heatmap.reindex(ordered_days, fill_value=0)

ax = sns.heatmap(heatmap, cmap="coolwarm", annot=True, fmt="g")
ax.set_title("Heatmap of Sessions by Hour and Weekday")
# Label through the Axes object: writing `plt.xlabel = "..."` only rebinds the
# pyplot function to a string and never sets the axis label.
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Day of Week")
plt.tight_layout()
plt.show()

### Browser, Languages and Operating Systems

In [None]:
# Ad-hoc query: number of sessions per client, most frequent first.
query = """
select client, Count(*) as SessionCount
from tblSessions
Group by client
Order by SessionCount DESC
"""
df_clients = pd.read_sql_query(sql=query, con=connection)
# Preview the first rows as the cell output.
df_clients.head(n=5)