In [1]:
import numpy as np
import pandas as pd
import os
import dash
from dash import dash_table
from dash import dcc, html
import plotly.express as px
from datetime import datetime, timedelta
import plotly.figure_factory as ff

In [2]:
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
    DateRange,
    Dimension,
    Metric,
    RunReportRequest,
    OrderBy,
    CohortSpec,
    Cohort,
    CohortsRange,
)

In [3]:
## Global configuration: credentials, GA4 property id, and the shared API client

# Service-account key file, exposed through the environment variable that
# Google's client libraries consult for application credentials.
os.environ.update({"GOOGLE_APPLICATION_CREDENTIALS": "service_account.json"})

# GA4 property that every report request in this notebook targets.
property_id = "468166589"

# One Data API client, reused by all run_report calls below.
client = BetaAnalyticsDataClient()

In [4]:
# Traffic-source report: one row per session source, 2025-01-01 through today.
request = RunReportRequest(
    property=f"properties/{property_id}",
    dimensions=[Dimension(name="sessionSource")],
    # Metric order matters: the cell that builds `topsources` reads
    # metric_values by position (0..3) in exactly this order.
    metrics=[
        Metric(name=metric_name)
        for metric_name in (
            "activeUsers",
            "newUsers",
            "bounceRate",
            "averageSessionDuration",
        )
    ],
    date_ranges=[DateRange(start_date="2025-01-01", end_date="today")],
)

# Run the report and keep the raw response for the next cell.
response1 = client.run_report(request)

In [5]:
# Turn each report row into a flat record. Positions mirror the request:
# dimension 0 is the session source; metrics 0..3 are active users, new users,
# bounce rate and average session duration.
data = [
    {
        'Source': row.dimension_values[0].value,
        'Active Users': int(row.metric_values[0].value),
        'New Users': int(row.metric_values[1].value),
        'Bounce Rate': float(row.metric_values[2].value),
        'Avg Session Duration (s)': float(row.metric_values[3].value),
    }
    for row in response1.rows
]

# One row per traffic source; the bare name below renders the frame as the cell output.
topsources = pd.DataFrame(data)
topsources

Unnamed: 0,Source,Active Users,New Users,Bounce Rate,Avg Session Duration (s)
0,google,66094,60366,0.139083,3515.82052
1,(not set),6950,893,1.0,6341.877914
2,google-play,5893,5396,0.142129,3153.871281
3,(direct),5753,1662,0.177894,1626.371407


In [6]:
# Country x device report: total users for every (country, device category)
# pair, largest counts first, 2025-01-01 through today.
request = RunReportRequest(
    property=f"properties/{property_id}",
    dimensions=[
        Dimension(name=dimension_name)
        for dimension_name in ("country", "deviceCategory")
    ],
    metrics=[Metric(name="totalUsers")],
    order_bys=[
        OrderBy(
            metric=OrderBy.MetricOrderBy(metric_name="totalUsers"),
            desc=True,
        )
    ],
    date_ranges=[DateRange(start_date="2025-01-01", end_date="today")],
)

# Run the report and keep the raw response for the next cell.
response2 = client.run_report(request)

In [7]:
# Flatten the country/device report: dimensions are (country, device category)
# in request order, followed by the single total-users metric.
data = [
    {
        'Country': row.dimension_values[0].value,
        'Device': row.dimension_values[1].value,
        'Total Users': int(row.metric_values[0].value),
    }
    for row in response2.rows
]

# One row per (country, device) pair; the bare name renders the frame as the cell output.
topcountries = pd.DataFrame(data)
topcountries

Unnamed: 0,Country,Device,Total Users
0,Senegal,mobile,4220
1,Nigeria,mobile,4089
2,Côte d’Ivoire,mobile,3997
3,Türkiye,mobile,2959
4,Tanzania,mobile,2638
...,...,...,...
301,Taiwan,tablet,1
302,Trinidad & Tobago,tablet,1
303,Turkmenistan,tablet,1
304,Uzbekistan,tablet,1


In [8]:
# Weekly acquisition cohorts: the first window opens on 2025-01-01 and each
# window covers seven consecutive days.
# NOTE(review): 2025-01-01 is a Wednesday, so these windows run Wed-Tue rather
# than Sun-Sat; confirm that is the intended alignment for WEEKLY granularity.
start_date = datetime(2025, 1, 1)
num_weeks = 6  # how many consecutive weekly cohorts to request

# Build Cohort_1 .. Cohort_<num_weeks>, each keyed on the first-session date.
cohorts = [
    Cohort(
        name=f"Cohort_{week_index + 1}",
        dimension="firstSessionDate",
        date_range=DateRange(
            start_date=(start_date + timedelta(weeks=week_index)).strftime("%Y-%m-%d"),
            # Six days after the window start, i.e. the seventh day inclusive.
            end_date=(start_date + timedelta(weeks=week_index, days=6)).strftime("%Y-%m-%d"),
        ),
    )
    for week_index in range(num_weeks)
]

# Follow every cohort at weekly granularity for week offsets 0 through 6.
cohort_spec = CohortSpec(
    cohorts=cohorts,
    cohorts_range=CohortsRange(granularity="WEEKLY", start_offset=0, end_offset=6),
)

# Cohort report: active users per (cohort, week offset), ordered by cohort name.
request = RunReportRequest(
    property=f"properties/{property_id}",
    dimensions=[
        Dimension(name="cohort"),         # cohort name assigned above
        Dimension(name="cohortNthWeek"),  # week offset within the cohort range
    ],
    metrics=[Metric(name="cohortActiveUsers")],
    order_bys=[
        OrderBy(dimension=OrderBy.DimensionOrderBy(dimension_name="cohort"))
    ],
    cohort_spec=cohort_spec,
)

# Run the report and keep the raw response for the next cell.
response = client.run_report(request)

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE: plt/sns are not referenced by the code in this cell; the imports are
# kept only so that any other cell relying on them keeps working.

# Flatten the cohort report into [cohort, week, active_users] records.
# Dimension order follows the request: cohort name first, then the week offset.
data = [
    [
        row.dimension_values[0].value,       # cohort name, e.g. "Cohort_1"
        int(row.dimension_values[1].value),  # week offset within the cohort range
        int(row.metric_values[0].value),     # active users in that week
    ]
    for row in response.rows
]
retention = pd.DataFrame(data, columns=["cohort", "week", "active_users"])

# Cohort size = active users at week offset 0, indexed by cohort name.
cohort_sizes = retention.loc[retention["week"] == 0].set_index("cohort")["active_users"]

# Attach each cohort's size to all of its rows. This is an inner merge, so a
# cohort without a week-0 row is dropped from `retention`.
retention = retention.merge(cohort_sizes.rename("cohort_size"), on="cohort")

# Retention as a percentage of the week-0 size. A zero-sized cohort is masked
# to NaN first so the division cannot produce inf; the pivot below turns the
# resulting NaN into 0.
nonzero_cohort_size = retention["cohort_size"].where(retention["cohort_size"] > 0)
retention["retention_percentage"] = retention["active_users"] / nonzero_cohort_size * 100

# Wide tables for the heatmap: one row per cohort, one column per week offset.
# (cohort, week) pairs absent from the report are shown as 0.
retention_pivot_users = (
    retention.pivot(index="cohort", columns="week", values="active_users")
    .fillna(0)
    .astype(int)
)
retention_pivot_percentage = (
    retention.pivot(index="cohort", columns="week", values="retention_percentage")
    .fillna(0)
)

# Reverse the row order of both tables together so they stay aligned.
retention_pivot_users = retention_pivot_users[::-1]
retention_pivot_percentage = retention_pivot_percentage[::-1]

# Cell labels for the annotated heatmap: "<active users> (<retention>%)".
annotations = np.array([
    [f"{users} ({pct:.1f}%)" for users, pct in zip(row_users, row_pct)]
    for row_users, row_pct in zip(retention_pivot_users.values, retention_pivot_percentage.values)
])

In [10]:
# Purchasers report: total purchasers per country, largest counts first,
# 2025-01-01 through today.
request = RunReportRequest(
    property=f"properties/{property_id}",
    dimensions=[Dimension(name="country")],
    metrics=[Metric(name="totalPurchasers")],
    order_bys=[
        OrderBy(
            metric=OrderBy.MetricOrderBy(metric_name="totalPurchasers"),
            desc=True,
        )
    ],
    date_ranges=[DateRange(start_date="2025-01-01", end_date="today")],
)

# Run the report. Note that this rebinds `response2`, which previously held
# the country/device report; the next cell reads the purchasers rows from it.
response2 = client.run_report(request)

In [11]:
# Flatten the purchasers report: the single dimension is the country and the
# single metric is the purchaser count.
data = [
    {
        'Country': row.dimension_values[0].value,
        'Total Purchasers': int(row.metric_values[0].value),
    }
    for row in response2.rows
]

# One row per country; the bare name renders the frame as the cell output.
purchasers = pd.DataFrame(data)
purchasers

Unnamed: 0,Country,Total Purchasers
0,Nigeria,34
1,India,13
2,Senegal,13
3,Pakistan,12
4,Türkiye,11
5,Saudi Arabia,10
6,Tanzania,10
7,Kenya,9
8,Israel,6
9,Malaysia,6


In [12]:
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html
import plotly.express as px
import plotly.figure_factory as ff
from datetime import timedelta


def _kpi_card(title, value_text, text_class, width):
    """Return a column of the given width holding one KPI card (title above a headline value)."""
    return dbc.Col(
        dbc.Card(
            [
                dbc.CardBody([
                    html.H5(title, className="card-title"),
                    html.H2(value_text, className=text_class),
                ])
            ],
            className="shadow-lg rounded-4",
        ),
        width=width,
    )


def _graph_card(figure, width):
    """Return a column of the given width holding a card that wraps one Plotly figure."""
    return dbc.Col(
        dbc.Card(dcc.Graph(figure=figure), className="shadow-lg p-3 rounded-4"),
        width=width,
    )


# Headline KPIs derived from the report frames built in the earlier cells.
# NOTE(review): the user totals are sums over per-source rows; if one user can
# be attributed to several sources, the sum may exceed the distinct-user count.
total_active_users = topsources["Active Users"].sum()
total_new_users = topsources["New Users"].sum()
total_purchasers = purchasers["Total Purchasers"].sum()
# Unweighted means across sources: a low-traffic source counts as much as a large one.
avg_bounce_rate = topsources["Bounce Rate"].mean()
avg_session_duration = topsources["Avg Session Duration (s)"].mean()
purchasers_pivot = purchasers.pivot_table(index="Country", values="Total Purchasers", aggfunc="sum")

# `topcountries` has one row per (country, device), so collapse it to one row
# per country before picking the ten largest; otherwise the "top countries"
# chart would rank country/device pairs instead of countries.
top_countries_by_users = (
    topcountries.groupby("Country", as_index=False)["Total Users"].sum()
    .nlargest(10, "Total Users")
)

# Figures
fig_pie = px.pie(topsources, names="Source", values="Active Users", title="Traffic Sources Distribution")
fig_bar = px.bar(
    topsources, x="Source", y=["Active Users", "New Users"],
    barmode="group", title="Active vs New Users per Source",
)
fig_donut = px.pie(topcountries, names="Device", values="Total Users", title="Total Users by Device", hole=0.4)
fig_countries = px.bar(
    top_countries_by_users, x="Country", y="Total Users", color="Country",
    title="Top 10 Countries by Total Users",
)
fig_retention_trends = px.line(
    retention, x="week", y="retention_percentage", color="cohort",
    title="Weekly Retention Trends", markers=True
)
# Cells are coloured by retention percentage and labelled with the prepared
# "<users> (<pct>%)" annotation strings.
fig_heatmap = ff.create_annotated_heatmap(
    z=retention_pivot_percentage.values,
    x=list(retention_pivot_percentage.columns),
    y=list(retention_pivot_percentage.index),
    annotation_text=annotations,
    colorscale="Blues",
    showscale=True
).update_layout(title="User Retention Cohort Analysis")
# Single-column heatmap: one row per country, coloured by purchaser count.
fig_purchasers_heatmap = ff.create_annotated_heatmap(
    z=purchasers_pivot.values,
    x=["Total Purchasers"],
    y=purchasers_pivot.index.tolist(),
    colorscale="Reds",
    showscale=True
).update_layout(title="Total Purchasers by Country Heatmap", margin=dict(l=100, r=20, t=50, b=50))


# Dash App Setup
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

app.layout = dbc.Container([
    html.H1("GA4 Analytics Dashboard", className="text-center mb-4"),

    # KPI Cards Section
    dbc.Row([
        _kpi_card("Active Users", f"{total_active_users:,}", "text-primary", 2),
        _kpi_card("New Users", f"{total_new_users:,}", "text-success", 2),
        _kpi_card("Purchasers", f"{total_purchasers:,}", "text-primary", 2),
        # The bounce rate is held as a fraction (e.g. 0.139), so use the
        # percent format, which scales by 100 and appends "%".
        _kpi_card("Avg Bounce Rate", f"{avg_bounce_rate:.2%}", "text-danger", 3),
        _kpi_card("Avg Session Duration", f"{avg_session_duration:.2f} sec", "text-warning", 3),
    ], className="mb-4"),

    # Charts Section
    dbc.Row([
        _graph_card(fig_pie, 6),
        _graph_card(fig_bar, 6),
    ], className="mb-4"),

    dbc.Row([
        _graph_card(fig_purchasers_heatmap, 12),
    ], className="mb-4"),

    dbc.Row([
        _graph_card(fig_countries, 12),
    ], className="mb-4"),

    dbc.Row([
        _graph_card(fig_donut, 6),
        _graph_card(fig_retention_trends, 6),
    ], className="mb-4"),

    dbc.Row([
        _graph_card(fig_heatmap, 12),
    ]),
], fluid=True)

# Run app
if __name__ == '__main__':
    # Use app.run when the installed Dash provides it; otherwise fall back to
    # the older run_server name. The fallback is only evaluated when needed.
    launch_server = getattr(app, "run", None) or app.run_server
    launch_server(debug=True)