In [None]:
# Core libraries
import pandas as pd
import numpy as np


# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
from matplotlib.colors import LogNorm
import matplotlib.ticker as mticker
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize
from matplotlib.ticker import FuncFormatter
from matplotlib.colors import LinearSegmentedColormap, to_hex

In [None]:
# Custom diverging color maps (blue -> white -> orange) for matplotlib-based plots.
# Each map carries its own name so the two remain distinguishable if they are
# ever registered or looked up by name.
custom_cmap = LinearSegmentedColormap.from_list("custom_cmap", ["steelblue", "white", "orange"])
custom_cmap2 = LinearSegmentedColormap.from_list("custom_cmap2", ["lightsteelblue", "white", "orange"])

In [None]:
# Colorscale stops as [position, color] pairs: steelblue at the low end,
# white at the midpoint, orange at the high end.
custom_scale = [
    [position, color]
    for position, color in ((0.0, "steelblue"), (0.5, "white"), (1.0, "orange"))
]

In [None]:
def convert_to_datetime(df, columns, errors='coerce', format=None):
    """Convert the given columns of ``df`` to pandas datetimes.

    The frame is modified in place and also returned, so the call works both
    as a statement and on the right-hand side of an assignment.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be converted.
    columns : str or iterable of str
        Column name(s) to convert. A single name may be passed as a plain
        string. Names that are not columns of ``df`` are reported with a
        printed warning and skipped.
    errors : str, default 'coerce'
        Forwarded to ``pandas.to_datetime``.
    format : str, optional
        Forwarded to ``pandas.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    # A bare string would otherwise be iterated character by character,
    # converting nothing and printing one warning per character.
    if isinstance(columns, str):
        columns = [columns]

    for col in columns:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors=errors, format=format)
        else:
            print(f"Warning: Column '{col}' not found in DataFrame.")
    return df

## Trial goals

In [None]:
# Load the trial-goals mart with every column name lower-cased.
df_tg = pd.read_csv('trial_goals_mart.csv').rename(columns=str.lower)
print("shape of the data:", df_tg.shape)
df_tg

In [None]:
# Show the dtype pandas inferred for each column of the freshly loaded frame.
df_tg.dtypes

In [None]:
# Parse the date-like columns of the trial-goals frame, then re-check the
# dtypes and preview the first rows.
df_tg = convert_to_datetime(
    df_tg,
    columns=[
        'timestamp',
        'trial_start',
        'trial_start_real',
        'trial_end',
        'trial_week',
    ],
)
print(df_tg.dtypes)
df_tg.head()

In [None]:
# Count missing values per column.
df_tg.isnull().sum()

In [None]:
# Distinct organizations that reached each funnel step, ordered by step number.
funnel_counts = (
    df_tg
    .groupby(['activity_name', 'funnel_step'], as_index=False)
    .agg(orgs=('organization_id', 'nunique'))
    .sort_values('funnel_step')
)

# Every step is expressed as a share of the organizations seen at step 1.
first_step_count = funnel_counts.loc[funnel_counts['funnel_step'].eq(1), 'orgs'].iloc[0]
funnel_counts['share'] = funnel_counts['orgs'] / first_step_count
funnel_counts['label'] = (
    funnel_counts['orgs'].astype(str)
    + ' orgs ('
    + funnel_counts['share'].mul(100).round(1).astype(str)
    + '%)'
)

fig = px.funnel(
    funnel_counts,
    x='orgs',
    y='activity_name',
    title='Trial Goals Completed by Organizations',
    labels={'orgs': 'Organizations', 'activity_name': 'Funnel Step'},
)

fig.update_traces(marker={'color': 'steelblue'})

# Pin the category axis to funnel-step order.
fig.update_layout(
    yaxis={
        'categoryorder': 'array',
        'categoryarray': funnel_counts.sort_values('funnel_step')['activity_name'].tolist(),
    }
)

# One text annotation per step carrying the "<n> orgs (<share>%)" label.
for step_orgs, step_name, step_label in zip(
    funnel_counts['orgs'], funnel_counts['activity_name'], funnel_counts['label']
):
    fig.add_annotation(
        x=step_orgs,
        y=step_name,
        text=step_label,
        showarrow=False,
        xanchor='left',
        yanchor='middle',
        font={'size': 12, 'color': 'black'},
    )

fig.show()

In [None]:
df = df_tg.copy()

# Keep only the calendar date of the trial week so the column labels stay short.
df['trial_week'] = pd.to_datetime(df['trial_week']).dt.date

# Distinct organizations per (week, activity); funnel_step is kept for ordering.
grouped = (
    df
    .groupby(['trial_week', 'activity_name', 'funnel_step'], as_index=False)
    .agg(unique_org_count=('organization_id', 'nunique'))
)

# Activities listed in funnel-step order.
activity_order = (
    grouped[['activity_name', 'funnel_step']]
    .drop_duplicates()
    .sort_values('funnel_step')
)

# An ordered categorical makes the pivot rows follow the funnel order.
grouped['activity_name'] = pd.Categorical(
    grouped['activity_name'],
    categories=activity_order['activity_name'],
    ordered=True,
)

# Activity x week matrix; combinations with no organizations become 0.
heatmap_data = (
    grouped
    .pivot(index='activity_name', columns='trial_week', values='unique_org_count')
    .fillna(0)
)

# px.imshow takes the raw 2D array plus explicit axis labels.
fig = px.imshow(
    heatmap_data.to_numpy(),
    x=list(map(str, heatmap_data.columns)),
    y=heatmap_data.index,
    labels={'x': "Trial Start Week", 'y': "Activity Name", 'color': "Unique Organizations"},
    color_continuous_scale=['steelblue', 'white', 'orange'],  # same stops as the custom color scheme
    aspect="auto",
    text_auto=True,
)

fig.update_layout(title='Trial goals completion', xaxis_tickangle=-45)

fig.show()

In [None]:
df = df_tg.copy()

# Whole calendar days between each event and the trial start. The dates are
# converted back to datetime64 before subtracting so the difference is a real
# timedelta column; subtracting the object-dtype `.dt.date` Series directly
# makes `.dt.days` depend on pandas inferring a timedelta dtype.
event_day = pd.to_datetime(df['timestamp'].dt.date)
trial_start_day = pd.to_datetime(df['trial_start'].dt.date)
df['days_since_trial_start'] = (event_day - trial_start_day).dt.days

# Drop events logged before the trial start (rows without a day count fail
# the comparison and are dropped too).
df = df[df['days_since_trial_start'] >= 0]

# Denominator for the shares: organizations with at least one remaining event.
total_unique_orgs = df['organization_id'].nunique()

all_days = sorted(df['days_since_trial_start'].unique())
activities = df['activity_name'].unique()

# Earliest day on which each organization logged each activity. An organization
# counts towards the cumulative total from that day onwards.
# Rows with a missing activity or organization id are ignored here, in line
# with `nunique()` above, which also ignores missing ids.
first_reached = (
    df
    .groupby(['activity_name', 'organization_id'], as_index=False)['days_since_trial_start']
    .min()
)

results = []

for activity in activities:
    first_days = np.sort(
        first_reached.loc[
            first_reached['activity_name'] == activity, 'days_since_trial_start'
        ].to_numpy()
    )
    # side='right' counts the organizations whose first day is <= each day,
    # without rescanning the activity's rows once per day.
    cumulative_counts = np.searchsorted(first_days, all_days, side='right')
    for day, cumulative_unique_count in zip(all_days, cumulative_counts):
        cumulative_unique_count = int(cumulative_unique_count)
        results.append({
            'activity_name': activity,
            'days_since_trial_start': day,
            'cumulative_unique_orgs': cumulative_unique_count,
            'cumulative_share': cumulative_unique_count / total_unique_orgs
        })

# Create DataFrame from results
cumulative_df = pd.DataFrame(results)


fig = px.line(
    cumulative_df,
    x='days_since_trial_start',
    y='cumulative_share',
    color='activity_name',
    title='Speed of reaching trial goals by days since trial start',
    labels={
        'days_since_trial_start': 'Days since trial start',
        'cumulative_share': 'Share of total trialists',
        'activity_name': 'Activity Name'
    },
    markers=True
)

# Shares are fractions in [0, 1]; render the axis ticks as percentages.
fig.update_yaxes(tickformat='.0%')

fig.show()

## Trial activation

In [None]:
# Load the trial-activation mart with every column name lower-cased.
df_ta = pd.read_csv('trial_activation_mart.csv').rename(columns=str.lower)
print("shape of the data:", df_ta.shape)
df_ta

In [None]:
# Parse the date-like columns of the trial-activation frame, then re-check the
# dtypes and preview the first rows.
df_ta = convert_to_datetime(
    df_ta,
    columns=[
        'timestamp',
        'trial_start',
        'trial_start_real',
        'trial_end',
        'trial_week',
    ],
)
print(df_ta.dtypes)
df_ta.head()

In [None]:
# Distinct organizations per (trial week, status). Counting distinct ids here
# matches how the weekly denominator below is computed, so a cell's percentage
# cannot exceed 100% even if an organization appears in more than one row.
status_heatmap = (
    df_ta
    .groupby(['trial_week', 'trial_status'])['organization_id']
    .nunique()
    .reset_index(name='org_count')
)

# Distinct organizations per trial week (denominator for the percentages).
total_per_week = (
    df_ta
    .groupby('trial_week')['organization_id']
    .nunique()
    .reset_index(name='total_orgs')
)

status_heatmap = status_heatmap.merge(total_per_week, on='trial_week')
status_heatmap['percent'] = (status_heatmap['org_count'] / status_heatmap['total_orgs']) * 100
status_heatmap['label'] = status_heatmap['percent'].map('{:.1f}%'.format)

# Status x week matrices: counts drive the color, percentage labels the cell text.
z_values = status_heatmap.pivot(index='trial_status', columns='trial_week', values='org_count').fillna(0)
text_labels = status_heatmap.pivot(index='trial_status', columns='trial_week', values='label').fillna("")

# Known statuses come first in a fixed order; any other status present in the
# data is appended rather than silently dropped from the chart.
ordered_status = ['Completed', 'Not Completed', 'Unknown']
row_order = [s for s in ordered_status if s in z_values.index]
row_order += [s for s in z_values.index if s not in ordered_status]
z_values = z_values.reindex(row_order)
text_labels = text_labels.reindex(z_values.index)

fig = go.Figure(data=go.Heatmap(
    z=z_values.values,
    x=z_values.columns.astype(str),
    y=z_values.index,
    text=text_labels.values,
    texttemplate="%{text}",
    colorscale=custom_scale,
    colorbar_title="Organizations"
))

fig.update_layout(
    title="Trial Status Distribution by Trial Week",
    xaxis_title="Trial Week",
    yaxis_title="Trial Status"
)

fig.show()

In [None]:
df = df_ta.copy()

# Number of rows for every (last activity, total steps) combination.
grouped = (
    df
    .groupby(['last_trial_activity', 'total_steps'])
    .size()
    .rename('record_count')
    .reset_index()
)

# Reshape to a matrix: one row per last activity, one column per step count;
# combinations that never occur become 0.
heatmap_data = (
    grouped
    .set_index(['last_trial_activity', 'total_steps'])['record_count']
    .unstack('total_steps')
    .fillna(0)
)

fig = px.imshow(
    heatmap_data.to_numpy(),
    x=heatmap_data.columns.astype(str),
    y=heatmap_data.index,
    labels={'x': "Total Steps", 'y': "Last Trial Activity", 'color': "Number of Records"},
    color_continuous_scale=['steelblue', 'white', 'orange'],
    aspect="auto",
    text_auto=True,
)

# Blank y-axis title plus wider left/bottom margins for the tick labels.
fig.update_layout(
    title='Latest status: last trial activity per total trial steps completed',
    yaxis={'title': ''},
    xaxis_tickangle=-45,
    height=600,
    margin={'l': 150, 'r': 50, 't': 80, 'b': 150},
)

fig.show()