In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from datetime import datetime, timedelta
import os
import sys

# Dynamic path resolution to find 'src' directory
def add_src_to_path(start_dir=None, max_levels: int = 5) -> str:
    """Find the nearest 'src' directory and put it at the front of sys.path.

    The search looks in ``start_dir`` first and then walks up through its
    parent directories, examining at most ``max_levels`` directories in total.

    Args:
        start_dir: Directory to start searching from. When None (the
            default), the directory containing ``__file__`` is used if that
            global is defined, otherwise the current working directory.
        max_levels: Maximum number of directories to examine, counting the
            starting directory itself. Defaults to 5.

    Returns:
        The path of the 'src' directory that was found.

    Raises:
        FileNotFoundError: If no 'src' directory is found within
            ``max_levels`` directories or before the filesystem root.
    """
    if start_dir is None:
        # __file__ is not always defined (e.g. in an interactive session),
        # so fall back to the current working directory in that case.
        if '__file__' in globals():
            current_dir = os.path.dirname(os.path.abspath(__file__))
        else:
            current_dir = os.getcwd()
    else:
        current_dir = os.path.abspath(start_dir)

    for _ in range(max_levels):
        src_path = os.path.join(current_dir, 'src')
        if os.path.isdir(src_path):
            # Insert only once so repeated calls do not grow sys.path.
            if src_path not in sys.path:
                sys.path.insert(0, src_path)
            return src_path
        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            # dirname() of the filesystem root is the root itself: stop here.
            break
        current_dir = parent_dir
    raise FileNotFoundError("Could not find 'src' directory.")

# Make the 'src' directory importable first; the config import below relies on it
src_path = add_src_to_path()
from config import SYNTHETIC_DATA_PATH

# Notebook-wide plot defaults: seaborn "whitegrid" theme and 14x8 figures
sns.set_theme(style="whitegrid")
plt.rc('figure', figsize=(14, 8))

Revenue Data Review

In [2]:
# Load the monthly revenue CSV from the configured synthetic data directory
print("Loading revenue data...")
revenue_df = pd.read_csv(os.path.join(SYNTHETIC_DATA_PATH, '02_sub_revenue_monthly.csv'))

# Look at the first few rows
print("\nFirst 5 rows:")
print(revenue_df.head())

# See what columns we have
print("\nColumns in the data:")
print(revenue_df.columns.tolist())

# Basic info about the data.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly: wrapping it in print() would append a stray "None" line.
print("\nData info:")
revenue_df.info()

# Some basic statistics
print("\nBasic statistics:")
print(revenue_df.describe())

Loading revenue data...

First 5 rows:
                             account_id       month  tenure_months  seats  \
0  957a3bb1-c9cd-4abb-a19f-a858c9674ab1  2022-01-01              1    402   
1  957a3bb1-c9cd-4abb-a19f-a858c9674ab1  2022-02-01              2    402   
2  957a3bb1-c9cd-4abb-a19f-a858c9674ab1  2022-03-01              3    402   
3  957a3bb1-c9cd-4abb-a19f-a858c9674ab1  2022-04-01              4    402   
4  957a3bb1-c9cd-4abb-a19f-a858c9674ab1  2022-05-01              5    402   

         plan    MRR     ARR  churn_flag       event_type  MRR_change  
0  Enterprise  24120  289440           0  Retained (Flat)     24120.0  
1  Enterprise  24120  289440           0  Retained (Flat)         0.0  
2  Enterprise  24120  289440           0  Retained (Flat)         0.0  
3  Enterprise  24120  289440           0  Retained (Flat)         0.0  
4  Enterprise  24120  289440           0  Retained (Flat)         0.0  

Columns in the data:
['account_id', 'month', 'tenure_months', 'se

Reviewing an Account Over Time

In [3]:
# Convert month to datetime (safe to re-run: datetimes pass through unchanged)
revenue_df['month'] = pd.to_datetime(revenue_df['month'])

# Pick one account to follow. Row 500 is just an arbitrary sample row; clamp it
# to the last row so a table with 500 rows or fewer does not raise IndexError.
SAMPLE_ROW = 500
account_id = revenue_df['account_id'].iloc[min(SAMPLE_ROW, len(revenue_df) - 1)]
print(f"Following account: {account_id}")

# Get all months for this account, oldest first
account_data = revenue_df[revenue_df['account_id'] == account_id].sort_values('month')

print("\nThis account's MRR over time:")
print(account_data[['month', 'MRR', 'seats', 'plan', 'event_type']].to_string(index=False))

# Calculate simple retention: last month's MRR relative to the first month's
first_mrr = account_data['MRR'].iloc[0]
last_mrr = account_data['MRR'].iloc[-1]

print(f"\nFirst month MRR: ${first_mrr:,.2f}")
print(f"Last month MRR: ${last_mrr:,.2f}")
print(f"Change: ${last_mrr - first_mrr:,.2f}")
if first_mrr:
    print(f"Retention rate: {(last_mrr / first_mrr * 100):.1f}%")
else:
    # A zero starting MRR makes the ratio undefined (it would print inf/nan).
    print("Retention rate: n/a (first month MRR is 0)")

Following account: 12db382e-e3c0-4d62-9935-1c0e2d0500e2

This account's MRR over time:
     month  MRR  seats  plan          event_type
2022-02-01  570     38 Basic     Retained (Flat)
2022-03-01  570     38 Basic     Retained (Flat)
2022-04-01  570     38 Basic     Retained (Flat)
2022-05-01  570     38 Basic     Retained (Flat)
2022-06-01  570     38 Basic     Retained (Flat)
2022-07-01  570     38 Basic     Retained (Flat)
2022-08-01  570     38 Basic     Retained (Flat)
2022-09-01  570     38 Basic     Retained (Flat)
2022-10-01  570     38 Basic     Retained (Flat)
2022-11-01  570     38 Basic     Retained (Flat)
2022-12-01  570     38 Basic     Retained (Flat)
2023-01-01  570     38 Basic     Retained (Flat)
2023-02-01  570     38 Basic     Retained (Flat)
2023-03-01  570     38 Basic     Retained (Flat)
2023-04-01  570     38 Basic     Retained (Flat)
2023-05-01  570     38 Basic     Retained (Flat)
2023-06-01  570     38 Basic     Retained (Flat)
2023-07-01  570     38 Basic   

Month-over-Month NRR Calculation

In [4]:
# Order rows by account, then chronologically within each account
revenue_df = revenue_df.sort_values(by=['account_id', 'month'])

# MRR summed over every account row in each month, oldest month first
monthly_totals = (
    revenue_df
    .groupby('month')['MRR']
    .sum()
    .reset_index()
    .sort_values('month')
)

print("Total MRR by month:")
print(monthly_totals.head(10))

# Compare each month with the one before it; the first month has no
# predecessor, so its change and growth rate are NaN.
monthly_totals = monthly_totals.assign(
    previous_mrr=lambda totals: totals['MRR'].shift(1),
    mrr_change=lambda totals: totals['MRR'] - totals['previous_mrr'],
    growth_rate=lambda totals: (totals['MRR'] / totals['previous_mrr'] - 1) * 100,
)

print("\nMonth-over-month growth:")
print(monthly_totals[['month', 'MRR', 'mrr_change', 'growth_rate']].head(10))

Total MRR by month:
       month       MRR
0 2022-01-01     29175
1 2022-02-01   1553835
2 2022-03-01   2698010
3 2022-04-01   3879330
4 2022-05-01   5009105
5 2022-06-01   5980325
6 2022-07-01   6932170
7 2022-08-01   8134980
8 2022-09-01   9255030
9 2022-10-01  10571625

Month-over-month growth:
       month       MRR  mrr_change  growth_rate
0 2022-01-01     29175         NaN          NaN
1 2022-02-01   1553835   1524660.0  5225.912596
2 2022-03-01   2698010   1144175.0    73.635553
3 2022-04-01   3879330   1181320.0    43.784864
4 2022-05-01   5009105   1129775.0    29.122941
5 2022-06-01   5980325    971220.0    19.389092
6 2022-07-01   6932170    951845.0    15.916275
7 2022-08-01   8134980   1202810.0    17.351132
8 2022-09-01   9255030   1120050.0    13.768319
9 2022-10-01  10571625   1316595.0    14.225724


In [5]:
import pandas as pd
import os
import sys

# Add src to path
def add_src_to_path(start_dir=None, max_levels: int = 5) -> str:
    """Find the nearest 'src' directory and put it at the front of sys.path.

    NOTE(review): this re-defines the helper of the same name from the first
    cell and shadows it; consider moving it into a shared module instead.

    The search looks in ``start_dir`` first and then walks up through its
    parent directories, examining at most ``max_levels`` directories in total.

    Args:
        start_dir: Directory to start searching from. When None (the
            default), the directory containing ``__file__`` is used if that
            global is defined, otherwise the current working directory.
        max_levels: Maximum number of directories to examine, counting the
            starting directory itself. Defaults to 5.

    Returns:
        The path of the 'src' directory that was found.

    Raises:
        FileNotFoundError: If no 'src' directory is found within
            ``max_levels`` directories or before the filesystem root.
    """
    if start_dir is None:
        # __file__ is not always defined (e.g. in an interactive session),
        # so fall back to the current working directory in that case.
        if '__file__' in globals():
            current_dir = os.path.dirname(os.path.abspath(__file__))
        else:
            current_dir = os.getcwd()
    else:
        current_dir = os.path.abspath(start_dir)

    for _ in range(max_levels):
        src_path = os.path.join(current_dir, 'src')
        if os.path.isdir(src_path):
            # Insert only once so repeated calls do not grow sys.path.
            if src_path not in sys.path:
                sys.path.insert(0, src_path)
            return src_path
        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            # dirname() of the filesystem root is the root itself: stop here.
            break
        current_dir = parent_dir
    raise FileNotFoundError("Could not find 'src' directory.")

# NOTE(review): the path setup and data load below repeat what earlier cells
# already do, presumably so this cell can run on its own; confirm before removing.
add_src_to_path()
from config import SYNTHETIC_DATA_PATH

# Read the monthly revenue table and parse the month column as datetimes
revenue_csv = os.path.join(SYNTHETIC_DATA_PATH, '02_sub_revenue_monthly.csv')
revenue_df = pd.read_csv(revenue_csv)
revenue_df['month'] = pd.to_datetime(revenue_df['month'])

# Number of distinct accounts that have a row in each month
accounts_per_month = (
    revenue_df
    .groupby('month')['account_id']
    .nunique()
    .reset_index()
    .rename(columns={'account_id': 'active_accounts'})
)

# An account is counted as NEW in the month of its earliest row
first_month = (
    revenue_df
    .groupby('account_id')['month']
    .min()
    .reset_index()
    .rename(columns={'month': 'first_month'})
)
new_accounts_per_month = (
    first_month
    .groupby('first_month')
    .size()
    .reset_index(name='new_accounts')
    .rename(columns={'first_month': 'month'})
)

# Total MRR per month
monthly_mrr = revenue_df.groupby('month')['MRR'].sum().reset_index()

# One row per month: active accounts, new accounts (0 when none), total MRR
summary = (
    accounts_per_month
    .merge(new_accounts_per_month, how='left', on='month')
    .assign(new_accounts=lambda frame: frame['new_accounts'].fillna(0).astype(int))
    .merge(monthly_mrr, on='month')
)

print("Monthly Summary (first 10 months):")
print(summary.head(10).to_string(index=False))

# Spot-check the first two months directly against the raw rows
jan_rows = revenue_df[revenue_df['month'] == '2022-01-01']
feb_rows = revenue_df[revenue_df['month'] == '2022-02-01']

print("\n" + "="*60)
print("JANUARY 2022:")
jan_accounts = jan_rows['account_id'].nunique()
jan_mrr = jan_rows['MRR'].sum()
print(f"Active accounts: {jan_accounts}")
print(f"Total MRR: ${jan_mrr:,.2f}")

print("\nFEBRUARY 2022:")
feb_accounts = feb_rows['account_id'].nunique()
feb_mrr = feb_rows['MRR'].sum()
feb_new = summary.loc[summary['month'] == '2022-02-01', 'new_accounts'].values[0]
print(f"Active accounts: {feb_accounts}")
print(f"New accounts: {feb_new}")
print(f"Total MRR: ${feb_mrr:,.2f}")
print(f"MRR increase: ${feb_mrr - jan_mrr:,.2f}")

Monthly Summary (first 10 months):
     month  active_accounts  new_accounts      MRR
2022-01-01                4             4    29175
2022-02-01              159           155  1553835
2022-03-01              289           136  2698010
2022-04-01              423           141  3879330
2022-05-01              540           131  5009105
2022-06-01              662           139  5980325
2022-07-01              776           129  6932170
2022-08-01              895           140  8134980
2022-09-01             1019           135  9255030
2022-10-01             1142           133 10571625

JANUARY 2022:
Active accounts: 4
Total MRR: $29,175.00

FEBRUARY 2022:
Active accounts: 159
New accounts: 155
Total MRR: $1,553,835.00
MRR increase: $1,524,660.00
