# Task 3

### a)

In [None]:
import numpy as np
import pandas as pd

# Day index 0..30 (inclusive)
x = np.arange(31)

# Raw fitted retention curves for groups A and B (coefficients from https://mycurvefit.com/)
fit_a = 0.03594404 + (1.320257 - 0.03594404) / (1 + (x / 0.6530059) ** 0.9492318)
fit_b = 0.02983316 + (480458 - 0.02983316)/(1 + (x/6.395712e-11)**0.5914175)

# At x = 0 the fits evaluate to 1.320257 and 480458, so day 0 is pinned to 100%
y_a = np.where(x == 0, 1.0, fit_a)
y_b = np.where(x == 0, 1.0, fit_b)

# One row per day; every derived metric is added in a single ordered chain.
# Later keyword arguments may reference columns created by earlier ones.
df = pd.DataFrame({"days": x, "ret_a": y_a, "ret_b": y_b}).assign(
    # Daily active users: 20000 scaled by retention
    dau_a=lambda d: 20000 * d['ret_a'],
    dau_b=lambda d: 20000 * d['ret_b'],
    # Paying DAU: purchase ratio applied to DAU
    payer_dau_a=lambda d: 0.03 * d['dau_a'],
    payer_dau_b=lambda d: 0.032 * d['dau_b'],
    # Ad impressions: per-DAU impression factor applied to DAU
    imp_a=lambda d: 2.3 * d['dau_a'],
    imp_b=lambda d: 1.5 * d['dau_b'],
    # Ad revenue: impressions * eCPM / 1000
    ad_revenue_a=lambda d: 0.001 * d['imp_a'] * 10,
    ad_revenue_b=lambda d: 0.001 * d['imp_b'] * 10.7,
    # Running totals of paying DAU and ad revenue
    cum_payer_dau_a=lambda d: d['payer_dau_a'].cumsum(),
    cum_payer_dau_b=lambda d: d['payer_dau_b'].cumsum(),
    cum_ad_revenue_a=lambda d: d['ad_revenue_a'].cumsum(),
    cum_ad_revenue_b=lambda d: d['ad_revenue_b'].cumsum(),
)

df.head(31)

Unnamed: 0,days,ret_a,ret_b,dau_a,dau_b,payer_dau_a,payer_dau_b,imp_a,imp_b,ad_revenue_a,ad_revenue_b,cum_payer_dau_a,cum_payer_dau_b,cum_ad_revenue_a,cum_ad_revenue_b
0,0,1.0,1.0,20000.0,20000.0,600.0,640.0,46000.0,30000.0,460.0,321.0,600.0,640.0,460.0,321.0
1,1,0.549956,0.479281,10999.129484,9585.619924,329.973885,306.739838,25297.997814,14378.429886,252.979978,153.8492,929.973885,946.739838,712.979978,474.8492
2,2,0.365799,0.328128,7315.979882,6562.55014,219.479396,210.001604,16826.753728,9843.82521,168.267537,105.32893,1149.453281,1156.741442,881.247515,580.17813
3,3,0.280486,0.264527,5609.711566,5290.539325,168.291347,169.297258,12902.336601,7935.808987,129.023366,84.913156,1317.744628,1326.0387,1010.270881,665.091286
4,4,0.23092,0.227808,4618.400982,4556.166433,138.552029,145.797326,10622.322259,6834.24965,106.223223,73.126471,1456.296657,1471.836026,1116.494104,738.217757
5,5,0.19841,0.203332,3968.200817,4066.639004,119.046025,130.132448,9126.86188,6099.958506,91.268619,65.269556,1575.342682,1601.968474,1207.762723,803.487313
6,6,0.175395,0.185597,3507.895139,3711.944642,105.236854,118.782229,8068.158819,5567.916962,80.681588,59.576711,1680.579536,1720.750703,1288.444311,863.064024
7,7,0.15822,0.172025,3164.391707,3440.494286,94.931751,110.095817,7278.100925,5160.741429,72.781009,55.219933,1775.511287,1830.84652,1361.22532,918.283958
8,8,0.144898,0.161227,2897.956392,3224.548192,86.938692,103.185542,6665.299702,4836.822288,66.652997,51.753998,1862.449979,1934.032062,1427.878317,970.037956
9,9,0.134255,0.152386,2685.098245,3047.722766,80.552947,97.527129,6175.725963,4571.58415,61.75726,48.91595,1943.002926,2031.559191,1489.635577,1018.953907


In [None]:
# Keep only days 0..14, i.e. everything up to the end of day 15
first_15 = df[df['days'] <= 14]

# Sum the per-day DAU of each group over that window
total_dau_a = first_15['dau_a'].sum()
total_dau_b = first_15['dau_b'].sum()

# Report both totals
print(f"Total DAU count by the end of day 15 for Var. A: {total_dau_a}")
print(f"Total DAU count by the end of day 15 for Var. B: {total_dau_b}")


Total DAU count by the end of day 15 for Var. A: 76070.97480259145
Total DAU count by the end of day 15 for Var. B: 76876.29044592673


### b)

In [25]:
# Restrict to days 0..14 (end of day 15) once, then aggregate each column
through_d15 = df[df['days'] <= 14]

# Sum of the cumulative ad revenue column for each group
total_cumrev_a = through_d15['cum_ad_revenue_a'].sum()
total_cumrev_b = through_d15['cum_ad_revenue_b'].sum()

# Sum of the cumulative paying-user column for each group
total_payers_a = through_d15['cum_payer_dau_a'].sum()
total_payers_b = through_d15['cum_payer_dau_b'].sum()

# Report ad revenue totals
print(f"Total Cum Ad Rev by the end of day 15 for Var. A: {total_cumrev_a}")
print(f"Total Cum Ad Rev by the end of day 15 for Var. B: {total_cumrev_b}")

# Report paying-user totals
print(f"Total Cum Payer Count by the end of day 15 for Var. A: {total_payers_a}")
print(f"Total Cum Payer Count by the end of day 15 for Var. B: {total_payers_b}")

# "Critical IAP ARPPU": A's ad-revenue lead over B divided by B's payer lead over A
print(f"Critical IAP ARPPU: {(total_cumrev_a - total_cumrev_b) / (total_payers_b - total_payers_a)}")


Total Cum Ad Rev by the end of day 15 for Var. A: 19210.729863507568
Total Cum Ad Rev by the end of day 15 for Var. B: 13109.291113983641
Total Cum Payer Count by the end of day 15 for Var. A: 25057.47373500987
Total Cum Payer Count by the end of day 15 for Var. B: 26136.904401711938
Critical IAP ARPPU: 5.6524600770935605


### c)

In [26]:
# Restrict to days 0..29 (end of day 30) once, then aggregate each column
through_d30 = df[df['days'] <= 29]

# Sum of the cumulative ad revenue column for each group
total_cumrev_a = through_d30['cum_ad_revenue_a'].sum()
total_cumrev_b = through_d30['cum_ad_revenue_b'].sum()

# Sum of the cumulative paying-user column for each group
total_payers_a = through_d30['cum_payer_dau_a'].sum()
total_payers_b = through_d30['cum_payer_dau_b'].sum()

# Report ad revenue totals at the end of day 30
print(f"Total Cum Ad Rev by the end of day 30 for Var. A: {total_cumrev_a}")
print(f"Total Cum Ad Rev by the end of day 30 for Var. B: {total_cumrev_b}")

# Report paying-user totals at the end of day 30
print(f"Total Cum Payer Count by the end of day 30 for Var. A: {total_payers_a}")
print(f"Total Cum Payer Count by the end of day 30 for Var. B: {total_payers_b}") 

# "Critical IAP ARPPU": A's ad-revenue lead over B divided by B's payer lead over A
print(f"Critical IAP ARPPU: {(total_cumrev_a - total_cumrev_b) / (total_payers_b - total_payers_a)}")


Total Cum Ad Rev by the end of day 30 for Var. A: 50207.18827814777
Total Cum Ad Rev by the end of day 30 for Var. B: 35785.74890119652
Total Cum Payer Count by the end of day 30 for Var. A: 65487.63688454057
Total Cum Payer Count by the end of day 30 for Var. B: 71348.53363478437
Critical IAP ARPPU: 2.4606199343728994


### d)

In [None]:
import numpy as np
import pandas as pd

# Running totals, accumulated over the 15 loop iterations below
cum_ad_rev_a = 0
cum_ad_rev_b = 0
cum_payer_a = 0
cum_payer_b = 0

# Days array (0..29)
x = np.arange(0, 30)

# Retention curves for group A and B
y_a = 0.03594404 + (1.320257 - 0.03594404) / (1 + (x / 0.6530059) ** 0.9492318)
y_b = 0.02983316 + (480458 - 0.02983316)/(1 + (x/6.395712e-11)**0.5914175)

# Ensure day 0 retention is 1 (the raw fits evaluate to 1.320257 and 480458 at x = 0)
y_a[0] = 1
y_b[0] = 1

# Everything below is independent of the loop index, so it is computed once
# instead of being rebuilt on each of the 15 iterations.
base_df = pd.DataFrame({
    "days": x,
    "ret_a": y_a,
    "ret_b": y_b
})

# Daily active users (DAU)
base_df['dau_a'] = 20000 * base_df['ret_a']
base_df['dau_b'] = 20000 * base_df['ret_b']

# Ad impressions
base_df['imp_a'] = 2.3 * base_df['dau_a']
base_df['imp_b'] = 1.5 * base_df['dau_b']

# Ad revenue (impressions * eCPM / 1000) and its running total
base_df['ad_revenue_a'] = 0.001 * base_df['imp_a'] * 10
base_df['ad_revenue_b'] = 0.001 * base_df['imp_b'] * 10.7
base_df['cum_ad_revenue_a'] = base_df['ad_revenue_a'].cumsum()
base_df['cum_ad_revenue_b'] = base_df['ad_revenue_b'].cumsum()

# Only the purchase ratio (and therefore the payer columns) varies with i
for i in range(15):
    # Group A: default ratio everywhere, except an 11-point linear ramp from
    # 0.04 back down to the default 0.03 that starts on day i.
    # (No separate "spike" assignment is needed: the ramp's first point is 0.04.)
    purchase_ratio_a = np.full(len(base_df), 0.03)
    purchase_ratio_a[i:i+11] = np.linspace(0.04, 0.03, 11)

    # Group B: same shape, ramping from 0.042 back down to the default 0.032
    purchase_ratio_b = np.full(len(base_df), 0.032)
    purchase_ratio_b[i:i+11] = np.linspace(0.042, 0.032, 11)

    # Paying DAU for this iteration and its running total
    df = base_df.assign(
        payer_dau_a=purchase_ratio_a * base_df['dau_a'],
        payer_dau_b=purchase_ratio_b * base_df['dau_b'],
    )
    df['cum_payer_dau_a'] = df['payer_dau_a'].cumsum()
    df['cum_payer_dau_b'] = df['payer_dau_b'].cumsum()

    # Read the cumulative values at day 14 - i and add them to the totals.
    # NOTE(review): the ramp starts at day i while the row is read at day 14 - i,
    # so only iterations with i <= 7 include any boosted days - confirm this
    # matches the intended timing of the boost.
    row = df[df['days'] == 14 - i].iloc[0]
    cum_ad_rev_a += row['cum_ad_revenue_a']
    cum_ad_rev_b += row['cum_ad_revenue_b']
    cum_payer_a  += row['cum_payer_dau_a']
    cum_payer_b  += row['cum_payer_dau_b']

# Print final cumulative results
print("Final totals:")
print("cum_ad_rev_a:", cum_ad_rev_a)
print("cum_ad_rev_b:", cum_ad_rev_b)
print("cum_payer_a:", cum_payer_a)
print("cum_payer_b:", cum_payer_b)
print(f"Critical IAP ARPPU: {(cum_ad_rev_a - cum_ad_rev_b) / (cum_payer_b - cum_payer_a)}")


Final totals:
cum_ad_rev_a: 19210.729863507568
cum_ad_rev_b: 13109.291113983643
cum_payer_a: 26744.482757034428
cum_payer_b: 27816.31296034639
Critical IAP ARPPU: 5.692542280176881


### e)

In [None]:
import numpy as np
import pandas as pd

# Retention function for group A
def new_ret_a(x):
    """Return group A retention for the day number(s) in ``x``.

    Day 0 maps to 1; every other day follows ``0.58 * exp(-0.12 * (x - 1))``.
    ``x`` may be a scalar or array-like; the result is a NumPy array.
    """
    days = np.asarray(x)
    decay = 0.58 * np.exp(-0.12 * (days - 1))
    return np.where(days == 0, 1, decay)

# Retention function for group B
def new_ret_b(x):
    """Return group B retention for the day number(s) in ``x``.

    Day 0 maps to 1; every other day follows ``0.52 * exp(-0.10 * (x - 1))``.
    ``x`` may be a scalar or array-like; the result is a NumPy array.
    """
    days = np.asarray(x)
    decay = 0.52 * np.exp(-0.10 * (days - 1))
    return np.where(days == 0, 1, decay)

# Reset the module-level running totals that run_df() adds to
cum_ad_rev_a = 0
cum_ad_rev_b = 0
cum_payer_a = 0
cum_payer_b = 0

# Function to run a scenario with given DAU base, retention functions, and days to sum
def run_df(base_a, base_b, ret_a, ret_b, day_range):
    """Add one scenario's cumulative ad revenue and payer counts to the global totals.

    Parameters
    ----------
    base_a, base_b
        Multipliers applied to the retention of group A / B to obtain DAU.
    ret_a, ret_b
        Callables that take the array of day numbers 0..29 and return the
        retention for group A / B.
    day_range
        Iterable of row positions; the cumulative values at each position are
        added, in order, to the running totals.

    Returns
    -------
    None. The module-level globals ``cum_ad_rev_a``, ``cum_ad_rev_b``,
    ``cum_payer_a`` and ``cum_payer_b`` are updated in place, so they must be
    defined before the first call.
    """
    global cum_ad_rev_a, cum_ad_rev_b, cum_payer_a, cum_payer_b

    # Day numbers 0..29 and the retention of both groups on those days
    ages = np.arange(0, 30)
    cohort = pd.DataFrame({"days": ages, "ret_a": ret_a(ages), "ret_b": ret_b(ages)})

    # Row 0 is forced to exactly 1, whatever the retention callables returned
    for ret_col in ('ret_a', 'ret_b'):
        cohort.iloc[0, cohort.columns.get_loc(ret_col)] = 1

    # Derived metrics, added in order; later entries read columns created by earlier ones
    cohort = cohort.assign(
        # Daily active users
        dau_a=lambda d: base_a * d['ret_a'],
        dau_b=lambda d: base_b * d['ret_b'],
        # Paying DAU
        payer_dau_a=lambda d: 0.03 * d['dau_a'],
        payer_dau_b=lambda d: 0.032 * d['dau_b'],
        # Ad impressions
        imp_a=lambda d: 2.3 * d['dau_a'],
        imp_b=lambda d: 1.5 * d['dau_b'],
        # Ad revenue (impressions * eCPM / 1000)
        ad_revenue_a=lambda d: 0.001 * d['imp_a'] * 10,
        ad_revenue_b=lambda d: 0.001 * d['imp_b'] * 10.7,
        # Running totals of payers and ad revenue
        cum_payer_dau_a=lambda d: d['payer_dau_a'].cumsum(),
        cum_payer_dau_b=lambda d: d['payer_dau_b'].cumsum(),
        cum_ad_revenue_a=lambda d: d['ad_revenue_a'].cumsum(),
        cum_ad_revenue_b=lambda d: d['ad_revenue_b'].cumsum(),
    )

    # Pull the four cumulative columns out once, then add the requested rows
    # one at a time so the floating-point addition order is unchanged.
    rev_a = cohort['cum_ad_revenue_a'].to_numpy()
    rev_b = cohort['cum_ad_revenue_b'].to_numpy()
    pay_a = cohort['cum_payer_dau_a'].to_numpy()
    pay_b = cohort['cum_payer_dau_b'].to_numpy()
    for pos in day_range:
        cum_ad_rev_a += rev_a[pos]
        cum_ad_rev_b += rev_b[pos]
        cum_payer_a += pay_a[pos]
        cum_payer_b += pay_b[pos]

# Curve-fit retention functions shared by Case 1 and Case 2
def old_ret_a(x):
    """Fitted retention curve for group A."""
    return 0.03594404 + (1.320257 - 0.03594404)/(1 + (x/0.6530059)**0.9492318)


def old_ret_b(x):
    """Fitted retention curve for group B."""
    return 0.02983316 + (480458 - 0.02983316)/(1 + (x/6.395712e-11)**0.5914175)


# Case 1: between D11 and D29 with 20K daily installs
run_df(20000, 20000, old_ret_a, old_ret_b, range(11, 30))

# Case 2: between D0 and D10 with 12K daily installs
run_df(12000, 12000, old_ret_a, old_ret_b, range(11))

# Case 3: between D0 and D10 with 8K daily installs
run_df(8000, 8000, new_ret_a, new_ret_b, range(11))


# Report the totals accumulated by the three runs above
print("Final Totals:")
print("cum_ad_rev_a:", cum_ad_rev_a)
print("cum_ad_rev_b:", cum_ad_rev_b)
print("cum_payer_a:", cum_payer_a)
print("cum_payer_b:", cum_payer_b)
print(f"Critical IAP ARPPU: {(cum_ad_rev_a-cum_ad_rev_b)/(cum_payer_b-cum_payer_a)}")


Final Totals:
cum_ad_rev_a: 51494.742812679164
cum_ad_rev_b: 36629.86203830828
cum_payer_a: 67167.05584262499
cum_payer_b: 73031.50063712556
Critical IAP ARPPU: 2.5347464756272498
