In [1]:
import numpy as np
import pandas as pd
import pystan

from bayesian_analysis import BayesianAnalysis, BayesianRevenue, BayesianConversion
from bayesian_testing import BayesianTesting

In [2]:
pystan.__version__

'2.19.0.0'

In [3]:
# Build a StanModel from the Stan program in revenue_model.stan; this triggers
# the C++ compilation reported in the log output below.
sm_revenue = pystan.StanModel(file='revenue_model.stan')

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_e10b5aaf0bda8c3fd55c449a8894ff67 NOW.


In [4]:
# Arguments passed positionally to BayesianTesting in the next cell.
# NOTE(review): prior_alpha / prior_beta are presumably prior hyperparameters
# for the model; confirm against bayesian_testing.
column_for_data_analysis = 'data'
analysis_type = 'Bayesian-Revenue'
stan_model = sm_revenue
prior_alpha = 1
prior_beta = 1

In [5]:
# Instantiate the testing helper with the configuration defined in the previous cell.
bt = BayesianTesting(column_for_data_analysis, analysis_type, 
                     stan_model, prior_alpha, prior_beta)

In [6]:
# Load the experiment export, then keep only the rows whose
# 'participation_timestamp_rounded_up' falls strictly before 2020-02-01.
df = pd.read_csv('../sevenpack/data/exp_385.csv')
from pandas import Timestamp
# Parse the timestamp column so it can be compared against a Timestamp.
dt = pd.to_datetime(df['participation_timestamp_rounded_up'])
# Boolean mask: True for rows before the cut-off date.
idx = dt<Timestamp('2020-02-01')
# Filtered frame used by the analysis cells below.
subxp = df.loc[idx]

In [7]:
# Posterior output for the filtered experiment. Later cells unpack it with
# zip(*posteriors) as a sequence of (bucket, samples) pairs.
posteriors = bt._generate_bucket_posteriors(subxp)

In [8]:
# Summary statistics derived from the posteriors (displayed in the next cell).
# NOTE(review): judging by the displayed output, each entry holds three
# per-bucket lists; the last one matches the expected-loss values computed
# further down. Confirm the meaning of the first two against bayesian_testing.
stats = bt._generate_test_statistics(posteriors)

In [9]:
stats

[([('control',
    array([6.71683526, 6.79202183, 6.84737884, 6.90742433, 6.9807618 ])),
   ('show_modal_forced_selection',
    array([6.30682227, 6.37025563, 6.4214336 , 6.47082686, 6.53993842])),
   ('show_modal_optional_selection',
    array([6.37557257, 6.4406132 , 6.49467203, 6.56257134, 6.62918632]))],
  [('control', 1.0),
   ('show_modal_forced_selection', 0.0),
   ('show_modal_optional_selection', 0.0)],
  [('control', 0.0),
   ('show_modal_forced_selection', 0.42884195127625757),
   ('show_modal_optional_selection', 0.35037095303429405)])]

In [None]:
# Revenue analysis object used directly by the exploratory cells below.
# NOTE(review): the arguments look like (column, prior_alpha, prior_beta,
# stan_model), mirroring the config cell; confirm the parameter order.
boop = BayesianRevenue('data', 1, 1, sm_revenue)

In [None]:
# Re-reads the same CSV that was already loaded into `df` earlier in the notebook.
df = pd.read_csv('../sevenpack/data/exp_385.csv')

In [None]:
# Repeats the earlier date filter: keep rows with a participation timestamp
# strictly before 2020-02-01.
from pandas import Timestamp
dt = pd.to_datetime(df['participation_timestamp_rounded_up'])
idx = dt<Timestamp('2020-02-01')
subxp = df.loc[idx]

In [None]:
# Distinct values of the 'bucket' column in the filtered frame.
unique_buckets = subxp.bucket.unique()

In [None]:
unique_buckets

In [None]:
results = []

In [None]:
len(subxp)

In [None]:
# Build one (bucket, posterior) pair per bucket from the filtered data.
results = []
for bucket in unique_buckets:
    # Rows belonging to the current bucket only.
    bucket_df = subxp.loc[subxp["bucket"] == bucket]
    res = boop._generate_posterior_distribution(bucket_df)
    results.append((bucket, res))

In [None]:
# Number of entries collected in `results`; used as the loop bound further down.
n = len(results)

In [None]:
# Split the (bucket, posterior) pairs into two parallel tuples.
buckets, posts = zip(*results)

In [None]:
buckets[2]

In [None]:
# Print the bucket name of every entry in `results`, in order.
for bucket, post in results:
    print(bucket)

In [None]:
import seaborn as sns
# Each entry of `results` is a (bucket, posterior) pair, so plot only the
# posterior draws and use the bucket name as the legend label. All curves are
# drawn on the current axes so the buckets can be compared directly.
for bucket, post in results:
    ax = sns.distplot(post, label=bucket)
ax.legend();

In [10]:
def _calculate_expected_loss(posteriors: list, num_groups: int) -> list:
    buckets, res_lists = zip(*posteriors)
    loss_stats = []
    for i in range(num_groups):
        j = 0
        loss_i = 0
        while(j < num_groups):
            if j == i:
                j += 1
                continue
            else:
                diff = res_lists[j] - res_lists[i]
                loss_i = np.mean(np.maximum(diff, loss_i))
                j += 1
        loss_stats.append((buckets[i], loss_i))
    

    return loss_stats


In [11]:
losses = _calculate_expected_loss(posteriors, 3)

In [12]:
losses

[('control', 0.0),
 ('show_modal_forced_selection', 0.42884195127625757),
 ('show_modal_optional_selection', 0.35037095303429405)]

In [None]:
def _calculate_chance_to_beat_all(posteriors: list, num_groups: int) -> list:
    buckets, res_lists = zip(*posteriors)
    prob_stats = []
    for i in range(num_groups):
        j = 0
        prob_i = 0
        
        
    if num_groups == 2:
        bucket_a = buckets[0]
        bucket_b = buckets[1]
        a = res_lists[0]
        b = res_lists[1]
        diff = a - b
        prob_a = np.sum(diff>0)/len(diff)
        prob_b = 1 - prob_a
        prob_stats.append((bucket_a, prob_a))
        prob_stats.append((bucket_b, prob_b))
        
    if num_groups == 3:
        bucket_a = buckets[0]
        bucket_b = buckets[1]
        bucket_c = buckets[2]
        a = res_lists[0]
        b = res_lists[1]
        c = res_lists[2]
        diff_ab = a - b
        diff_ac = a - c
        diff_bc = b - c
        samples = len(diff_ab)

        prob_a = np.sum((diff_ac > 0) & (diff_ab > 0))/samples
        prob_b = np.sum((diff_bc > 0) & (diff_ab < 0))/samples
        prob_c = np.sum((diff_ac < 0) & (diff_bc < 0))/samples
        prob_stats.append((bucket_a, prob_a))
        prob_stats.append((bucket_b, prob_b))
        prob_stats.append((bucket_c, prob_c))
        
    return prob_stats

In [None]:
# Chance-to-beat-all for the per-bucket posteriors collected in `results`;
# the literal 3 is the number of buckets being compared.
prob_stats = _calculate_chance_to_beat_all(results, 3)

In [None]:
prob_stats

In [None]:
# Manual check of the pairwise comparisons: unpack the first three
# (bucket, posterior) pairs and form the differences of the first bucket
# against the other two.
buckets, res_lists = zip(*results)
bucket_a = buckets[0]
bucket_b = buckets[1]
bucket_c = buckets[2]
a = res_lists[0]
b = res_lists[1]
c = res_lists[2]
# Element-wise differences; a positive entry means bucket a is ahead in that draw.
diff_ab= a -b
diff_ac= a -c

In [None]:
np.sum(diff_ab > 0)

In [None]:
np.sum(diff_ac > 0)/len(diff_ac)

In [None]:
np.sum((diff_ac > 0) & (diff_ab > 0))

In [None]:
# Overlay the distributions of the two difference vectors computed above.
sns.distplot(diff_ab)
sns.distplot(diff_ac)

In [None]:
losses = []

In [None]:
# Expected loss per bucket: the mean over draws of
# max(0, best rival - this bucket), compared against every other bucket.
# `results` holds (bucket, posterior) pairs, so index [1] selects the draws.
losses = []
for i in range(n):
    draws_i = np.asarray(results[i][1], dtype=float)
    # Start from zero so a bucket that is ahead in a draw contributes no loss,
    # and so the value never carries over from the previous bucket.
    loss_i = np.zeros_like(draws_i)
    for j in range(n):
        if j == i:
            continue
        loss_i = np.maximum(results[j][1] - draws_i, loss_i)
    losses.append(np.mean(loss_i))
        

In [None]:
losses

In [None]:
# Posterior for the first bucket, appended to `results`.
# NOTE(review): this filters the full `df` rather than the date-filtered
# `subxp`, and appends the bare posterior whereas the loop above appended
# (bucket, posterior) tuples; re-running this cell also grows `results`.
# Confirm this is intended.
bucket_df = df.loc[df["bucket"] == unique_buckets[0]]
res = boop._generate_posterior_distribution(bucket_df)
results.append(res)

In [None]:
# Posterior for the second bucket, appended to `results`.
# NOTE(review): this filters the full `df` rather than the date-filtered
# `subxp`, and appends the bare posterior whereas the loop above appended
# (bucket, posterior) tuples; re-running this cell also grows `results`.
# Confirm this is intended.
bucket_df = df.loc[df["bucket"] == unique_buckets[1]]
res = boop._generate_posterior_distribution(bucket_df)
results.append(res)

In [None]:
# Calls the revenue-specific posterior method on the whole, unfiltered frame.
# NOTE(review): other cells call `_generate_posterior_distribution`; confirm
# that this differently named method exists on BayesianRevenue.
boop._generate_posterior_distribution_for_revenue(df)

In [None]:
# Load the data set used for the conversion analysis below.
df2 = pd.read_csv('../sevenpack/data/xp_432_cvr.csv')

In [None]:
# Build a StanModel from the Stan program in conversion_model.stan.
sm_conversion = pystan.StanModel(file = 'conversion_model.stan')

In [None]:
# Conversion analysis object reading the 'is_converted' column.
# NOTE(review): the two 1s are presumably the prior alpha/beta, matching the
# revenue setup; confirm the parameter order.
boop2 = BayesianConversion('is_converted', 1, 1, sm_conversion)

In [None]:
# Posterior output for the conversion data; rebinds `res`, which earlier
# cells used for revenue posteriors.
res = boop2._generate_posterior_distribution_for_conversions(df2)

In [None]:
res

In [None]:
# Plot the distribution of the conversion posterior computed above.
sns.distplot(res)