In [None]:
import sys
import os
from pyspark.sql import SparkSession
import pyspark.sql.functions as F

import logging
import numpy as np

# Make the parent of the current working directory importable so that the
# local `stratification` and `prepilot` packages imported below resolve.
# The membership check keeps this cell idempotent: re-running it no longer
# appends the same directory to sys.path again.
_project_root = os.path.dirname(os.path.abspath(''))
if _project_root not in sys.path:
    sys.path.append(_project_root)
from stratification.params import SplitBuilderParams
from prepilot.params import PrepilotParams
from prepilot.prepilot_split_builder import PrepilotSplitBuilder
from prepilot.prepilot_experiment_builder import PrepilotExperimentBuilder

# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level = logging.INFO)

# IPython autoreload in mode 2: modules are re-imported before each cell
# runs, so edits to the local packages are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Create (or reuse) a Spark session running in local mode under the
# application name "gbc_ab_pyspark".
# Add postgres jar (disabled) by inserting this before `.getOrCreate()`:
#   .config("spark.driver.extraClassPath", "/home/jovyan/work/jars/postgresql-9.4.1207.jar")
spark = SparkSession.builder.master("local").appName("gbc_ab_pyspark").getOrCreate()

# Handle to the underlying SparkContext of the session.
sc = spark.sparkContext

In [None]:
# Settings passed to the pre-pilot experiment builder.
# NOTE(review): the meaning of each field is defined in
# `prepilot.params.PrepilotParams`, which is not visible here; the inline
# remarks are inferred from the names only — confirm against that class.
prepilot_params = PrepilotParams(
    metrics_names=["pre_gp_orders"],
    injects=[1.01, 1.03, 1.04, 1.05],  # presumably multiplicative effect sizes — TODO confirm
    min_group_size=50000,
    max_group_size=150000,
    step=50000,  # presumably the increment between min and max group size — TODO confirm
    bootstrap_metric=np.median,
    iterations_number=10,
    n_buckets=1000,
    max_beta_score=0.2,
    min_beta_score=0.05,
)

In [None]:
# Settings for the stratified split: two groups ("control" and "target") of
# 10000 each, keyed on the column names given below.
# NOTE(review): field semantics come from `stratification.params.SplitBuilderParams`
# (not visible here) — confirm before changing values.
split_builder_params = SplitBuilderParams(
    map_group_names_to_sizes={"control": 10000, "target": 10000},
    region_col="moda_city",
    split_metric_col="pre_gp_orders",
    customer_col="customer_id",
    cols=[],
    # Categorical columns are currently disabled; previously listed candidates:
    #   'offer_rk_goal', 'offer_rk_campaign'
    cat_cols=[],
    pvalue=0.05,
    n_top_cat=100,
    stat_test="ttest_ind",
)

In [None]:
# Load the input table from the semicolon-separated file "TLO.csv" (path is
# relative to the working directory). The first row is used as the header and
# column types are inferred from the data.
df = spark.read.csv(
    "TLO.csv",
    header=True,
    inferSchema=True,
    sep=";",
)

In [None]:
# Builder that produces splits of `df` for the listed pairs of sizes.
# NOTE(review): the arguments are positional and the constructor signature is
# not visible here. The pairs look like (control, target) group sizes and the
# trailing 3 looks like a repetition count — confirm against PrepilotSplitBuilder.
prepilot_guests_collector = PrepilotSplitBuilder(
    spark,
    df,
    [(10000, 10000), (20000, 20000)],
    split_builder_params,
    3,
)

In [None]:
# Run the split builder and keep its result for inspection.
# NOTE(review): `splited_df` is a misspelling of `split_df`; renaming it also
# requires updating the cell that displays it.
splited_df = prepilot_guests_collector.collect()

In [None]:
# Preview the first 10 rows of the split result.
splited_df.show(10)

In [None]:
# Pre-pilot experiment builder over the full input frame, combining the
# experiment settings with the split settings defined in the earlier cells.
prepilot = PrepilotExperimentBuilder(
    spark,
    df,
    prepilot_params,
    split_builder_params,
)

In [None]:
# Run the pre-pilot experiment; it returns two results, unpacked in this order.
# NOTE(review): presumably `beta` holds type II error estimates and `alpha`
# type I error estimates — confirm against PrepilotExperimentBuilder.collect.
beta, alpha = prepilot.collect()

In [None]:
# Display the first result returned by the experiment.
beta.show()

In [None]:
# Display the second result returned by the experiment.
alpha.show()

In [None]:
# Stop the Spark session. Re-run the session-creation cell before executing
# any Spark cell again.
spark.stop()