# Experimenting With Sequential Testing

This notebook will eventually be productionized.

In [0]:
import math
import os

import pandas as pd
import yaml
from pyspark.sql import SparkSession, Row
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType
from scipy.stats import norm
from statsmodels.stats.power import TTestIndPower

from utils import generate_sequential_plan

## Creating a reference for key values of a sequential test

In [0]:
%sql
-- Per-category mean and standard deviation of transaction amount, each rounded to 2 decimals.
SELECT
  category,
  ROUND(MEAN(amount), 2)   AS mean_amount,
  ROUND(STDDEV(amount), 2) AS std_amount
FROM workspace.money_mop.silver_daily_transactions
GROUP BY category

In [0]:
# Pull the per-category statistics into a pandas DataFrame for the power calculations below.
# NOTE(review): `_sqldf` is not defined in this notebook; presumably it is the implicit
# result of the preceding %sql cell provided by the runtime — confirm, and run that cell first.
df = _sqldf.toPandas()

df

In [0]:
# Minimum detectable effect (MDE), expressed as a fraction of each category's mean amount.
effect_percent = 0.05

# Absolute MDE per category, then the standardized effect size (MDE divided by the
# category's standard deviation) that the power calculation consumes.
df['mde'] = df['mean_amount'].mul(effect_percent)
df['effect_size'] = df['mde'].div(df['std_amount'])
df

In [0]:
# Solve for the per-group sample size of an independent two-sample t-test at
# 80% power and 5% significance, given each category's standardized effect size.
analysis = TTestIndPower()
df['n_per_group'] = df['effect_size'].apply(
    # Round UP to the next whole observation: rounding to nearest can land just
    # below the required sample size and leave the test slightly under-powered.
    lambda es: math.ceil(analysis.solve_power(effect_size=es, power=0.8, alpha=0.05))
).astype(int)
df

In [0]:
# Load notebook settings (table names, sequential-test parameters) from the YAML
# config one directory up. The context manager closes the file handle as soon as
# parsing finishes instead of leaving it open until garbage collection.
with open("../config.yaml") as config_file:
    config = yaml.safe_load(config_file)

In [0]:
# Obtain the active Spark session, creating one if none exists yet.
spark = SparkSession.builder.getOrCreate()

In [0]:
# Column names and Spark types mirroring the columns built on `df` above; every
# field is nullable.
# NOTE(review): no cell visible in this notebook passes `schema` anywhere — confirm
# whether it is meant to be supplied when the Spark DataFrame is created.
_power_columns = [
    ("category", StringType()),
    ("mean_amount", DoubleType()),
    ("std_amount", DoubleType()),
    ("mde", DoubleType()),
    ("effect_size", DoubleType()),
    ("n_per_group", IntegerType()),
]
schema = StructType(
    [StructField(col_name, col_type, True) for col_name, col_type in _power_columns]
)

In [0]:
# Convert the pandas frame to a Spark DataFrame. No schema is passed, so Spark
# infers the column types from the pandas dtypes.
# NOTE(review): the `schema` object defined earlier is not used here — confirm
# whether that is intentional before relying on specific column types downstream.
experiment_power = spark.createDataFrame(df)
# Destination table name is read from the loaded config rather than hard-coded.
table_name = config["tables"]["experiment_power"]

In [0]:
# Persist the power reference as a Delta table; "overwrite" mode replaces any existing contents.
experiment_power.write.format("delta").mode("overwrite").saveAsTable(table_name)


## Creating a sequential plan table

In [0]:
# Preview only the two columns the sequential plan needs, as a pandas frame.
# NOTE(review): `power_df` is rebound to a Spark DataFrame in a later cell, so this
# pandas copy is for inspection only.
power_df = experiment_power.select("category", "n_per_group").toPandas()

power_df

### Notes on the following code
I could not find a Python package that worked well for sequential testing, so I asked ChatGPT to write a function for it (`generate_sequential_plan` in `utils`). This could eventually become a package.

In [0]:
# Read the power table back from the table this notebook wrote earlier. Using the
# config-driven `table_name` instead of a hard-coded identifier keeps this read
# pointed at the same table as the write, even if the configured name changes.
power_df = spark.table(table_name)  # must have category & n_per_group

# Generate plan: look count, overall alpha and alpha-spending method all come from config.
plan_df = generate_sequential_plan(
    power_df,
    n_looks=config["sequential"]["n_looks"],
    alpha=config["sequential"]["alpha"],
    method=config["sequential"]["spending_method"],
)

# Replace the sequential-plan Delta table with the freshly generated plan.
(
    plan_df.write
    .mode("overwrite")
    .format("delta")
    .saveAsTable("money_mop.experiment_sequential_plan")
)