In [1]:
import os

import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from datasoup.small_sample_experiments.data_gen import Customers, SimulationConfig, Simulator, StoreParameters, Transactions

RNG = np.random.default_rng(0)

IMG_PATH = "datasoup/small_sample_experiments/images"
DEFAULT_PLOTLY_LAYOUT = go.Layout(
    autosize=False,
    width=1000,
    height=400,
    xaxis=go.layout.XAxis(linecolor="black", linewidth=1, mirror=True),
    yaxis=go.layout.YAxis(linecolor="black", linewidth=1, mirror=True),
    margin=go.layout.Margin(l=50, r=50, b=50, t=50, pad=20),
)

In [2]:
sim_config = SimulationConfig.get_default_skewed_config()
simulator = Simulator.initialize_from_config(sim_config, RNG)
transactions = simulator.get_transactions(RNG)

In [3]:
first_treated_day = transactions[transactions[Transactions.IS_TREATED_COL]][Transactions.DAY_COL].min()
treated_time_frame_transactions = transactions[first_treated_day <= transactions[Transactions.DAY_COL]]

len(treated_time_frame_transactions) / len(transactions)

0.5033113949122119

In [4]:
treated_time_frame_transactions.groupby(Transactions.IS_TREATED_COL)[Transactions.PURCHASE_QUANTITY_COL].sum()

is_treated
False    2537837.0
True     1246757.0
Name: purchase_quantity, dtype: float64

In [5]:
metrics_frame = (
    treated_time_frame_transactions.groupby([Transactions.IS_TREATED_COL, Transactions.STORE_ID_COL])
    .agg(
        {
            Transactions.PURCHASE_QUANTITY_COL: "sum",
            Transactions.CUSTOMER_ID_COL: "nunique",
            Transactions.TRANSACTION_ID_COL: "count",
        }
    )
    .reset_index(drop=False)
    .sort_values(by=[Transactions.STORE_ID_COL, Transactions.IS_TREATED_COL])
    .reset_index(drop=True)
)
metrics_frame["purchases_per_transaction"] = (
    metrics_frame[Transactions.PURCHASE_QUANTITY_COL] / metrics_frame[Transactions.TRANSACTION_ID_COL]
)
metrics_frame["purchases_per_customer"] = metrics_frame[Transactions.PURCHASE_QUANTITY_COL] / metrics_frame[Transactions.CUSTOMER_ID_COL]

metrics_frame

Unnamed: 0,is_treated,store_id,purchase_quantity,customer_id,transaction_id,purchases_per_transaction,purchases_per_customer
0,False,0,107665.0,1083,4494,23.957499,99.413666
1,True,1,32290.0,791,3415,9.455344,40.821745
2,True,2,4952.0,351,1503,3.294744,14.108262
3,False,3,25809.0,390,1644,15.698905,66.176923
4,False,4,8098.0,551,2366,3.422654,14.696915
...,...,...,...,...,...,...,...
115,True,115,3041.0,473,2066,1.471926,6.429175
116,False,116,8912.0,246,1004,8.876494,36.227642
117,False,117,1776.0,401,1716,1.034965,4.428928
118,False,118,27155.0,832,3493,7.774120,32.638221


In [6]:
metrics_frame.groupby(Transactions.IS_TREATED_COL).agg(
    {
        Transactions.PURCHASE_QUANTITY_COL: "mean",
        "purchases_per_transaction": "mean",
        "purchases_per_customer": "mean",
    }
)

Unnamed: 0_level_0,purchase_quantity,purchases_per_transaction,purchases_per_customer
is_treated,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,31331.320988,12.917977,54.387453
True,31968.128205,12.004087,52.06593


In [7]:
from datasoup.utils import plotly_subplot_from_figs

fig_1 = px.histogram(
    data_frame=metrics_frame,
    x="purchases_per_customer",
    color=Transactions.IS_TREATED_COL,
    nbins=30,
    marginal="box",
)
fig_2 = px.histogram(
    data_frame=metrics_frame,
    x="purchases_per_transaction",
    color=Transactions.IS_TREATED_COL,
    nbins=30,
    marginal="box",
)

ModuleNotFoundError: No module named 'datasoup.utils'