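This notebook shows how to use the washover module. In particular, it demonstrates how to apply a ConstantWashover object with a 30-minute time delta, which drops the orders activated within the first 30 minutes after each change of treatment.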

In [1]:
## Import Libraries

import numpy as np
import pandas as pd
from datetime import datetime, timedelta

from cluster_experiments import ConstantWashover

In [5]:
## Build a toy dataset: 4 consecutive 2-hour time bins holding 10 orders each

# Number of orders generated for every time bin.
num_rows = 10

# Seed the global NumPy generator so the dummy orders are identical on every run.
np.random.seed(42)

def random_timestamp(start_time, end_time):
    """Return a random instant inside the window that opens at ``start_time``.

    The offset is a whole number of seconds drawn with ``np.random.randint``
    between zero (inclusive) and the window length in seconds (exclusive),
    so the result depends on the global NumPy random state.
    """
    window_seconds = (end_time - start_time).total_seconds()
    offset_seconds = np.random.randint(0, window_seconds)
    return start_time + timedelta(seconds=offset_seconds)

def generate_data(start_time, end_time, treatment):
    """Build the dummy orders that belong to a single time bin.

    Args:
        start_time: Start of the bin; lower bound for the random activation
            times and the value stored in ``bin_start_time_local``.
        end_time: Upper bound handed to ``random_timestamp`` for the
            activation times.
        treatment: Label written to the ``treatment`` column of every row.

    Returns:
        A DataFrame with ``num_rows`` rows (module-level setting) and the
        columns ``order_id``, ``city_code``, ``activation_time_local``,
        ``bin_start_time_local`` and ``treatment``.
    """
    data = {
        # 10-digit ids do not fit in 32 bits. ``randint`` defaults to the
        # platform C long, which is 32-bit on Windows and makes the upper
        # bound out of range there, so int64 is requested explicitly.
        # The ids are drawn before the timestamps to keep the seeded stream
        # in the same order as before.
        'order_id': np.random.randint(10**9, 10**10, size=num_rows, dtype=np.int64),
        'city_code': 'VAL',
        'activation_time_local': [random_timestamp(start_time, end_time) for _ in range(num_rows)],
        'bin_start_time_local': start_time,
        'treatment': treatment
    }
    return pd.DataFrame(data)

# Every bin lasts two hours; the four bins start at 09:00, 11:00, 13:00 and 15:00.
bin_length = timedelta(hours=2)
start_times = [datetime(2024, 1, 22, hour, 0) for hour in (9, 11, 13, 15)]

# Treatment assigned to each bin, in the same order as start_times.
treatments = ['control', 'variation', 'variation', 'control']

# One frame per bin, generated in chronological order.
dataframes = [
    generate_data(bin_start, bin_start + bin_length, bin_treatment)
    for bin_start, bin_treatment in zip(start_times, treatments)
]

# Stack the bins, order the rows by activation time and renumber them from 0.
df = pd.concat(dataframes)
df = df.sort_values(by='activation_time_local')
df = df.reset_index(drop=True)

df.head(40)

Unnamed: 0,order_id,city_code,activation_time_local,bin_start_time_local,treatment
0,5298312065,VAL,2024-01-22 09:21:07,2024-01-22 09:00:00,control
1,5325316860,VAL,2024-01-22 09:25:28,2024-01-22 09:00:00,control
2,8395928407,VAL,2024-01-22 09:31:39,2024-01-22 09:00:00,control
3,1787846414,VAL,2024-01-22 09:45:34,2024-01-22 09:00:00,control
4,3563451924,VAL,2024-01-22 09:50:05,2024-01-22 09:00:00,control
5,3253890010,VAL,2024-01-22 09:53:22,2024-01-22 09:00:00,control
6,5495394815,VAL,2024-01-22 09:59:16,2024-01-22 09:00:00,control
7,5684118973,VAL,2024-01-22 10:04:50,2024-01-22 09:00:00,control
8,5537253172,VAL,2024-01-22 10:17:38,2024-01-22 09:00:00,control
9,3652062880,VAL,2024-01-22 10:56:15,2024-01-22 09:00:00,control


In [6]:
## Washover configuration: a constant 30-minute window
washover_duration = timedelta(minutes=30)
washover = ConstantWashover(washover_time_delta=washover_duration)

## Run the washover on the orders, clustering by city and 2h time bin.
## Orders activated within the first 30 minutes after every change in the
## treatment column are expected to be dropped.
df_analysis_washover = washover.washover(
    df=df,
    original_time_col='activation_time_local',
    truncated_time_col='bin_start_time_local',
    cluster_cols=['city_code','bin_start_time_local'],
    treatment_col='treatment',
)

## Inspect the result: indexes 0,1,10,11,30,31,32 should no longer be present
df_analysis_washover.head(40)

Unnamed: 0,order_id,city_code,activation_time_local,bin_start_time_local,treatment
2,8395928407,VAL,2024-01-22 09:31:39,2024-01-22 09:00:00,control
3,1787846414,VAL,2024-01-22 09:45:34,2024-01-22 09:00:00,control
4,3563451924,VAL,2024-01-22 09:50:05,2024-01-22 09:00:00,control
5,3253890010,VAL,2024-01-22 09:53:22,2024-01-22 09:00:00,control
6,5495394815,VAL,2024-01-22 09:59:16,2024-01-22 09:00:00,control
7,5684118973,VAL,2024-01-22 10:04:50,2024-01-22 09:00:00,control
8,5537253172,VAL,2024-01-22 10:17:38,2024-01-22 09:00:00,control
9,3652062880,VAL,2024-01-22 10:56:15,2024-01-22 09:00:00,control
12,1279394470,VAL,2024-01-22 11:43:33,2024-01-22 11:00:00,variation
13,5147358011,VAL,2024-01-22 12:04:03,2024-01-22 11:00:00,variation


In [7]:
# Compare the number of rows before and after the washover.
shapes = {
    'df:': df.shape,
    'df_analysis_washover:': df_analysis_washover.shape,
}
for label, shape in shapes.items():
    print(label, shape)

df: (40, 5)
df_analysis_washover: (33, 5)
