# Exploratory Analysis - Verification Flow Experiment

A quick exploration of the data before the formal analysis, as a sanity check that the inputs look right.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the three raw tables. The generator is consumed left to right, so the
# files are read in the same order as the names they are bound to.
assignments, verification, users = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/experiment_assignments.csv',
        '../data/verification_attempts.csv',
        '../data/users.csv',
    )
)

# Row counts as a first sanity check on the inputs.
print(f"Users: {len(users)}")
print(f"Assignments: {len(assignments)}")
print(f"Verification attempts: {len(verification)}")

In [None]:
# Sanity check on the randomisation: number of assignment rows per variant.
variant_counts = assignments['variant'].value_counts()
variant_counts

In [None]:
# The variant split looks close to 50/50, which is good.
# Build a per-user conversion flag from tier-1 verification attempts.

tier1 = verification[verification['verification_tier'] == 1].copy()
tier1['converted'] = (tier1['completion_status'] == 'completed').astype(int)

# The attempts table may contain more than one tier-1 row per user (e.g.
# retries). Merging it directly would repeat that user's assignment row once
# per attempt and distort the per-variant counts and rates. Collapse to one
# row per user first: a user is converted if ANY tier-1 attempt completed.
tier1_by_user = tier1.groupby('user_id', as_index=False)['converted'].max()

# Left merge keeps every assigned user; validate= asserts the right-hand side
# has at most one row per user_id, so the merge cannot fan out.
df = assignments.merge(
    tier1_by_user, on='user_id', how='left', validate='many_to_one'
)
# Users with no tier-1 attempt have no match and count as not converted.
# Cast back to int because the NaNs introduced by the merge made it float.
df['converted'] = df['converted'].fillna(0).astype(int)

df.head()

In [None]:
# Per variant: total of the converted flag, row count, and conversion rate.
converted_by_variant = df.groupby('variant')['converted']
converted_by_variant.agg(['sum', 'count', 'mean'])

In [None]:
# Treatment looks a bit higher, but the sample is small.
# Break the conversion rate down by device as well.

rate_by_variant_device = df.groupby(['variant', 'device_type'])['converted'].mean()
rate_by_variant_device.unstack()

In [None]:
# Observation from the table above: iOS is slightly higher in control, but
# the treatment effect seems similar across devices.

# Quick visual check: two side-by-side bar charts.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
variant_ax, device_ax = axes

# Left panel: overall conversion rate per variant.
conv_by_variant = df.groupby('variant')['converted'].mean()
variant_ax.bar(conv_by_variant.index, conv_by_variant.values)
variant_ax.set(title='Conversion by Variant', ylabel='Conversion Rate')

# Right panel: conversion rate per variant, one bar per device type.
conv_pivot = df.groupby(['variant', 'device_type'])['converted'].mean().unstack()
conv_pivot.plot.bar(ax=device_ax)
device_ax.set(title='Conversion by Variant & Device', ylabel='Conversion Rate')
device_ax.legend(title='Device')

plt.tight_layout()
plt.show()

In [None]:
# Check that pre-experiment metrics are balanced between the two variants.
from scipy.stats import ttest_ind

pre_metrics = pd.read_csv('../data/user_pre_metrics.csv')
df_with_pre = df.merge(pre_metrics, on='user_id')

# The inner merge silently drops rows whose user_id has no pre-metrics entry;
# report coverage so a lossy join is visible before trusting the balance test.
print(f"Rows with pre-metrics: {len(df_with_pre)} of {len(df)}")

is_control = df_with_pre['variant'] == 'control'
is_treatment = df_with_pre['variant'] == 'treatment'

# Two-sample t-test per metric. Welch's variant (equal_var=False) is used
# because nothing guarantees equal variances between arms, and missing values
# are dropped per column so a single NaN cannot turn the result into nan.
for col in ['pre_sessions_count', 'pre_matches_count', 'pre_messages_sent']:
    ctrl = df_with_pre.loc[is_control, col].dropna()
    trt = df_with_pre.loc[is_treatment, col].dropna()
    t, p = ttest_ind(ctrl, trt, equal_var=False)
    print(f"{col}: t={t:.3f}, p={p:.3f}")

In [None]:
# Pre-metrics all look balanced, good.
# Next question: is the pre/post correlation strong enough for CUPED?

# Post metrics would need a proper aggregation; for now just check sessions.
# NOTE(review): this counts every row of events.csv per user and applies no
# date filter - confirm that rows are sessions and that the file only covers
# the post-assignment period.
events = pd.read_csv('../data/events.csv')
post_sessions = (
    events
    .groupby('user_id')
    .size()
    .reset_index(name='post_sessions')
)

# Keep every user from df_with_pre; users with no event rows get 0.
df_cuped = (
    df_with_pre
    .merge(post_sessions, on='user_id', how='left')
    .fillna({'post_sessions': 0})
)

# Correlation between the pre-period and post-period counts.
corr_matrix = df_cuped[['pre_sessions_count', 'post_sessions']].corr()
corr = corr_matrix.loc['pre_sessions_count', 'post_sessions']
print(f"Pre-post sessions correlation: {corr:.3f}")

In [None]:
# Decent correlation, so CUPED should help.
# Estimated variance reduction is r^2, the squared pre/post correlation.
variance_reduction = corr ** 2
print(f"Expected variance reduction: {variance_reduction:.1%}")

## Notes

- Sample split looks fine (~50/50)
- Treatment shows a ~15% lift, but this needs a formal test
- Pre-metrics are balanced
- CUPED should give a ~40% variance reduction
- The sample might be too small for significance; need to check power