# Discovering Relationships with Measures of Association

## Environment Setup and Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import phik

# Global plot styling: seaborn theme and palette first, then matplotlib
# rcParams overrides applied after the seaborn calls.
sns.set_style("whitegrid")
sns.set_palette('viridis')
plt.rcParams.update({
    'axes.spines.top': False,    # hide the top axis border
    'axes.spines.right': False,  # hide the right axis border
    'font.family': 'monospace',
})

In [None]:
# Load the two pickled datasets used throughout the notebook. The paths are
# relative, so they resolve against the notebook's working directory.
# NOTE(review): unpickling can execute arbitrary code; only load .pkl files
# that come from a trusted source.
potential_outcomes_df = pd.read_pickle('../data/potential_outcomes_df.pkl')
observational_df = pd.read_pickle('../data/observational_df.pkl')

## Causal Mechanisms

<center>
<img
  src="../assets/confounding_bias.png"
  alt="Confounding Relationships"
  style="width:500px;height:auto;"
>
</center>

In [None]:
# Preview the first five rows of the observational dataset
observational_df.head(n=5)

### True Causal Effects


In [None]:
# Preview the first five rows of the potential-outcomes dataset
potential_outcomes_df.head(n=5)

### Dangers of Unadjusted Estimates

In [None]:
# Mean of `amu_signup` within each `upsell_marketing` group. This is a raw
# comparison of group means with no adjustment for other variables.
signup_rate_by_treatment = (
    potential_outcomes_df
    .groupby('upsell_marketing')['amu_signup']
    .mean()
)
print(f"Signup rates by treatment group:\n{signup_rate_by_treatment}")

# Bar chart of the group means. Reuses the Series computed above rather than
# running the identical groupby a second time, and draws on an explicit Axes
# instead of relying on pyplot's implicit "current figure" state.
fig, ax = plt.subplots(figsize=(8, 6))
signup_rate_by_treatment.plot(
    kind='bar',
    color=['tab:blue', 'tab:green'],  # one color per bar; assumes two groups
    rot=0,  # keep the x tick labels horizontal
    ax=ax,
)
ax.set_title("Signup Rate by Upsell Message Exposure")
ax.set_xlabel("Upsell Marketing (0 = No, 1 = Yes)")
ax.set_ylabel("Signup Rate")
plt.show()

In [None]:
# Naive ("biased") lift: difference in mean `amu_signup` between rows with
# upsell_marketing == 1 and rows with upsell_marketing == 0, unadjusted.
# Boolean masks with a single .loc lookup avoid chained indexing.
is_treated = potential_outcomes_df['upsell_marketing'] == 1
is_control = potential_outcomes_df['upsell_marketing'] == 0
biased_lift = (
    potential_outcomes_df.loc[is_treated, 'amu_signup'].mean()
    - potential_outcomes_df.loc[is_control, 'amu_signup'].mean()
)

# Reference value: mean of the `individual_treatment_effect` column.
actual_lift = potential_outcomes_df['individual_treatment_effect'].mean()

# Both values are formatted as percentages with two decimals.
print(
    f'Biased Marketing Lift: {biased_lift:.2%}',
    f'Actual Marketing Lift: {actual_lift:.2%}',
    sep='\n',
)