# Causal Pricing Model

Data: We simulate data, but you can replace it with your actual dataset.\
Treatment Model: Predicts the price using the covariates. This is your first nuisance model.\
Outcome Model: Predicts demand using the covariates. This is your second nuisance model.\
Residual Calculation: Subtract predicted demand from actual demand and predicted price from actual price.\
Causal Impact: We estimate the causal impact by regressing the residuals of demand on the residuals of price.

In [22]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Step 1: Data Simulation Function
def simulate_data(n_samples=1000, seed=42, true_causal_effect=-0.5):
    np.random.seed(seed)
    
    # Simulate price as before
    price = np.random.normal(10, 2, n_samples)
    
    # Simulate covariates
    advertising_spend = np.random.uniform(500, 1000, n_samples)  # Advertising spend
    holiday_season = np.random.choice([0, 1], n_samples)  # Binary indicator for holiday season
    competitor_price = np.random.normal(9, 2, n_samples)  # Competitor's price
    
    # Introduce a causal effect of price on demand and incorporate covariates into demand generation
    epsilon = np.random.normal(0, 1, n_samples)  # Random noise
    demand = (20 + true_causal_effect * price 
              + 0.01 * advertising_spend 
              + 5 * holiday_season 
              - 0.3 * competitor_price 
              + epsilon)
    
    # Create and return the DataFrame
    data = pd.DataFrame({
        'demand': demand,
        'price': price,
        'advertising_spend': advertising_spend,
        'holiday_season': holiday_season,
        'competitor_price': competitor_price
    })
    
    return data

# Step 2: Train Models for Treatment (Price) and Outcome (Demand)
def train_models(X_train, y_train_demand, y_train_price):
    # Treatment model (Price prediction)
    treatment_model = RandomForestRegressor(n_estimators=100, random_state=42)
    treatment_model.fit(X_train, y_train_price)
    
    # Outcome model (Demand prediction)
    outcome_model = RandomForestRegressor(n_estimators=100, random_state=42)
    outcome_model.fit(X_train, y_train_demand)
    
    return treatment_model, outcome_model

# Step 3: Predict Treatment and Outcome, and Compute Residuals
def compute_residuals(treatment_model, outcome_model, X_train, y_train_demand, y_train_price):
    # Predict price and demand
    predicted_price = treatment_model.predict(X_train)
    predicted_demand = outcome_model.predict(X_train)
    
    # Calculate residuals
    residuals_demand = y_train_demand - predicted_demand
    residuals_price = y_train_price - predicted_price
    
    return residuals_demand, residuals_price

# Step 4: Estimate Causal Impact Using a Machine Learning Model
def estimate_causal_impact(residuals_price, residuals_demand):
    # Random Forest model for causal impact estimation
    effect_model = LinearRegression()
    effect_model.fit(residuals_price.values.reshape(-1, 1), residuals_demand)
        
    causal_impact = effect_model.coef_[0]

    return causal_impact


In [23]:
# Simulate the data
data = simulate_data(n_samples=1000, seed=42, true_causal_effect=-0.5)

# Split the data into features (X) and target variables (y)
X = data[['advertising_spend', 'holiday_season', 'competitor_price']]  # Covariates
y_demand = data['demand']
y_price = data['price']

# Split into training and testing sets
X_train, X_test, y_train_demand, y_test_demand, y_train_price, y_test_price = train_test_split(
    X, y_demand, y_price, test_size=0.2, random_state=42)

# Train treatment (price) and outcome (demand) models
treatment_model, outcome_model = train_models(X_train, y_train_demand, y_train_price)

# Compute residuals for both demand and price
residuals_demand, residuals_price = compute_residuals(treatment_model, outcome_model, X_train, y_train_demand, y_train_price)

# Estimate the causal impact of price on demand
causal_impact = estimate_causal_impact(residuals_price, residuals_demand)

print(f"The estimated causal impact of price on demand is: {causal_impact:.4f}")

The estimated causal impact of price on demand is: -0.4839
