# Causal Pricing Model

Data: We simulate data, but you can replace it with your actual dataset.\
Treatment Model: Predicts the price using the covariates. This is your first nuisance model.\
Outcome Model: Predicts demand using the covariates. This is your second nuisance model.\
Residual Calculation: Subtract predicted demand from actual demand and predicted price from actual price.\
Causal Impact: We estimate the causal impact by regressing the residuals of demand on the residuals of price.

In [60]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Step 1: Data Simulation Function
def simulate_data(n_samples=1000, seed=42, true_causal_effect=-0.5):
    np.random.seed(seed)
    
    # Simulate price as before
    price = np.random.normal(10, 2, n_samples)
    
    # Simulate covariates
    advertising_spend = np.random.uniform(500, 1000, n_samples)  # Advertising spend
    holiday_season = np.random.choice([0, 1], n_samples)  # Binary indicator for holiday season
    competitor_price = np.random.normal(9, 2, n_samples)  # Competitor's price
    
    # Introduce a causal effect of price on demand and incorporate covariates into demand generation
    epsilon = np.random.normal(0, 1, n_samples)  # Random noise
    demand = (20 + true_causal_effect * price 
              + 0.01 * advertising_spend 
              + 5 * holiday_season 
              - 0.3 * competitor_price 
              + epsilon)
    
    # Create and return the DataFrame
    data = pd.DataFrame({
        'demand': demand,
        'price': price,
        'advertising_spend': advertising_spend,
        'holiday_season': holiday_season,
        'competitor_price': competitor_price
    })
    
    return data

# Step 2: Train Models for Treatment (Price) and Outcome (Demand)
def train_models(X_train, y_train_demand, y_train_price):
    # Treatment model (Price prediction)
    treatment_model = RandomForestRegressor(n_estimators=100, random_state=42)
    treatment_model.fit(X_train, y_train_price)
    
    # Outcome model (Demand prediction)
    outcome_model = RandomForestRegressor(n_estimators=100, random_state=42)
    outcome_model.fit(X_train, y_train_demand)
    
    return treatment_model, outcome_model

# Step 3: Predict Treatment and Outcome, and Compute Residuals
def compute_residuals(treatment_model, outcome_model, X_train, y_train_demand, y_train_price):
    # Predict price and demand
    predicted_price = treatment_model.predict(X_train)
    predicted_demand = outcome_model.predict(X_train)
    
    # Calculate residuals
    residuals_demand = y_train_demand - predicted_demand
    residuals_price = y_train_price - predicted_price

    return residuals_demand, residuals_price

# Step 4: Estimate Causal Impact Using a Machine Learning Model
def estimate_causal_impact(residuals_price, residuals_demand):
    # Random Forest model for causal impact estimation
    effect_model = LinearRegression()
    effect_model.fit(residuals_price.values.reshape(-1, 1), residuals_demand)
        
    causal_impact = effect_model.coef_[0]

    return causal_impact

def validate_model(model, X, y):
    scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')
    rmse = np.sqrt(-scores.mean())
    return rmse

In [61]:
# Simulate the data
data = simulate_data(n_samples=10000, seed=42, true_causal_effect=-0.5)

# Split the data into features (X) and target variables (y)
X = data[['advertising_spend', 'holiday_season', 'competitor_price']]  # Covariates
y_demand = data['demand']
y_price = data['price']

# Split into training and testing sets
X_train, X_test, y_train_demand, y_test_demand, y_train_price, y_test_price = train_test_split(
    X, y_demand, y_price, test_size=0.2, random_state=42)

# Train treatment (price) and outcome (demand) models
treatment_model, outcome_model = train_models(X_train, y_train_demand, y_train_price)

# Compute residuals for both demand and price
residuals_demand, residuals_price = compute_residuals(treatment_model, outcome_model, X_train, y_train_demand, y_train_price)

# Estimate the causal impact of price on demand
causal_impact = estimate_causal_impact(residuals_price, residuals_demand)

print(f"The estimated causal impact of price on demand is: {causal_impact:.4f}")

treatment_rmse = validate_model(treatment_model, X_train, y_train_price)
outcome_rmse = validate_model(outcome_model, X_train, y_train_demand)

print(f"Treatment model RMSE: {treatment_rmse:.4f}")
print(f"Outcome model RMSE: {outcome_rmse:.4f}")

The estimated causal impact of price on demand is: -0.4772
Treatment model RMSE: 2.1462
Outcome model RMSE: 1.5555


Can we see what the demand will change by if we change the price by £x?

In [52]:
def predict_demand_with_price_change(treatment_model, effect_model, advertising_spend, holiday_season, competitor_price, current_price, price_change):
    # Create covariates for the current situation
    current_covariates = np.array([[advertising_spend, holiday_season, competitor_price]])
    print(current_covariates)
    # Predict the current price from covariates
    predicted_current_price = treatment_model.predict(current_covariates)
    print(predicted_current_price)
    # Predict the residual demand with the current price
    residual_demand_current = effect_model.predict(predicted_current_price.reshape(-1, 1))
    print(residual_demand_current)
    # New price scenario
    new_price = current_price + price_change
    print(new_price)
    # Predict the new residual demand with the new price
    predicted_new_price = np.array([[new_price]])
    residual_demand_new = effect_model.predict(predicted_new_price)
    print(residual_demand_new)
    # Calculate the causal effect
    causal_effect = residual_demand_new - residual_demand_current

    return causal_effect

In [59]:
current_competitor_price = 1
holiday_season_value = 1
price_change = 1
advertising_spend_value = 500  # Set an example advertising spend

# Calculate causal effect of changing the price by £10
causal_effect = predict_demand_with_price_change(treatment_model, effect_model, advertising_spend_value, holiday_season_value, current_competitor_price, current_price=10, price_change=price_change)

print(f"The estimated causal effect of changing the price by £{price_change} is: {causal_effect[0]:.4f} units of demand.")

The estimated causal effect of changing the price by £1 is: 0.0000 units of demand.


