# Causal Impact & Investment Decision Analysis
## Tutorial Notebook

This notebook demonstrates how to use the Causal Impact Analysis library for:
1. Loading and preparing data
2. Running causal impact analysis
3. Calculating financial metrics (ROI)
4. Comparing impact across segments
5. Advanced analytics (propensity score matching; DiD and other methods are not covered in this notebook)
6. Generating PDF reports
7. Tracking experiment runs

In [None]:
# Standard imports
import sys
import os
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Set display options
pd.set_option('display.max_columns', 20)

# Matplotlib 3.6 renamed the bundled 'seaborn' style sheet to 'seaborn-v0_8' and
# later releases removed the old name, so `plt.style.use('seaborn')` raises
# OSError there. Pick whichever name this installation actually provides.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)

print("✓ Imports complete")

## 1. Data Loading & Preparation

In [None]:
from src.data_pipeline import DataPipeline

# Build the pipeline from the project config, then ingest and clean in order.
pipeline = DataPipeline('../config.yaml')
pipeline.load_data()
pipeline.clean_data()

# Report the cleaned frame's dimensions, then leave its first rows as the
# cell's rich output.
cleaned_df = pipeline.cleaned_data
print(f"Dataset shape: {cleaned_df.shape}")
cleaned_df.head()

In [None]:
# Aggregate the cleaned data into a time series split at the intervention date.
intervention_date = '2024-03-01'
pipeline.create_time_series(intervention_date=intervention_date)
ts_data = pipeline.time_series_data

# Revenue over time; the trace is handed to the figure at construction.
revenue_trace = go.Scatter(
    x=ts_data['date'],
    y=ts_data['revenue_usd'],
    mode='lines+markers',
    name='Revenue',
)
fig = go.Figure(data=[revenue_trace])

# Dashed red marker showing where the intervention starts.
fig.add_vline(x=intervention_date, line_dash='dash', line_color='red')

layout_options = {
    'title': 'Revenue Over Time with Intervention',
    'xaxis_title': 'Date',
    'yaxis_title': 'Revenue (USD)',
}
fig.update_layout(**layout_options)
fig.show()

## 2. Causal Impact Analysis

In [None]:
from src.causal_analysis import CausalAnalyzer
import yaml

# Read the same YAML config the pipeline was built from.
with open('../config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Series for the metric under study, as prepared by the pipeline.
analysis_data = pipeline.get_analysis_series(metric='revenue_usd')

# Fit the model and collect the impact metrics.
analyzer = CausalAnalyzer(analysis_data, config=config)
analyzer.run_causal_impact()
metrics = analyzer.get_impact_metrics()

# Print a banner followed by one "name: value" line per metric. Floats get
# thousands separators and four decimals; everything else is printed as-is.
banner = "=" * 50
print(banner)
print("CAUSAL IMPACT RESULTS")
print(banner)
for metric_name, metric_value in metrics.items():
    rendered = f"{metric_value:,.4f}" if isinstance(metric_value, float) else f"{metric_value}"
    print(f"{metric_name}: {rendered}")

In [None]:
# Pull the fitted series off the analyzer for plotting.
dates = analyzer.dates
actual = analyzer.predictions['actual']
predicted = analyzer.predictions['predicted']

# Solid blue line: what was observed.
observed_trace = go.Scatter(
    x=dates,
    y=actual,
    mode='lines',
    name='Observed',
    line={'color': 'blue', 'width': 2},
)

# Dashed orange line: the model's counterfactual prediction.
counterfactual_trace = go.Scatter(
    x=dates,
    y=predicted,
    mode='lines',
    name='Counterfactual',
    line={'color': 'orange', 'width': 2, 'dash': 'dash'},
)

fig = go.Figure(data=[observed_trace, counterfactual_trace])

# Dotted red marker at the intervention date.
fig.add_vline(x=intervention_date, line_dash='dot', line_color='red')

layout_options = {
    'title': 'Observed vs Counterfactual Revenue',
    'xaxis_title': 'Date',
    'yaxis_title': 'Revenue (USD)',
    'template': 'plotly_white',
}
fig.update_layout(**layout_options)
fig.show()

## 3. Financial Analysis (ROI Calculation)

In [None]:
from src.financial_analysis import FinancialAnalyzer

campaign_cost = 5000  # Set your campaign cost

# Turn the causal metrics plus the campaign spend into ROI figures.
fin_analyzer = FinancialAnalyzer(metrics, campaign_cost=campaign_cost)
fin_analyzer.calculate_roi()
results = fin_analyzer.financial_results

# Assemble the report line by line, then emit it in order.
rule = "=" * 50
report_lines = [
    rule,
    "FINANCIAL ANALYSIS",
    rule,
    f"Campaign Cost:    ${campaign_cost:,.2f}",
    f"Revenue Lift:     ${results['cumulative_effect']:,.2f}",
    f"Net Profit:       ${results['net_profit']:,.2f}",
    f"ROI:              {results['roi_percentage']:.1f}%",
    f"ROI Ratio:        {results['roi_ratio']:.2f}x",
]
for report_line in report_lines:
    print(report_line)

In [None]:
# Ask the financial analyzer for its plain-language summary and print it
# under a heading and a dashed rule.
narrative = fin_analyzer.generate_business_narrative()
print("\nBusiness Narrative:", "-" * 50, narrative, sep="\n")

## 4. Segment Comparison

In [None]:
# Compare the campaign's impact across marketing channels.
channels = ['email', 'social', 'search']

# The total spend is split evenly across the channels being compared, so the
# divisor follows the channel list instead of being hard-coded.
per_channel_cost = campaign_cost / len(channels)

segments_results = {}

for channel in channels:
    try:
        # Rebuild the pipeline's time series restricted to this channel.
        pipeline.create_time_series(
            intervention_date=intervention_date,
            segment_col='channel',
            segment_val=channel
        )

        # Segment-local name: the overall `analysis_data` from the causal
        # impact cell is left untouched.
        seg_data = pipeline.get_analysis_series(metric='revenue_usd')
        seg_analyzer = CausalAnalyzer(
            seg_data,
            config=config,
            segment=('channel', channel)
        )
        seg_analyzer.run_causal_impact()
        seg_metrics = seg_analyzer.get_impact_metrics()

        seg_fin = FinancialAnalyzer(seg_metrics, campaign_cost=per_channel_cost)
        seg_fin.calculate_roi()

        segments_results[channel] = {
            'effect': seg_metrics['cumulative_effect'],
            'p_value': seg_metrics['p_value'],
            'roi': seg_fin.financial_results['roi_percentage']
        }
    except Exception as e:
        # Deliberate best-effort: a channel that cannot be analyzed is reported
        # and skipped so the remaining channels still appear in the comparison.
        print(f"Could not analyze {channel}: {e}")

# The loop leaves the pipeline holding the last channel's series. Rebuild the
# overall series (same call as the time-series cell) so re-running earlier
# cells after this one does not silently analyze a single channel.
pipeline.create_time_series(intervention_date=intervention_date)

# Display comparison. Columns are renamed by key rather than by position, and
# `reindex` guarantees the three columns exist even when every channel failed
# (an empty frame has no columns, so assigning three labels would raise).
column_labels = {
    'effect': 'Cumulative Effect ($)',
    'p_value': 'P-Value',
    'roi': 'ROI (%)',
}
comparison_df = (
    pd.DataFrame(segments_results).T
    .reindex(columns=list(column_labels))
    .rename(columns=column_labels)
)
comparison_df

## 5. Advanced Analytics Demo

In [None]:
# Propensity Score Matching Demo
from src.propensity_matching import PropensityMatcher

# Create sample data (seeded so the synthetic draws are reproducible)
np.random.seed(42)
n = 500

# Single source of truth for the effect baked into the synthetic outcome, so
# the reference value printed at the end cannot drift from the formula.
true_effect = 20

psm_data = pd.DataFrame({
    'treatment': np.random.binomial(1, 0.3, n),
    'age': np.random.normal(40, 10, n),
    'income': np.random.lognormal(10, 0.5, n),
    'tenure': np.random.uniform(0, 10, n)
})

# Add outcome with treatment effect. Note that `tenure` is used as a matching
# covariate below but does not enter the outcome.
psm_data['outcome'] = (
    50 +
    0.5 * psm_data['age'] +
    0.0001 * psm_data['income'] +
    psm_data['treatment'] * true_effect +  # True treatment effect
    np.random.normal(0, 10, n)
)

# Run propensity matching
matcher = PropensityMatcher(
    data=psm_data,
    treatment_col='treatment',
    covariates=['age', 'income', 'tenure'],
    outcome_col='outcome'
)

matcher.fit()
matched = matcher.match(caliper=0.1)

# Treated-unit counts before and after matching
print(f"Original treated: {(psm_data['treatment'] == 1).sum()}")
print(f"Matched treated: {(matched['treatment'] == 1).sum()}")

# Estimate ATT and show it next to the known effect for comparison
att_results = matcher.estimate_att()
print(f"\nATT Estimate: {att_results['att']:.2f}")
print(f"True Effect:  {true_effect:.2f}")

## 6. Generating a PDF Report

In [None]:
from src.pdf_report import PDFReportGenerator
import os

# Start from the causal metrics and layer the two headline financial figures
# on top (they overwrite same-named keys if any exist).
report_results = dict(metrics)
report_results.update(
    roi_percentage=results['roi_percentage'],
    net_profit=results['net_profit'],
)

# Make sure the destination folder exists before writing into it.
os.makedirs('../output', exist_ok=True)

# Configure the generator, then render the report.
generator_options = {
    'analysis_results': report_results,
    'output_path': '../output/analysis_report.pdf',
    'title': 'Campaign Impact Analysis',
    'author': 'Analytics Team',
}
generator = PDFReportGenerator(**generator_options)

output = generator.generate(include_methodology=True, include_recommendations=True)

print(f"Report generated: {output}")

## 7. Using the Experiment Tracker

In [None]:
from src.experiment_tracker import ExperimentTracker

# Tracker instance pointed at the shared experiments directory.
tracker = ExperimentTracker('../experiments')

# Record this notebook's analysis as a single run.
with tracker.start_run('campaign_analysis', 'notebook_demo') as run:
    # Inputs that define the run
    run_params = {
        'intervention_date': intervention_date,
        'campaign_cost': campaign_cost,
        'model': 'bayesian_ridge',
    }
    run.log_params(run_params)

    # Headline outcomes from the causal and financial analyses
    run_metrics = {
        'cumulative_effect': metrics['cumulative_effect'],
        'p_value': metrics['p_value'],
        'roi_percentage': results['roi_percentage'],
    }
    run.log_metrics(run_metrics)

    run.set_tag('source', 'jupyter_notebook')

print("✓ Run logged to experiment tracker")

# Tabulate every run logged under this experiment; shown as the cell output.
comparison = tracker.compare_runs('campaign_analysis')
comparison

## Summary

In this notebook, we demonstrated:

1. **Data Pipeline**: Loading, cleaning, and preparing time series data
2. **Causal Analysis**: Running Bayesian structural time-series analysis
3. **Financial Metrics**: Calculating ROI and business metrics
4. **Segment Comparison**: Analyzing impact across different segments
5. **Advanced Analytics**: Propensity score matching
6. **Reporting**: Generating PDF reports
7. **Experiment Tracking**: Logging runs for reproducibility

For more information, see the [documentation](../docs/) and [README](../README.md).