In [2]:
!pip install yfinance
!pip install dowhy

Collecting yfinance
  Downloading yfinance-0.2.37-py2.py3-none-any.whl (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.0/73.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.2/112.2 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peewee>=3.16.2
  Downloading peewee-3.17.1.tar.gz (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting beautifulsoup4>=4.11.1
  Using cached beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
Collecting frozendict>=2.3.4
  Downloading frozendict-2.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux20

In [3]:
import yfinance as yf
import pandas as pd
from dowhy import CausalModel
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Download historical stock data for multiple stocks
tickers = ["AAPL", "MSFT", "GOOGL"]  # Example: Apple, Microsoft, and Google
start_date = "2020-01-01"
end_date = "2023-01-01"

In [5]:
# Fetch the closing prices
data = yf.download(tickers, start=start_date, end=end_date)["Close"]

# Compute daily returns as percentage change of the closing prices
daily_returns = data.pct_change().dropna()

[*********************100%%**********************]  3 of 3 completed


In [6]:
daily_returns.head()


Ticker,AAPL,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-03,-0.009722,-0.005231,-0.012452
2020-01-06,0.007968,0.026654,0.002585
2020-01-07,-0.004703,-0.001932,-0.009118
2020-01-08,0.016086,0.007118,0.015928
2020-01-09,0.021241,0.010498,0.012493


In [11]:
# For simplicity, let's consider AAPL returns as our outcome, and MSFT and GOOGL returns as our treatments
# In a real-world scenario, the choice of treatments and outcome should be based on the hypothesis about causal relationships
outcome = 'AAPL'
# treatments = ['MSFT', 'GOOGL']
treatments = ['MSFT']


In [12]:
# Add a constant to include as bias (intercept) in linear regression models
daily_returns['const'] = 1

In [15]:
help(CausalModel)

Help on class CausalModel in module dowhy.causal_model:

class CausalModel(builtins.object)
 |  CausalModel(data, treatment, outcome, graph=None, common_causes=None, instruments=None, effect_modifiers=None, estimand_type='nonparametric-ate', proceed_when_unidentifiable=False, missing_nodes_as_confounders=False, identify_vars=False, **kwargs)
 |  
 |  Main class for storing the causal model state.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, data, treatment, outcome, graph=None, common_causes=None, instruments=None, effect_modifiers=None, estimand_type='nonparametric-ate', proceed_when_unidentifiable=False, missing_nodes_as_confounders=False, identify_vars=False, **kwargs)
 |      Initialize data and create a causal graph instance.
 |      
 |      Assigns treatment and outcome variables.
 |      Also checks and finds the common causes and instruments for treatment
 |      and outcome.
 |      
 |      At least one of graph, common_causes or instruments must be provided. If
 |

In [13]:
# Define the causal model
model = CausalModel(data=daily_returns,
                    treatment=treatments,
                    outcome=outcome,
                    common_causes=['GOOGL'])

# Identify the causal effect using the default method
identified_estimand = model.identify_effect()

# Estimate the causal effect using linear regression
estimate = model.estimate_effect(identified_estimand,
                                 method_name="backdoor.linear_regression")




  intercept_parameter = self.model.params[0]


In [14]:
print(estimate)


*** Causal Estimate ***

## Identified estimand
Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
   d                  
───────(E[AAPL|const])
d[MSFT]               
Estimand assumption 1, Unconfoundedness: If U→{MSFT} and U→AAPL then P(AAPL|MSFT,const,U) = P(AAPL|MSFT,const)

## Realized estimand
b: AAPL~MSFT+const
Target units: ate

## Estimate
Mean value: 0.8617162704467773

