In [None]:
!pip install causalml

Installing collected packages: ppft, pox, pathos, forestci, causalml
Successfully installed causalml-0.15.5 forestci-0.6 pathos-0.2.9 pox-0.3.6 ppft-1.7.7


In [None]:
import pandas as pd
import numpy as np
# causalml을 사용한 인과추론 분석 (X-learner 기반)
from causalml.inference.meta import BaseXLearner
from causalml.propensity import compute_propensity_score
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression

from causalml.inference.meta import BaseSLearner, BaseTLearner, BaseXLearner, BaseRLearner, BaseDRLearner

# S-Learner


In [None]:
from sklearn.linear_model import LinearRegression
from causalml.dataset import synthetic_data

# Seed NumPy's global RNG before sampling; intended to make the simulated
# dataset below identical on every run.
np.random.seed(41)

# Number of covariate columns; reused later to name the columns X0..X{p-1}.
num_features = 5

# Simulate a small dataset of 100 rows. Y, X and T are used below as outcome,
# covariates and treatment flag. tau, b and e are not used in the visible cells
# (presumably true effect, expected outcome and propensity; confirm in causalml docs).
Y, X, T, tau, b, e = synthetic_data(n=100, p=num_features, mode=1)

In [None]:
# Manual S-learner: a single regression that receives the treatment flag T as
# one more input column next to the covariates.

# Design matrix: covariates X0..X{p-1} plus the treatment column T.
data_X = pd.DataFrame(X, columns=[f'X{i}' for i in range(num_features)]).assign(T=T)

# Fit one outcome model on covariates and treatment together.
model = LinearRegression().fit(data_X, Y)

# Counterfactual versions of the data: every row untreated / every row treated.
data_0 = data_X.assign(T=0)
data_1 = data_X.assign(T=1)

# Predicted outcome of each row under both scenarios.
y0 = model.predict(data_0)
y1 = model.predict(data_1)

# Per-row effect estimate: predicted treated outcome minus predicted control outcome.
CATE = y1 - y0

## 패키지 결과 확인

In [None]:
# Same S-learner through causalml's wrapper, as a cross-check of the manual estimate.
lrS = BaseSLearner(learner=LinearRegression())

# Train on covariates, treatment flags and outcomes, then score the same rows.
lrS.fit(X=X, treatment=T, y=Y)
CATE_package = lrS.predict(X=X)

In [None]:
# Collapse the per-row effects of each approach into a single average effect.
ATE, ATE_package = np.mean(CATE), np.mean(CATE_package)

# Print the manual value first, then the package value, one per line.
print(ATE, ATE_package, sep='\n')

0.6730737424211355
0.6730737424211367


# T 러너

In [None]:
# Manual T-learner: one outcome model per treatment arm, trained on covariates only.
#
# The covariate columns are selected explicitly. data_X also carries the
# treatment column T (constant within each arm, so it is not a usable feature)
# and gains a Y column once the X-learner cell has run; feeding either to the
# arm models is wrong, and Y would leak the target if this cell is re-run later.
feature_cols = [f'X{i}' for i in range(num_features)]

model0 = LinearRegression()
model1 = LinearRegression()

# Boolean row masks for the control (T == 0) and treated (T == 1) arms.
is_control = (data_X['T'] == 0).to_numpy()
is_treated = (data_X['T'] == 1).to_numpy()

data_X_T0 = data_X[is_control]
data_X_T1 = data_X[is_treated]

# Each model only ever sees the rows (and outcomes) of its own arm.
model0.fit(data_X_T0[feature_cols], Y[is_control])
model1.fit(data_X_T1[feature_cols], Y[is_treated])

# Per-row effect: treated-arm prediction minus control-arm prediction on all rows.
CATE_T = model1.predict(data_X[feature_cols]) - model0.predict(data_X[feature_cols])
print(np.mean(CATE_T))

0.6185525659059508


In [None]:
# T-learner through causalml, built from two LinearRegression estimators,
# as a cross-check of the manual result.
lrT = BaseTLearner(LinearRegression(), LinearRegression())
lrT.fit(X=X, treatment=T, y=Y)

# Score the training rows and report the average estimated effect.
CATE_T_package = lrT.predict(X=X)
print(np.mean(CATE_T_package))

0.6185525659059506


# X 러너

In [None]:
# Attach the outcome to the shared frame (this adds a Y column to data_X itself),
# then continue on an independent copy.
data_X['Y'] = Y
df = data_X.copy()

# Split the rows by treatment arm: T == 0 and T == 1.
df_0 = df.loc[df['T'] == 0]
df_1 = df.loc[df['T'] == 1]

In [None]:
# Manual X-learner.

# Covariate column names, derived from num_features so they stay in sync with
# how the columns of data_X were named instead of being hard-coded to five.
X_list = [f'X{i}' for i in range(num_features)]
model0 = LinearRegression()
model1 = LinearRegression()

# Stage 1: as in the T-learner, fit one outcome model per treatment arm.
model0.fit(df_0[X_list], df_0['Y'])
model1.fit(df_1[X_list], df_1['Y'])

# Stage 2: imputed per-row effects. These are not plain residuals: each row's
# observed outcome is compared with the *other* arm's predicted outcome.
tau_hat_0 = model1.predict(df_0[X_list]) - df_0['Y']  # control rows: predicted treated outcome - observed outcome
tau_hat_1 = df_1['Y'] - model0.predict(df_1[X_list])  # treated rows: observed outcome - predicted control outcome

# Regress the imputed effects on the covariates, separately within each arm.
tau0_model = LinearRegression()
tau1_model = LinearRegression()

tau0_model.fit(df_0[X_list], tau_hat_0)
tau1_model.fit(df_1[X_list], tau_hat_1)

# Propensity score from a logistic regression on the covariates.
ps_model = LogisticRegression()
ps_model.fit(df[X_list], df['T'])
e_x = ps_model.predict_proba(df[X_list])[:, 1]  # P(T=1 | X)

# Effect predictions of both stage-2 models on every row.
tau1_hat = tau1_model.predict(df[X_list])
tau0_hat = tau0_model.predict(df[X_list])

# Final X-learner estimate: propensity-weighted blend, with weight e(x) on the
# control-arm effect model and 1 - e(x) on the treated-arm effect model.
CATE_X = e_x * tau0_hat + (1 - e_x) * tau1_hat
np.mean(CATE_X)


np.float64(0.6185525659059503)

In [None]:
# X-learner through causalml with a LinearRegression base learner,
# as a cross-check of the manual estimate.
lrX = BaseXLearner(learner=LinearRegression())
lrX.fit(X=X, treatment=T, y=Y)

# Score the training rows and report the average estimated effect.
CATE_X_package = lrX.predict(X=X)
print(np.mean(CATE_X_package))


0.6185525659059505
