In [None]:
!pip install econml

In [None]:
# Load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.dummy import DummyClassifier
from econml.dml import LinearDML

In [None]:
# Read the first simulated dataset (the one the estimator is later fitted on)
# and show it as this cell's output.
csv_path1 = 'SimulationHetogeneity-01.csv'
datos1 = pd.read_csv(csv_path1)
datos1

In [None]:
# Split datos1 into its modelling roles:
#   X1 - covariates: every column except 'id', the outcome 'Y' and the treatment 'T'
#   T1 - treatment column
#   y1 - outcome column
X1 = datos1.drop(columns=['id', 'Y', 'T'])
T1 = datos1['T']
y1 = datos1['Y']

In [None]:
# Nuisance models handed to the DML estimator:
#   model_y1 - ordinary least squares for the outcome
#   model_t1 - dummy classifier with the 'prior' strategy for the treatment
# NOTE(review): a dummy treatment model ignores the covariates, which is
# presumably intentional because treatment assignment is randomized - confirm.
model_y1, model_t1 = LinearRegression(), DummyClassifier(strategy='prior')

In [None]:
# Build the LinearDML estimator (discrete treatment, fixed random_state for
# reproducibility) and fit it on outcome y1, treatment T1 and covariates X1.
# fit() hands back the estimator, so construction and fitting are chained.
est1 = LinearDML(
    model_y=model_y1,
    model_t=model_t1,
    discrete_treatment=True,
    random_state=12345,
).fit(y1, T1, X=X1)

In [None]:
# Report the average treatment effect over the rows of X1, its confidence
# interval, and the fitted model summary (shown as the cell's output).
# alpha=0.05 is passed explicitly so the interval is guaranteed to be the 95%
# one promised by the printed label, instead of depending on whatever default
# alpha the installed EconML version happens to use.
print("ATE:",est1.ate(X=X1))
print("95% Confidence Interval ATE:",est1.ate_interval(X=X1, alpha=0.05))
est1.summary(alpha=0.05)

In [None]:
# Estimated effect (tau) for each row of the covariates X1 the model was
# fitted on; displayed as the cell's output.
effect1 = est1.effect(X=X1)
effect1

In [None]:
# Distribution of the estimated effects on datos1 (30-bin histogram),
# drawn on an explicitly created 10x6 figure/axes pair.
fig1, ax1 = plt.subplots(figsize=(10, 6))
sns.histplot(data=effect1, bins=30, ax=ax1)
ax1.set_title('Histogram')
plt.show()

In [None]:
# Read the second simulated dataset, which is scored with the already-fitted
# estimator further below, and show it as this cell's output.
csv_path2 = 'SimulationHetogeneity-02.csv'
datos2 = pd.read_csv(csv_path2)
datos2

In [None]:
# Covariates of the new dataset: everything except 'id' and the two
# outcome columns 'Y0' and 'Y1'.
# NOTE(review): assumes the remaining columns match X1 in number and order,
# since they are fed to the estimator fitted on X1 - confirm.
X2 = datos2.drop(columns=['id', 'Y0', 'Y1'])
X2

In [None]:
# Apply the estimator fitted on datos1 to the new covariates X2 to get an
# estimated effect (tau) per row; displayed as the cell's output.
effect2 = est1.effect(X=X2)
effect2

In [None]:
# Distribution of the estimated effects on datos2 (30-bin histogram),
# drawn on an explicitly created 10x6 figure/axes pair.
fig2, ax2 = plt.subplots(figsize=(10, 6))
sns.histplot(data=effect2, bins=30, ax=ax2)
ax2.set_title('Histogram')
plt.show()

In [None]:
# Treatment rule: treat a row only when its estimated effect is strictly
# positive. select2 is the 0/1 indicator of that rule.
positive_effect2 = effect2 > 0
select2 = positive_effect2.astype(int)

# Outcome obtained under the rule: Y1 for selected rows, Y0 for the rest.
outcome_if_selected2 = select2 * datos2['Y1']
outcome_if_not_selected2 = (1 - select2) * datos2['Y0']
selection2 = outcome_if_selected2 + outcome_if_not_selected2


In [None]:
# Total outcome under the estimated-effect selection rule, together with the
# two blanket policies used as baselines (treat everyone / treat no one).
selection_sum = selection2.sum()
sum_Y1 = datos2['Y1'].sum()
sum_Y0 = datos2['Y0'].sum()

# Print the three totals in a fixed order: selection rule first, then baselines.
for label, total in (
    ("Sum of outcomes under selection:", selection_sum),
    ("Sum if treated everyone:", sum_Y1),
    ("Sum if treated no one:", sum_Y0),
):
    print(label, total)


In [None]:
# Crystal-ball benchmark: for every row take the larger of the two
# outcome columns Y0 and Y1, then add those row-wise maxima up.
potential_outcomes2 = datos2[['Y0', 'Y1']]
best_outcome2 = potential_outcomes2.max(axis='columns')
optimal_selection_sum = best_outcome2.sum()
print("Sum of optimal selection:", optimal_selection_sum)