In [None]:
import os
import sys
import math
import logging
from pathlib import Path

import numpy as np
import scipy as sp
import sklearn
import statsmodels.api as sm
from statsmodels.formula.api import ols

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
# Configure seaborn in ONE call. A bare `sns.set(rc=...)` re-applies the
# default "notebook" context, which silently undoes an earlier
# `sns.set_context("poster")`; passing context/style/rc together keeps all
# three settings in effect.
sns.set(context="poster", style="whitegrid", rc={'figure.figsize': (16, 9.)})

import pandas as pd
# Raise pandas' display limits: frames are shown in full up to 120 rows and
# 120 columns before pandas falls back to its truncated view.
pd.options.display.max_rows = 120
pd.options.display.max_columns = 120

# Configure the root logger: emit records of level INFO and above, written to
# stdout instead of the default stderr stream.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

In [None]:
from justcause import *

**PLEASE** save this notebook right now using the following naming convention: `NUMBER_FOR_SORTING-YOUR_INITIALS-SHORT_DESCRIPTION`, e.g. `1.0-fw-initial-data-exploration`. The leading number controls where the notebook sorts within the directory, so choose it to reflect the order in which the notebooks are meant to be used.

# Quickstart

In [42]:
from justcause.data.sets import load_ihdp
from justcause.learners import SLearner
from justcause.learners.propensity import estimate_propensities
from justcause.metrics import pehe_score, mean_absolute
from justcause.evaluation import calc_scores

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

import pandas as pd

# Quickstart: fit an S-learner (linear regression base model) on three IHDP
# replications and collect one row of scores per replication.

# Fixed seed for the train/test split so that Restart & Run All reproduces
# the same scores instead of a fresh random split each time.
SEED = 42

replications = load_ihdp(select_rep=[0, 1, 2])
slearner = SLearner(LinearRegression())
metrics = [pehe_score, mean_absolute]
scores = []

for rep in replications:
    # Hold out 20% of each replication for evaluation.
    train, test = train_test_split(rep, train_size=0.8, random_state=SEED)

    # Propensities are estimated on the training split only and handed to the
    # learner as inverse weights.
    # NOTE(review): presumably `p` is P(t=1 | x); if so, 1/p also weights
    # control units by the inverse *treatment* propensity -- confirm intended.
    p = estimate_propensities(train.np.X, train.np.t)
    slearner.fit(train.np.X, train.np.t, train.np.y, weights=1 / p)

    # Score the predicted ITEs against `test.np.ite` with every metric.
    pred_ite = slearner.predict_ite(test.np.X, test.np.t, test.np.y)
    scores.append(calc_scores(test.np.ite, pred_ite, metrics))

pd.DataFrame(scores)



Unnamed: 0,pehe_score,mean_absolute
0,0.91889,0.030689
1,0.795572,0.133251
2,0.849018,0.1239
