/
causalinference.py
80 lines (76 loc) · 3.29 KB
/
causalinference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def causal_inference_model(
df,
method="t-learner",
metalearner_type=None,
treatment_col="treatment",
outcome_col="outcome",
text_col=None,
ignore_cols=[],
include_cols=[],
treatment_effect_col="treatment_effect",
learner=None,
effect_learner=None,
min_df=0.05,
max_df=0.5,
ngram_range=(1, 1),
stop_words="english",
verbose=1,
):
"""
```
Infers causality from the data contained in `df` using a metalearner.
This function is a wrapper to the CausalNLP.CausalInferenceModel class.
For more details on methods and capabilities of the returned `CausalInferenceModel` object,
see the [CausalNLP documentation](https://amaiya.github.io/causalnlp/causalinference.html).
Usage:
>>> cm = causal_inference_model(df,
treatment_col='Is_Male?',
outcome_col='Post_Shared?', text_col='Post_Text',
ignore_cols=['id', 'email'])
cm.fit()
**Parameters:**
* **df** : pandas.DataFrame containing dataset
* **method** : metalearner model to use. One of {'t-learner', 's-learner', 'x-learner', 'r-learner'} (Default: 't-learner')
* **metalearner_type** : Alias of **method** parameter for backwards compatibility. If not None, overrides method.
* **treatment_col** : treatment variable; column should contain binary values: 1 for treated, 0 for untreated.
* **outcome_col** : outcome variable; column should contain the categorical or numeric outcome values
* **text_col** : (optional) text column containing the strings (e.g., articles, reviews, emails).
* **ignore_cols** : columns to ignore in the analysis
* **include_cols** : columns to include as covariates (e.g., possible confounders)
* **treatment_effect_col** : name of column to hold causal effect estimations. Does not need to exist. Created by CausalNLP.
* **learner** : an instance of a custom learner. If None, a default LightGBM will be used.
# Example
learner = LGBMClassifier(num_leaves=1000)
* **effect_learner**: used for x-learner/r-learner and must be regression model
* **min_df** : min_df parameter used for text processing using sklearn
* **max_df** : max_df parameter used for text procesing using sklearn
* **ngram_range**: ngrams used for text vectorization. default: (1,1)
* **stop_words** : stop words used for text processing (from sklearn)
* **verbose** : If 1, print informational messages. If 0, suppress.
**Returns:**
`CausalNLP.CausalInferenceModel` object
```
"""
try:
import causalnlp
except ImportError:
raise Exception("CausalNLP must be installed: pip install causalnlp")
from causalnlp import CausalInferenceModel
return CausalInferenceModel(
df,
method=method,
metalearner_type=metalearner_type,
treatment_col=treatment_col,
outcome_col=outcome_col,
text_col=text_col,
ignore_cols=ignore_cols,
include_cols=include_cols,
treatment_effect_col=treatment_effect_col,
learner=learner,
effect_learner=effect_learner,
min_df=min_df,
max_df=max_df,
ngram_range=ngram_range,
stop_words=stop_words,
verbose=verbose,
)