In [1]:
import os
import pandas as pd
import numpy as np

from scm_models import LawSchool


In [2]:
# set relevant paths
wrk_dir = os.path.dirname(os.getcwd())
data_path = wrk_dir + '\\' + 'data' + '\\'
rslt_path = wrk_dir + '\\' + 'results' + '\\'

## Generate the structural counterfactuals (SCF) for pooled U (pU)

In [3]:
df = pd.read_csv(rslt_path + 'pU_upd_LawData.csv', sep='|')
df.head(5)

Unnamed: 0,LSAT,UGPA,female,male,white,nonwhite,U
2,36,3.0,1,0,1,0,0.576901
14,35,2.9,0,1,1,0,0.578569
33,32,3.4,0,1,1,0,-0.204209
51,36,3.4,1,0,1,0,-0.102606
57,33,3.3,1,0,1,0,0.095161


In [4]:
ugpa_weights = pd.read_csv(rslt_path + 'pU_wUGPA_LawData.csv', sep='|')
ugpa_weights

Unnamed: 0,female,male,white,nonwhite,ugpa0,eta_u_ugpa
0,0.892257,0.778555,0.872479,0.643996,1.565787,-0.265698


In [5]:
lsat_weights = pd.read_csv(rslt_path + 'pU_wLSAT_LawData.csv', sep='|')
lsat_weights

Unnamed: 0,female,male,white,nonwhite,lsat0,eta_u_lsat
0,0.888693,0.903381,0.944002,0.807412,1.780424,-0.029426


In [6]:
dag_law_school = [('U', 'UGPA', ugpa_weights.loc[0, 'eta_u_ugpa']),
                  ('U', 'LSAT', lsat_weights.loc[0, 'eta_u_lsat']),
                  ('female', 'UGPA', ugpa_weights.loc[0, 'female']),
                  ('male', 'UGPA', ugpa_weights.loc[0, 'male']),
                  ('white', 'UGPA', ugpa_weights.loc[0, 'white']),
                  ('nonwhite', 'UGPA', ugpa_weights.loc[0, 'nonwhite']),
                  ('female', 'LSAT', lsat_weights.loc[0, 'female']),
                  ('male', 'LSAT', lsat_weights.loc[0, 'male']),
                  ('white', 'LSAT', lsat_weights.loc[0, 'white']),
                  ('nonwhite', 'LSAT', lsat_weights.loc[0, 'nonwhite'])  
                 ]
dag_law_school

[('U', 'UGPA', -0.265697955298776),
 ('U', 'LSAT', -0.0294260223637853),
 ('female', 'UGPA', 0.8922571716904371),
 ('male', 'UGPA', 0.7785545101456421),
 ('white', 'UGPA', 0.872479482912175),
 ('nonwhite', 'UGPA', 0.643996208779713),
 ('female', 'LSAT', 0.888693464312875),
 ('male', 'LSAT', 0.9033814542553911),
 ('white', 'LSAT', 0.944001586963716),
 ('nonwhite', 'LSAT', 0.807412156047182)]

In [7]:
scm_law_school = LawSchool(dag_law_school)

In [8]:
scm_law_school.nodes

['U', 'UGPA', 'LSAT', 'female', 'male', 'white', 'nonwhite']

In [9]:
scm_law_school.weights

{('U', 'UGPA'): -0.265697955298776,
 ('U', 'LSAT'): -0.0294260223637853,
 ('female', 'UGPA'): 0.8922571716904371,
 ('male', 'UGPA'): 0.7785545101456421,
 ('white', 'UGPA'): 0.872479482912175,
 ('nonwhite', 'UGPA'): 0.643996208779713,
 ('female', 'LSAT'): 0.888693464312875,
 ('male', 'LSAT'): 0.9033814542553911,
 ('white', 'LSAT'): 0.944001586963716,
 ('nonwhite', 'LSAT'): 0.807412156047182}

In [11]:
scm_law_school.adjacency_mtr

Unnamed: 0,U,UGPA,LSAT,female,male,white,nonwhite
U,0,-0.265698,-0.029426,0,0,0,0
UGPA,0,0.0,0.0,0,0,0,0
LSAT,0,0.0,0.0,0,0,0,0
female,0,0.892257,0.888693,0,0,0,0
male,0,0.778555,0.903381,0,0,0,0
white,0,0.872479,0.944002,0,0,0,0
nonwhite,0,0.643996,0.807412,0,0,0,0


In [12]:
scm_law_school.adjacency_lst

{'U': ['UGPA', 'LSAT'],
 'UGPA': [],
 'LSAT': [],
 'female': ['UGPA', 'LSAT'],
 'male': ['UGPA', 'LSAT'],
 'white': ['UGPA', 'LSAT'],
 'nonwhite': ['UGPA', 'LSAT']}

In [13]:
"""
The structal equations:

UGPA = ugpa0 + w_U->UGPA * U + w_female->UGPA * female +
        w_male->UGPA * male + w_white->UGPA * white +  w_nonwhite->UGPA * nonwhite 
        
LSAT = lsat0 + w_U->UGPA * U + w_female->UGPA * female +
        w_male->UGPA * male + w_white->UGPA * white +  w_nonwhite->UGPA * nonwhite 
"""

'\nThe structal equations:\n\nUGPA = ugpa0 + w_U->UGPA * U + w_female->UGPA * female +\n        w_male->UGPA * male + w_white->UGPA * white +  w_nonwhite->UGPA * nonwhite \n        \nLSAT = lsat0 + w_U->UGPA * U + w_female->UGPA * female +\n        w_male->UGPA * male + w_white->UGPA * white +  w_nonwhite->UGPA * nonwhite \n'

In [15]:
# notice that each weight except for the intercept belongs to the adjacency matrix
adj_mtr = scm_law_school.adjacency_mtr
adj_mtr

Unnamed: 0,U,UGPA,LSAT,female,male,white,nonwhite
U,0,-0.265698,-0.029426,0,0,0,0
UGPA,0,0.0,0.0,0,0,0,0
LSAT,0,0.0,0.0,0,0,0,0
female,0,0.892257,0.888693,0,0,0,0
male,0,0.778555,0.903381,0,0,0,0
white,0,0.872479,0.944002,0,0,0,0
nonwhite,0,0.643996,0.807412,0,0,0,0


In [25]:
def pred_ugpa(v_u, v_female, v_male, v_white, v_nonwhite):
    return (ugpa_weights.loc[0, 'ugpa0'] + 
            adj_mtr.loc['U']['UGPA'] * v_u +
            adj_mtr.loc['female']['UGPA'] * v_female +
            adj_mtr.loc['male']['UGPA'] * v_male +
            adj_mtr.loc['white']['UGPA'] * v_white +
            adj_mtr.loc['nonwhite']['UGPA'] * v_nonwhite)

def pred_lsat(v_u, v_female, v_male, v_white, v_nonwhite):
    return np.exp(lsat_weights.loc[0, 'lsat0'] + 
                  adj_mtr.loc['U']['LSAT'] * v_u +
                  adj_mtr.loc['female']['LSAT'] * v_female +
                  adj_mtr.loc['male']['LSAT'] * v_male +
                  adj_mtr.loc['white']['LSAT'] * v_white +
                  adj_mtr.loc['nonwhite']['LSAT'] * v_nonwhite)

In [29]:
for idx, row in df.iterrows():
    print(idx)
    
    p_ugpa = pred_ugpa(v_u=row['U'], 
                       v_female=row['female'], 
                       v_male=row['male'], 
                       v_white=row['white'], 
                       v_nonwhite=row['nonwhite'])
    
    p_lsat = pred_ugpa(v_u=row['U'], 
                       v_female=row['female'], 
                       v_male=row['male'], 
                       v_white=row['white'], 
                       v_nonwhite=row['nonwhite'])
    
    print(row['UGPA'])
    print(p_ugpa)
    
    print(row['LSAT'])
    print(p_lsat)

2
3.0
3.1772420083095594
36.0
3.1772420083095594
14
2.9
3.063096274499293
35.0
3.063096274499293
33
3.4
3.271078900286651
32.0
3.271078900286651
51
3.4
3.3577856252741225
36.0
3.3577856252741225
57
3.3
3.3052394726409235
33.0
3.3052394726409235
78
2.8
3.0789167217396964
34.0
3.0789167217396964
96
3.3
3.3394498919044144
42.0
3.3394498919044144
122
3.3
3.351941587552325
45.0
3.351941587552325
155
3.2
3.2157254089100884
40.0
3.2157254089100884
159
3.4
3.36799734165337
38.0
3.36799734165337
164
3.7
3.4255683760695006
37.0
3.4255683760695006
166
3.4
3.2284951697388813
31.0
3.2284951697388813
172
3.5
3.306821081619966
29.0
3.306821081619966
174
3.8
3.4815117480631814
39.0
3.4815117480631814
183
3.7
3.370582356302341
32.0
3.370582356302341
188
3.0
3.101561672992284
33.0
3.101561672992284
200
3.0
3.1590030256189983
30.0
3.1590030256189983
209
3.4
3.3247636601524815
29.0
3.3247636601524815
215
3.0
3.1830111952970346
38.0
3.1830111952970346
217
2.6
2.9262744348215857
34.0
2.9262744348215857
224


3.557147299894495
42.0
3.557147299894495
2129
2.7
2.925303237320424
33.0
2.925303237320424
2139
3.0
3.1847207333354364
39.0
3.1847207333354364
2148
3.5
3.4055065878671935
38.0
3.4055065878671935
2165
3.5
3.3968516630598606
36.0
3.3968516630598606
2178
3.3
3.2543030593913396
37.0
3.2543030593913396
2207
3.9
3.58426879058362
40.0
3.58426879058362
2210
3.7
3.509983425731852
41.0
3.509983425731852
2212
3.5
3.4076639647534503
37.0
3.4076639647534503
2218
3.4
3.2327220640959853
31.0
3.2327220640959853
2227
3.2
3.2772883497474683
38.0
3.2772883497474683
2233
2.9
3.0848331286581074
40.0
3.0848331286581074
2255
2.7
2.9894336421448235
39.0
2.9894336421448235
2260
3.0
3.1861280663435227
38.0
3.1861280663435227
2276
3.1
3.2320215009769178
38.0
3.2320215009769178
2280
2.5
2.963829444149844
37.0
2.963829444149844
2286
2.3
2.7787546770893883
30.0
2.7787546770893883
2292
3.3
3.298362742506091
33.0
3.298362742506091
2296
2.5
2.901186053847097
39.0
2.901186053847097
2299
3.8
3.5186209333524645
34.0
3.51

2.975561196042322
46.0
2.975561196042322
5255
3.4
3.3385383568243294
47.0
3.3385383568243294
5277
3.7
3.4624292675829627
43.0
3.4624292675829627
5280
3.7
3.44359723981519
41.0
3.44359723981519
5288
3.6
3.478200274536692
43.0
3.478200274536692
5304
3.5
3.378602095355626
46.0
3.378602095355626
5316
3.8
3.4341374062684493
37.0
3.4341374062684493
5322
3.1
3.2078924522201566
48.0
3.2078924522201566
5332
3.7
3.531833498149074
47.0
3.531833498149074
5336
3.7
3.450713922708555
43.0
3.450713922708555
5343
3.0
3.0254509295772083
40.0
3.0254509295772083
5381
3.5
3.2399482210253616
39.0
3.2399482210253616
5382
3.3
3.338072889793841
42.0
3.338072889793841
5386
3.3
3.343897723215793
43.0
3.343897723215793
5387
3.4
3.317050195254287
41.0
3.317050195254287
5397
3.7
3.339971268399633
40.0
3.339971268399633
5398
3.9
3.5999386650873313
41.0
3.5999386650873313
5404
2.8
2.916130681611711
35.0
2.916130681611711
5425
3.9
3.5251009455148408
39.0
3.5251009455148408
5431
3.2
3.2228099956914615
42.0
3.2228099956

2.9494981598854952
39.0
2.9494981598854952
7227
3.1
3.0986993064427173
31.0
3.0986993064427173
7236
3.0
3.1103492144707476
34.0
3.1103492144707476
7246
3.0
3.1189442258638294
37.0
3.1189442258638294
7252
3.2
3.2727058537239917
38.0
3.2727058537239917
7253
3.8
3.501521689643134
29.0
3.501521689643134
7255
3.2
3.193686503231909
34.0
3.193686503231909
7263
3.5
3.3793717608481244
31.0
3.3793717608481244
7265
2.7
2.845988045658098
28.0
2.845988045658098
7267
3.1
3.15957608282889
36.0
3.15957608282889
7268
3.1
3.1859563163591083
43.0
3.1859563163591083
7275
2.8
3.099706897911091
39.0
3.099706897911091
7277
3.4
3.346922141353567
35.0
3.346922141353567
7279
2.5
2.975410615511581
41.0
2.975410615511581
7288
3.4
3.3382984917390925
32.0
3.3382984917390925
7292
2.9
3.0748743086381154
37.0
3.0748743086381154
7301
2.7
3.0566449097427144
39.0
3.0566449097427144
7302
3.5
3.3206928869605274
33.0
3.3206928869605274
7309
3.2
3.2808612833371047
39.0
3.2808612833371047
7312
2.6
2.894328560423398
36.0
2.894

9370
3.9
3.5439430444761455
45.0
3.5439430444761455
9377
2.9
3.1568296586363505
41.0
3.1568296586363505
9403
2.8
2.984106526083184
36.0
2.984106526083184
9416
3.2
3.2870568955057187
40.0
3.2870568955057187
9418
3.0
3.1454194333032146
44.0
3.1454194333032146
9419
3.2
3.2851306981188215
40.0
3.2851306981188215
9426
3.2
3.2082781694750393
36.0
3.2082781694750393
9442
2.8
2.8690361761833634
24.0
2.8690361761833634
9474
2.9
3.1285806164518593
37.0
3.1285806164518593
9478
3.0
3.1800264489253784
37.0
3.1800264489253784
9485
3.7
3.4832814805785977
35.0
3.4832814805785977
9496
3.0
3.203417573806274
42.0
3.203417573806274
9519
3.4
3.3526051125224448
35.0
3.3526051125224448
9520
3.8
3.5055955153802127
45.0
3.5055955153802127
9531
3.2
3.2677831137029116
36.0
3.2677831137029116
9563
2.7
2.9218969175816816
30.0
2.9218969175816816
9577
3.1
3.211535411402941
33.0
3.211535411402941
9599
2.9
3.0981850509784663
44.0
3.0981850509784663
9601
2.8
3.104691928227895
39.0
3.104691928227895
9606
3.4
3.301164712

13207
2.9
2.97822271447543
42.0
2.97822271447543
13246
3.7
3.5214099960394356
45.0
3.5214099960394356
13254
3.3
3.2681795358088546
42.0
3.2681795358088546
13261
3.6
3.4187200241136315
44.0
3.4187200241136315
13267
3.7
3.522353119288392
44.0
3.522353119288392
13269
3.4
3.3176915932171944
42.0
3.3176915932171944
13270
3.6
3.415012436378896
45.0
3.415012436378896
13279
3.4
3.3913695999685376
45.0
3.3913695999685376
13289
3.4
3.403025523252469
47.0
3.403025523252469
13295
3.6
3.4482483767306555
36.0
3.4482483767306555
13296
3.6
3.2488566956255753
30.0
3.2488566956255753
13297
3.6
3.306790698166174
44.0
3.306790698166174
13309
3.5
3.4272311109230005
42.0
3.4272311109230005
13314
3.6
3.4288583281413674
47.0
3.4288583281413674
13315
3.5
3.4257970142080834
43.0
3.4257970142080834
13328
3.8
3.5006311560526364
44.0
3.5006311560526364
13346
2.8
3.0402346987856443
39.0
3.0402346987856443
13356
2.9
3.1220119631643732
48.0
3.1220119631643732
13357
3.6
3.3984304615315817
42.0
3.3984304615315817
13369

2.9
2.9486279736617864
18.0
2.9486279736617864
15258
2.8
3.0052410516210446
31.0
3.0052410516210446
15269
3.1
3.073167724637722
26.0
3.073167724637722
15274
2.4
2.7806941657485202
30.0
2.7806941657485202
15286
2.8
2.9826163107063826
37.0
2.9826163107063826
15288
2.8
2.8834544666547055
28.0
2.8834544666547055
15296
2.4
2.675389313223506
20.0
2.675389313223506
15304
2.9
2.9110104154501952
23.0
2.9110104154501952
15306
2.7
2.952408400089555
29.0
2.952408400089555
15336
2.5
2.7488797596252126
27.0
2.7488797596252126
15364
2.7
3.002768337554175
41.0
3.002768337554175
15388
3.2
3.097305218366989
36.0
3.097305218366989
15395
3.8
3.5015191341512195
29.0
3.5015191341512195
15400
3.1
3.147175427964164
35.0
3.147175427964164
15407
3.0
3.1148722028272995
36.0
3.1148722028272995
15421
3.5
3.4157797474191782
39.0
3.4157797474191782
15426
2.8
3.103321344842757
39.0
3.103321344842757
15433
2.4
2.7404038403647215
36.0
2.7404038403647215
15461
3.4
3.1792601325558962
36.0
3.1792601325558962
15479
3.4
3.3

3.6
3.3800178821641564
36.0
3.3800178821641564
16993
3.0
3.1406689392097102
42.0
3.1406689392097102
17006
3.0
3.1221898607258676
38.0
3.1221898607258676
17013
3.4
3.1715066421270617
31.0
3.1715066421270617
17014
3.3
3.252993900940504
38.0
3.252993900940504
17019
3.7
3.498040039879852
37.0
3.498040039879852
17033
3.2
3.170848488612728
30.0
3.170848488612728
17035
3.2
3.177559978981226
31.0
3.177559978981226
17040
2.8
3.0843090827730295
35.0
3.0843090827730295
17043
3.0
3.1046383235765274
32.0
3.1046383235765274
17051
3.0
3.103844930933643
34.0
3.103844930933643
17055
3.7
3.3886621325491806
27.0
3.3886621325491806
17056
3.4
3.2829509154617256
33.0
3.2829509154617256
17058
3.9
3.555354488973999
30.0
3.555354488973999
17069
3.5
3.3691883098169533
29.0
3.3691883098169533
17080
3.6
3.392135048069116
38.0
3.392135048069116
17098
2.6
2.8484689291155183
40.0
2.8484689291155183
17099
3.3
3.245049752351503
36.0
3.245049752351503
17100
3.5
3.302184245617183
38.0
3.302184245617183
17101
2.9
2.94303

3.4116889055459603
39.0
3.4116889055459603
20169
3.9
3.5953946534610246
38.0
3.5953946534610246
20176
3.3
3.25739641561371
40.0
3.25739641561371
20178
3.4
3.371611647675781
39.0
3.371611647675781
20179
3.4
3.3810124324005835
40.0
3.3810124324005835
20183
3.2
3.226581306513124
42.0
3.226581306513124
20190
2.2
2.7932330638107916
45.0
2.7932330638107916
20203
3.3
3.266081105284212
41.0
3.266081105284212
20209
3.6
3.4824135841496746
44.0
3.4824135841496746
20231
3.5
3.4077642000235655
38.0
3.4077642000235655
20241
3.3
3.295451737229086
48.0
3.295451737229086
20251
3.4
3.325096108445751
43.0
3.325096108445751
20256
3.8
3.4811390069040553
40.0
3.4811390069040553
20265
3.9
3.6148743376068686
45.0
3.6148743376068686
20270
3.7
3.506419233657527
39.0
3.506419233657527
20275
3.4
3.3147133462355742
42.0
3.3147133462355742
20278
3.9
3.441165210984388
28.0
3.441165210984388
20279
3.7
3.4319715575794425
38.0
3.4319715575794425
20284
3.7
3.441793631988545
42.0
3.441793631988545
20289
3.6
3.47885551130

In [30]:
# or...
df['fct_UGPA'] = df.apply(lambda row: pred_ugpa(
    v_u=row['U'], v_female=row['female'], v_male=row['male'], v_white=row['white'], v_nonwhite=row['nonwhite']),
                          axis=1
                         )

df['fct_LSAT'] = df.apply(lambda row: pred_lsat(
    v_u=row['U'], v_female=row['female'], v_male=row['male'], v_white=row['white'], v_nonwhite=row['nonwhite']), 
                          axis=1
                         )

In [31]:
df[['fct_UGPA', 'fct_LSAT']]

Unnamed: 0,fct_UGPA,fct_LSAT
2,3.177242,36.457338
14,3.063096,36.994960
33,3.271079,37.856994
51,3.357786,37.193646
57,3.305239,36.977827
...,...,...
21735,3.079251,37.061207
21752,3.318393,37.031733
21767,3.051754,36.948518
21776,3.191758,33.573799


### Generate the structural counterfactuals

In [32]:
# new df with the interventions... e.g., do(sex=male) then female = 0 and male = 1 for all individuals
do_df = df.copy()
do_df

do_df['female'] = 0
do_df['male']   = 1
do_df

Unnamed: 0,LSAT,UGPA,female,male,white,nonwhite,U,test,fct_UGPA,fct_LSAT
2,36,3.0,0,1,1,0,0.576901,3.177242,3.177242,36.457338
14,35,2.9,0,1,1,0,0.578569,3.063096,3.063096,36.994960
33,32,3.4,0,1,1,0,-0.204209,3.271079,3.271079,37.856994
51,36,3.4,0,1,1,0,-0.102606,3.357786,3.357786,37.193646
57,33,3.3,0,1,1,0,0.095161,3.305239,3.305239,36.977827
...,...,...,...,...,...,...,...,...,...,...
21735,39,2.9,0,1,1,0,0.517769,3.079251,3.079251,37.061207
21752,27,3.4,0,1,1,0,0.045657,3.318393,3.318393,37.031733
21767,32,2.9,0,1,1,0,0.621257,3.051754,3.051754,36.948518
21776,26,3.5,0,1,0,1,-0.765606,3.191758,3.191758,33.573799


In [33]:
do_df['scf_UGPA'] = do_df.apply(lambda row: pred_ugpa(
    v_u=row['U'], v_female=row['female'], v_male=row['male'], v_white=row['white'], v_nonwhite=row['nonwhite']),
                          axis=1
                         )

do_df['scf_LSAT'] = do_df.apply(lambda row: pred_lsat(
    v_u=row['U'], v_female=row['female'], v_male=row['male'], v_white=row['white'], v_nonwhite=row['nonwhite']), 
                          axis=1
                         )

In [34]:
do_df

Unnamed: 0,LSAT,UGPA,female,male,white,nonwhite,U,test,fct_UGPA,fct_LSAT,scf_UGPA,scf_LSAT
2,36,3.0,0,1,1,0,0.576901,3.177242,3.177242,36.457338,3.063539,36.996775
14,35,2.9,0,1,1,0,0.578569,3.063096,3.063096,36.994960,3.063096,36.994960
33,32,3.4,0,1,1,0,-0.204209,3.271079,3.271079,37.856994,3.271079,37.856994
51,36,3.4,0,1,1,0,-0.102606,3.357786,3.357786,37.193646,3.244083,37.743978
57,33,3.3,0,1,1,0,0.095161,3.305239,3.305239,36.977827,3.191537,37.524965
...,...,...,...,...,...,...,...,...,...,...,...,...
21735,39,2.9,0,1,1,0,0.517769,3.079251,3.079251,37.061207,3.079251,37.061207
21752,27,3.4,0,1,1,0,0.045657,3.318393,3.318393,37.031733,3.204690,37.579668
21767,32,2.9,0,1,1,0,0.621257,3.051754,3.051754,36.948518,3.051754,36.948518
21776,26,3.5,0,1,0,1,-0.765606,3.191758,3.191758,33.573799,3.191758,33.573799


In [35]:
# can do individual counterfactuals given an index...!

### --- misc:

In [None]:
# male ugpa + white ugpa + ugpa_intercept
0.761571 + 0.880808 + 1.574632
# gives the intercept when I run male and white as the baseline
# but here is interesting bcs i wonder how this looks in terms of edges, no?

In [None]:
"""
same dist of Us and LSAT and UGPA for male and white as benchmarks
> sense_cols
[1] "female"   "nonwhite"
> eta_a_ugpa
[1]  0.1132429 -0.2275812
> eta_a_lsat
[1] -0.01449607 -0.13603405
> ugpa0
[1] 3.216965
> lsat0
[1] 3.627609
"""

In [None]:
# we can retreive the weights too...
# ugpa0 would mean white and male.. thus:
3.216965 + -0.2275812 # male + non-white

# vs all vars
0.761571 + 0.652492 -0.262836

In [None]:
"""
without the intercepts but still same dist of Us and LSAT and UGPA for male and white as benchmarks
> sense_cols
[1] "female"   "male"     "white"    "nonwhite"
> eta_a_ugpa
[1] 1.621399 1.508263 1.708761 1.480915
> eta_a_lsat
[1] 1.776778 1.791329 1.836215 1.700086

> eta_u_ugpa
[1] 0.2690262
> eta_u_lsat
[1] 0.02924528
"""

#https://stats.stackexchange.com/questions/7948/when-is-it-ok-to-remove-the-intercept-in-a-linear-regression-model
#https://stats.oarc.ucla.edu/other/mult-pkg/faq/general/faq-why-are-r2-and-f-so-large-for-models-without-a-constant/

# first, it's whether to include or not the intercept, which I think we should. otherwise, we claim that the 
# function passes through 0 and E[Y|X=0]=0 which is not the case, here
# second, whethere to include all the single encodings or the diverging groups
# based on standard ML and Stats practices, if we include the intercept, we should drop the base group(s) for proper identification
# not sure why kusner didn't do this in their paper... when, e.g., they ran their models and R kept dropping variables
# bcs they had an intercept!

In [None]:
# we need to know the functional specifications of each X
# the dag to know where to intervene 
# tbh.... I don't get the weights? why do we need to find U|evidence? the abduction step makes no sense!!!
# maybe ask karima?

In [None]:
# consider some factual of a female candidate
i = 0 
df.iloc[i,]

In [None]:
i_ugpa_scf = ugpa_w["ugpa0"] + ugpa_w["eta_u_ugpa"]*df.iloc[i]['U'] + ugpa_w['white'] + ugpa_w['male']
i_ugpa_scf

In [None]:
i_lsat_scf = np.exp(lsat_w["lsat0"] + lsat_w["eta_u_lsat"]*df.iloc[i]['U'] + lsat_w['white'] + lsat_w['male'])
i_lsat_scf

In [None]:
# the above mapping needs to be made cleaner... like a class.. translate the dag into a mapping [check the imt scripts for inspiration]
