In [8]:
import pandas as pd
import numpy as np
import scipy.stats
import math
import warnings
warnings.filterwarnings("ignore")


### This routine handles these steps:
#### 1) read in projection data
#### 2) merge with population data for each group
#### 3) converge age groups to total population
#### 4) get updated population shares
#### 5) read into GAMS, run GAMS
#### 6) re-import GAMS results (housing prices, wages, and updated population shares from logit)
#### 7) prep gams results for incorporation into logit routine
#### 8) re-run shares in stata
#### 9) map out migration flows for top entry exit


### Steps 1-5
##### (1) read in projection data, (2) merge pop data for each group, (3) translate age group and skill changes to total population pct change, (4) get updated GAMS shares, (5) read into GAMS and run the new counterfactual once

In [None]:
# tmp0_gr=tmp0.groupby(['msa','statefip'],as_index=False).agg({'id':sum})
# bmk0_gr=bmk0.groupby(['msa'],as_index=False).agg({'id':sum})
# msaid_state_lookup=tmp0_gr[['msa','statefip']].merge(bmk0_gr[['msa']],on='msa').groupby(['msa','statefip'],as_index=False).sum()
# msaid_state_lookup.to_excel('/Users/hannahkamen/Downloads/msaid_state_lookup.xlsx')



In [None]:
# Inputs: population-share changes from the first cycle of GAMS data inputs
# (after the initial logit shock with temperature).  The shares calculated
# from them represent the population change from the previous iteration.
state_split = pd.read_excel('/Users/hannahkamen/Downloads/state_pop_educ_shares.xlsx')
state_age_shares = pd.read_excel('/Users/hannahkamen/Downloads/state_age_shares.xlsx')
msaid_state_lookup = pd.read_excel('/Users/hannahkamen/Downloads/msaid_state_lookup.xlsx')

# Keep skl_2..skl_7 and unskl_2..unskl_7 (the suffix is presumably the age-group
# id -- confirm) followed by the state key and its population, in that order.
group_suffixes = range(2, 8)
state_split_columns = (
    ['skl_%d' % g for g in group_suffixes]
    + ['unskl_%d' % g for g in group_suffixes]
    + ['state', 'state_pop']
)
state_split = state_split[state_split_columns]

# Steps (1)-(2): for every (age group, education group) pair, load the
# projection output, aggregate it to state level, compute the percent change in
# the summed state share, and stack the per-group results into `master`.
#
# The per-group frames are collected in a list and concatenated once at the
# end: DataFrame.append was removed in pandas 2.0, and a single pd.concat keeps
# the same rows and index labels that repeated appends produced.
projection_frames = []
for a in [2]:
    for i in [0, 1]:
        # Release the previous projection frame before loading the next one.
        # Only the "name not bound yet" case is expected here, so catch
        # NameError specifically instead of swallowing every exception.
        try:
            del tmp0
        except NameError:
            print("first loop")

        # (1) read in projection data and benchmark population shares.
        # The age-group id is taken from the loop variable; for a == 2 this is
        # the same file name that was previously hard-coded.
        tmp0 = pd.read_stata(
            '/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/projection_data_age%s_%s_wbmk_iter2.dta' % (a, i)
        )
        # (State lookup merge intentionally disabled:)
        #sl=pd.read_excel('/Users/hannahkamen/Downloads/statelookup2.xlsx')
        #tmp0=tmp0.merge(sl,on='statefip',how='inner')

        # (2) aggregate to state level: max of the temperature variables,
        # sum of every share column.
        tmp = tmp0.groupby(['state'], as_index=False).agg({
            'fexthot_28': 'max',
            'fextcold': 'max',
            'share_it12': 'sum',
            'share_it1': 'sum',
            'share': 'sum',
            'share_b': 'sum',
        })

        # Percent change of the summed state share from share_it1 to share_it12.
        tmp['pct_change'] = (tmp['share_it12'] - tmp['share_it1']) / tmp['share_it1']

        # Merge the percent changes onto the state age-share table and tag the
        # rows with the group they belong to.
        tmp = tmp.merge(state_age_shares, on='state', how='inner')
        tmp['age_id'] = a
        tmp['educ_id'] = i

        # SET AGE GROUP TWO TO 100 PERCENT OF POPULATION FOR NOW: the change is
        # weighted by 1 rather than by the group's age share.
        #tmp['contribution_to_total_change']=tmp['pct_change']*tmp['%s'%a]
        tmp['contribution_to_total_change'] = tmp['pct_change'] * 1

        projection_frames.append(tmp)

# Stack all age/skill datasets.  `tmp0` and `tmp` stay bound to the last group
# processed, which later cells read.
master = pd.concat(projection_frames)






In [17]:

#(3) translate age group to total population pct change

# Sum each group's contribution per state and education group.  Later cells
# (the describe() check, the GAMS export and the merge with the GAMS results)
# all read `master_gr`, so it has to be defined here; with this line commented
# out the notebook only ran when `master_gr` was left over in the kernel.
master_gr = master.groupby(['state', 'educ_id'], as_index=False).agg({'contribution_to_total_change': 'sum'})


#####get MSA Identifier information

msa_id=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/msa_identifier.dta')
msa_vars=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/second_stage_dataset_cl.dta')




In [18]:
# Build a version of the second-stage MSA dataset without any column whose
# name contains "wa_".  DataFrame.drop returns a new frame, so `msa_vars`
# itself keeps all of its columns (plain assignment would only alias it, and
# deleting columns through the alias would strip them from `msa_vars` too).
wa_columns = [col for col in msa_vars.columns if "wa_" in col]
msa_vars_nowa = msa_vars.drop(columns=wa_columns)

In [21]:
# Save the "wa_"-free second-stage dataset as a Stata file.
nowa_dta_path = '/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/second_stage_dataset_nowa.dta'
msa_vars_nowa.to_stata(nowa_dta_path)


In [None]:
master_gr['contribution_to_total_change'].describe()

In [None]:
###import state lookup

sl=pd.read_excel('/Users/hannahkamen/Downloads/statelookup2.xlsx')

In [None]:
#(4) export to gams format
# Recode the education id into the skill labels: '0' becomes 'unskl' and '1'
# becomes 'skl' (plain substring replacement on the stringified id).
skill_labels = master_gr['educ_id'].astype(str)
skill_labels = skill_labels.str.replace('0','unskl').str.replace('1','skl')
master_gr['educ_id'] = skill_labels

# Attach the state lookup and rename to the export column names; the state
# abbreviation column is deliberately given an empty header.
gams_column_names = {'abbrev':'','educ_id':'sk','contribution_to_total_change':'skill_shr'}
master_gr = master_gr.merge(sl, on='state', how='inner').rename(columns=gams_column_names)


In [None]:
# Export only the three columns written to the shock file (blank-header state
# abbreviation, skill label, share change), without the pandas index.
gams_export_columns = ['', 'sk', 'skill_shr']
master_gr_lm = master_gr[gams_export_columns]
master_gr_lm.to_csv('/Users/hannahkamen/Downloads/le0_shock_0_it2.csv', index=False)

In [None]:
master.to_excel('/Users/hannahkamen/Downloads/iteration_2_state_shares.xlsx')

In [None]:
#tmp['fexthot_28'].describe()
#tmp['fextcold'].describe()

#### (6) re-import GAMS results, merge with pop changes

In [3]:
# GAMS report files for this iteration: housing prices (phou) and labor
# prices (npl).  The y_rpt report is currently not loaded.
#y_rpt=pd.read_csv('/Users/hannahkamen/Downloads/y_rpt.csv')
phou_rpt = pd.read_csv('/Users/hannahkamen/Downloads/phou_rpt2.csv')
npl_rpt = pd.read_csv('/Users/hannahkamen/Downloads/npl_rpt2.csv')

# MSA population data (second-stage dataset).
msa = pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/second_stage_dataset_cl.dta')

# State lookup table.
sl = pd.read_excel('/Users/hannahkamen/Downloads/statelookup2.xlsx')


In [None]:
tmp.sort_values(by='fexthot_28',ascending=False)[['state','fexthot_28']].head()

In [None]:
tmp.sort_values(by='fextcold',ascending=False)[['state','fextcold']].head()

In [5]:
w_rpt=pd.read_csv('/Users/hannahkamen/Downloads/w_rpt2.csv')

In [None]:
w_rpt=w_rpt[['region','skill','household','value']]

In [None]:
tmp.columns

In [7]:
# Renumber the rows without keeping the old index as a column.  A plain
# reset_index() inserts a leftover column on every run of this cell ('index',
# then 'level_0') and raises once both exist, so the cell could not be re-run.
w_rpt = w_rpt.reset_index(drop=True)

# Wide view: one row per region, one column per (skill, household) pair.
w_rpt.pivot(index='region', columns=['skill', 'household'], values='value')


skill,skl,unskl,skl,unskl,skl,unskl,skl,unskl,skl,unskl
household,hh1,hh1,hh2,hh2,hh3,hh3,hh4,hh4,hh5,hh5
region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
AK,0.986878,0.987722,0.980795,0.982506,0.977577,0.980163,0.981009,0.986233,1.000638,1.002237
AL,0.938519,0.98058,0.907516,0.970384,0.889491,0.963808,0.848119,0.948881,0.908147,0.970332
AR,0.993087,1.011684,0.985136,1.017826,0.982778,1.020393,0.976795,1.02294,0.994933,1.017025
AZ,0.994107,1.041628,1.000137,1.075786,1.003865,1.083764,1.018589,1.110495,1.015221,1.071079
CA,1.003203,1.015266,1.025443,1.045089,1.03576,1.059445,1.042918,1.075335,1.034004,1.057284
CO,0.936951,0.927802,0.913906,0.901054,0.89465,0.878758,0.879019,0.858577,0.932036,0.920723
CT,1.094907,1.048216,1.14533,1.071398,1.187442,1.092344,1.197025,1.098664,1.104792,1.056789
DC,0.995919,1.004571,0.991149,1.011593,0.990797,1.012837,0.973848,1.022403,0.995182,1.009346
DE,1.105924,1.070017,1.196409,1.128792,1.274025,1.179012,1.453301,1.294561,1.170909,1.114506
FL,0.827192,0.897564,0.719865,0.832006,0.652559,0.792216,0.617103,0.770859,0.782114,0.862784


In [None]:
test_wage=pd.read_csv('/Users/hannahkamen/Downloads/npl_rpt1.csv')
test_ph=pd.read_csv('/Users/hannahkamen/Downloads/phou_rpt1.csv')

In [None]:
test_wage['value'].min()

In [None]:
test_wage[test_wage['region']=='MT']

In [None]:
####create table with percent of total state that each MSA contributes to
# msa=msa[['statefip','msa','lnpop']]
# msa['msa_pop']=np.exp(msa['lnpop'])
# msa_tot=msa.groupby('statefip',as_index=False).agg({'msa_pop':sum})
# msa_tot=msa_tot.rename(columns={'msa_pop':'msa_pop_total'})
# msa=msa.merge(msa_tot,on='statefip')
# msa['pct_state_total']=msa['msa_pop']/msa['msa_pop_total']
# msa.to_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/msa_pop_pct.dta')


In [None]:
# Labor-price report: keep region/skill/value and rename region -> abbrev,
# value -> pl so it can be merged on the state abbreviation.
npl_rpt = (
    npl_rpt[['region','skill','value']]
    .rename(columns={'region':'abbrev','value':'pl'})
)

In [None]:
# Housing-price report: keep the two columns and rename them for the merge.
# NOTE(review): the mapping looks swapped on purpose -- 'pct' becomes the state
# abbreviation and 'region' becomes the housing price `ph`, which suggests the
# CSV headers are shifted by one column.  Confirm against phou_rpt2.csv.
phou_rpt = (
    phou_rpt[['pct','region']]
    .rename(columns={'region':'ph','pct':'abbrev'})
)

In [None]:
# State population shares by skilled / unskilled.  The file's own statefip
# column is dropped and replaced by the one from the state lookup `sl`.
state_educ = (
    pd.read_excel('/Users/hannahkamen/Downloads/state_educ_shares.xlsx')
    .drop(columns=['statefip'])
    .merge(sl, on='state', how='inner')
)
state_educ = state_educ[['statefip','skl','unskl','state','abbrev','state_pop']]

In [None]:
# Combine everything into one frame, one merge per line:
r_df = (
    phou_rpt
    # housing prices + labor prices, by state abbreviation
    .merge(npl_rpt, on='abbrev')
    # + share changes, matched on abbreviation (blank-named column) and skill
    .merge(master_gr, left_on=['abbrev','skill'], right_on=['','sk'])
    # + state education shares and population
    .merge(state_educ, on=['abbrev','statefip','state'])
)



In [None]:
master_gr.head()

In [None]:
state_educ.head()

In [None]:
phou_rpt.head()

In [None]:
r_df.head()

In [None]:
list(r_df)

In [None]:
# Reshape the per-skill rows into one row per state.

# Share changes: skl / unskl become skl_pct_delta / unskl_pct_delta.
pop_changes = (
    r_df.pivot(index='state', columns='sk', values='skill_shr')
    .reset_index()
    .rename(columns={'skl':'skl_pct_delta','unskl':'unskl_pct_delta'})
)

# Labor prices: skl / unskl become pl_skl / pl_unskl.
pl_changes = (
    r_df.pivot(index='state', columns='sk', values='pl')
    .reset_index()
    .rename(columns={'skl':'pl_skl','unskl':'pl_unskl'})
)

In [None]:
#r_df=r_df[['statefip','abbrev','state','skill','pl','ph','skl','unskl','state_pop']]
# Bring the state-level pivots back onto every row.
r_df = r_df.merge(pop_changes, on='state')
r_df = r_df.merge(pl_changes, on='state')

# Base head count per skill group (share of the state population), then the
# base plus the base scaled by the percent delta.  Despite the column names,
# the stored value is base + change, not the change alone.
skilled_base = r_df['skl'] * r_df['state_pop']
unskilled_base = r_df['unskl'] * r_df['state_pop']
r_df['skilled_level_change'] = skilled_base + (skilled_base * r_df['skl_pct_delta'])
r_df['unskilled_level_change'] = unskilled_base + (unskilled_base * r_df['unskl_pct_delta'])

# New state population is the sum of the two groups.
r_df['new_state_pop'] = r_df['unskilled_level_change'] + r_df['skilled_level_change']



In [None]:
# Collapse to one row per state, keeping the first occurrence.  Columns that
# differ between a state's rows (for example `skill` and `pl`) therefore hold
# whichever row came first; the pivoted pl_skl / pl_unskl columns were merged
# per state and are the same on every row of a state.
r_df_lm = r_df.drop_duplicates(subset='state', keep='first')

In [None]:
r_df_lm['pl_unskl'].max()

In [None]:
test_wage['value'].max()

In [None]:
r_df_lm['ph'].min()

In [None]:
test_ph['region'].min()

In [None]:
list(r_df_lm)

In [None]:
# Remove, in place, the columns that are not needed after de-duplication:
# the blank-named abbreviation copy, the new state population, and the
# long-format labor price.
for unused_column in ['', 'new_state_pop', 'pl']:
    del r_df_lm[unused_column]

In [None]:
r_df_lm2=r_df_lm[['statefip','abbrev','skill','pl_skl','pl_unskl','ph']]

In [None]:
r_df_lm2.to_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/gams_dta2.dta')




##### (9) map out migration flows

In [None]:
###prepare dataset with centroid of every state

map_dta1=pd.read_csv('/Users/hannahkamen/Downloads/census_texas.csv')
map_dta2=pd.read_csv('/Users/hannahkamen/Downloads/census_ny.csv')
# pd.concat replaces DataFrame.append (removed in pandas 2.0); row order and
# index labels are the same, and reset_index() then exposes the old index as
# column(s).
map_dta=pd.concat([map_dta1,map_dta2]).reset_index()

###rename data
# The assignments below move values one column over and glue pairs of columns
# back together with a comma; each line reads columns that a later line
# overwrites, so their order must not change.
# NOTE(review): this looks like a repair for CSV fields that were split on
# embedded commas when read -- confirm against the raw files.
map_dta['centroid2']=map_dta['centroid1']+","+map_dta['centroid2']
map_dta['centroid1']=map_dta['FULL1_NAME']+","+map_dta['FULL2_NAME']
map_dta['FULL1_NAME']=map_dta['GEOID1']
map_dta['FULL2_NAME']=map_dta['GEOID2']

del map_dta['GEOID1']
del map_dta['GEOID2']


map_dta['GEOID1']=map_dta['level_1']
map_dta['GEOID2']=map_dta['Unnamed: 0']
map_dta=map_dta[['GEOID1','GEOID2','FULL1_NAME','FULL2_NAME','centroid1','centroid2']]
####get abbreviations

states=list(sl['abbrev'].unique())

# For each state abbreviation, take the rows whose FULL2_NAME contains it and
# tag them with that abbreviation.  The per-state frame has its own name
# (`state_rows`) so this loop no longer rebinds the notebook-level `tmp`,
# which a later cell reads as the state-level projection frame; `.copy()`
# makes the new column an assignment on an independent frame rather than on a
# slice of map_dta.
state_frames=[]
for s in states:
    state_rows=map_dta[map_dta['FULL2_NAME'].str.contains(s)].copy()
    state_rows['state']=s
    state_frames.append(state_rows)
state_coords=pd.concat(state_frames) if state_frames else pd.DataFrame()

# One (the first matching) row per state, reduced to the columns used later.
state_coords=state_coords.drop_duplicates(subset='state')
state_coords=state_coords[['state','FULL2_NAME','centroid2']]

In [None]:
# For every origin state, sum the projected and benchmark shares over all
# destination states for the people who currently live in that origin state.
#
# Fixes relative to the previous version:
#  * all resident ids are used; previously only the first id (`[0]`) was kept,
#    which contradicted the stated intent and raised IndexError for a state
#    with no residents;
#  * per-state results are collected in a list and concatenated once, because
#    DataFrame.append was removed in pandas 2.0.
master_map_frames=[]
for s in tmp0['state'].unique():
    # rows where state s is the chosen (current) state
    tmp1=tmp0[(tmp0['state']==s) & (tmp0['chosen']==1)]
    ####ids currently living in state s
    living_now=tmp1['id'].unique()
    ###limit dataframe to the ids of people living in state s
    tmp2=tmp0[tmp0['id'].isin(living_now)]
    ####now groupby shares across all states
    tmp3=tmp2.groupby(['state'],as_index=False).agg({'share_it12':'sum','share_b':'sum'})
    ###tag origin state
    tmp3['living_flag']=s
    master_map_frames.append(tmp3)
master_map=pd.concat(master_map_frames) if master_map_frames else pd.DataFrame()

######merge with state population
# Both frames carry a 'state' column, so the merge suffixes them; state_x is
# the destination state from master_map.
master_map0=master_map.merge(state_split,left_on='living_flag',right_on='state',how='inner')
master_map0=master_map0.rename(columns={'state_x':'moving_to'})

######get share difference between current iteration and benchmark
master_map0['share_diff']=master_map0['share_it12']-master_map0['share_b']




In [None]:
# Give every state row a running integer id (0..n-1) and expose the state
# label under the column name `name`.
state_coords = state_coords.assign(
    id=np.arange(len(state_coords)),
    name=state_coords['state'],
)


In [None]:
def _centroid_lat(centroid):
    """Return the second comma-separated field of `centroid` with every ')' removed and whitespace stripped."""
    return centroid.split(',')[1].replace(")", '').strip()


def _centroid_lon(centroid):
    """Return the first comma-separated field of `centroid` with every '(' and 'c' removed and whitespace stripped."""
    return centroid.split(',')[0].replace("(", '').replace("c", '').strip()


# The centroid text is presumably of the form "c(<lon>, <lat>)" -- confirm.
# Both values stay strings.
state_coords['lat'] = state_coords['centroid2'].apply(_centroid_lat)
state_coords['lon'] = state_coords['centroid2'].apply(_centroid_lon)

In [None]:
state_coords_lm=state_coords[['id','name','lat','lon']]

In [None]:
state_coords_lm.to_csv('/Users/hannahkamen/Downloads/flowmap_location_lookup.csv',index=False)

In [None]:
# Total percent change of each state's share relative to the benchmark.
# NOTE(review): relies on the notebook-level `tmp` holding the share_it12,
# share_b, statefip, educ_id and age_id columns; verify that no intermediate
# cell has rebound `tmp` before this one runs.
tmp['total_pct_change_fromb'] = (tmp['share_it12'] - tmp['share_b']) / tmp['share_b']

# Largest increases first.
by_change_desc = tmp.sort_values(by='total_pct_change_fromb', ascending=False)
total_skilled_pop_changes = by_change_desc[['statefip','educ_id','age_id','total_pct_change_fromb']]

# Attach the change of the destination ("moving to") state to every flow row.
# NOTE(review): moving_to is matched against statefip here -- confirm both
# hold the same kind of state identifier.
master_map0_m = master_map0.merge(total_skilled_pop_changes, left_on='moving_to', right_on='statefip')

# Ten states with the largest and the ten with the smallest change.
top_entrance = list(by_change_desc.head(10)['statefip'])
by_change_asc = tmp.sort_values(by='total_pct_change_fromb', ascending=True)
top_exit = list(by_change_asc.head(10)['statefip'])


In [None]:
#####prepare pop map data for skilled entrance
# For each top-entrance state, split its total positive share difference into
# the fraction contributed by each origin state.  Per-state frames are
# collected and concatenated once (DataFrame.append was removed in pandas 2.0).
# `master_map_lm` is intentionally left bound to the last state's frame after
# the loop, because a later cell reads it.
entrance_frames=[]
for s in top_entrance:
    ###rows flowing into s with a positive share difference
    master_map_lm=master_map0_m[((master_map0_m['moving_to']==s) & (master_map0_m['share_diff']>0))]
    #####total positive share difference into s
    master_map_lm_totalinflows=master_map_lm.groupby('moving_to').agg({'share_diff':'sum'})
    ###rename and merge the total back onto every origin row
    master_map_lm_totalinflows=master_map_lm_totalinflows.rename(columns={'share_diff':'share_diff_tot'})
    master_map_lm=master_map_lm.merge(master_map_lm_totalinflows,on='moving_to')

    ###each origin's fraction of the total inflow
    master_map_lm['inflow_share']= master_map_lm['share_diff']/master_map_lm['share_diff_tot']

    master_map_lm=master_map_lm[['moving_to','living_flag','inflow_share','share_diff_tot']]
    entrance_frames.append(master_map_lm)
all_skl_entrance=pd.concat(entrance_frames) if entrance_frames else pd.DataFrame()

In [None]:
##merge in pop info
# Turns the (moving_to, living_flag, inflow_share) rows into flow-map records:
# state names are resolved to abbreviations via `sl`, abbreviations are swapped
# for the integer ids in `state_coords_lm`, and each origin's inflow share is
# scaled into a head count.
# NOTE(review): the statements are order-sensitive.  Repeated merges against
# the same lookup produce suffixed duplicate columns (_x/_y), and the code
# below relies on which columns end up unsuffixed.

# Destination: match moving_to against sl['state']; its abbreviation becomes `dest`.
all_skl_entrance=all_skl_entrance.merge(sl,left_on='moving_to',right_on='state')
all_skl_entrance=all_skl_entrance.rename(columns={'abbrev':'dest'})

# Origin: match living_flag against sl['state']; its abbreviation becomes `origin`.
all_skl_entrance=all_skl_entrance.merge(sl[['state','abbrev']],left_on='living_flag',right_on='state')
all_skl_entrance=all_skl_entrance.rename(columns={'abbrev':'origin'})

# Replace the destination abbreviation with the matching state_coords_lm id.
# `id` is deleted afterwards so the second merge below can add the origin's
# id under the plain name `id` again.
all_skl_entrance=all_skl_entrance.merge(state_coords_lm,left_on='dest',right_on='name')
del all_skl_entrance['dest']
all_skl_entrance['dest']=all_skl_entrance['id']
del all_skl_entrance['id']
# Same lookup for the origin; `id` now holds the origin's id.
all_skl_entrance=all_skl_entrance.merge(state_coords_lm,left_on='origin',right_on='name')
del all_skl_entrance['origin']


# Attach the destination state's education shares / population and its total
# percent change from benchmark.
all_skl_entrance=all_skl_entrance.merge(state_educ,left_on='moving_to',right_on='state').merge(total_skilled_pop_changes,left_on='moving_to',right_on='statefip')
# Head count per origin: pct change * state population * `skl` * origin's
# share of the inflow (`skl` is presumably the skilled population share -- confirm).
all_skl_entrance['raw_pop_change_moving_to']=all_skl_entrance['total_pct_change_fromb']*all_skl_entrance['state_pop']*all_skl_entrance['skl']*all_skl_entrance['inflow_share']

# Final flow-map columns: origin id and flow size.
all_skl_entrance['origin']=all_skl_entrance['id']
all_skl_entrance['count']=all_skl_entrance['raw_pop_change_moving_to']

In [None]:
# Write the flow records (origin id, destination id, head count) without the
# pandas index.
flow_columns = ['origin', 'dest', 'count']
all_skl_entrance_lm = all_skl_entrance[flow_columns]

all_skl_entrance_lm.to_csv('/Users/hannahkamen/Downloads/skilled_entrance.csv', index=False)


In [None]:
# Build map data for a single destination state: `master_map_lm` is the frame
# left over from the last iteration of the top-entrance loop, so only that one
# state's inflows are present here.
# Destination abbreviation (from the full state lookup).
master_map_ks=master_map_lm.merge(sl,left_on='moving_to',right_on='state')
master_map_ks=master_map_ks.rename(columns={'abbrev':'moving_to_abbrev'})
del master_map_ks['state']
# Origin abbreviation.
master_map_ks=master_map_ks.merge(sl[['state','abbrev']],left_on='living_flag',right_on='state')
master_map_ks=master_map_ks.rename(columns={'abbrev':'living_abbrev'})
del master_map_ks['state']
del master_map_ks['statefip']

# Attach coordinates twice: first for the destination, then for the origin.
# The second merge suffixes the overlapping columns, so *_x belongs to the
# destination and *_y to the origin.
master_map_ks=master_map_ks.merge(state_coords,left_on='moving_to_abbrev',right_on='state')
master_map_ks=master_map_ks.merge(state_coords,left_on='living_abbrev',right_on='state')
# NOTE(review): after this rename centroid2/FULL1_NAME describe the destination
# while centroid1/FULL2_NAME describe the origin, i.e. the 1/2 numbering is not
# paired consistently between centroid and name -- confirm this is intended.
master_map_ks=master_map_ks.rename(columns={'centroid2_x':'centroid2','centroid2_y':'centroid1','FULL2_NAME_x':'FULL1_NAME','FULL2_NAME_y':'FULL2_NAME'})
                      
                                  

In [None]:
master_map_ks

In [None]:
master_map_ks.to_csv('/Users/hannahkamen/Downloads/ks_in_skl_map_data.csv')

In [None]:
list(master_map_ks)

In [None]:
master_map_ks

In [None]:
# Net share difference per destination state, indexed by moving_to.
# NOTE(review): this rebinds `master_map0_m`, which an earlier cell builds with
# different columns -- confirm the intended cell order.
inflow_totals_by_dest = (
    master_map0.groupby('moving_to')
    .agg({'share_diff':sum})
    .rename(columns={'share_diff':'share_diff_tot'})
)
# Put the per-destination total on every flow row (merge key matches the
# index name of the totals frame).
master_map0_m = master_map0.merge(inflow_totals_by_dest, on='moving_to')
# Keep a flat copy of the totals with moving_to as a regular column.
master_map0_totalinflows = inflow_totals_by_dest.reset_index()

In [None]:
master_map0_m=master_map0_m[['moving_to','living_flag','share_diff','share_diff_tot']]

In [None]:
master_map0_m

In [None]:
map_dta=pd.read_csv('/Users/hannahkamen/Downloads/census_texas.csv')

In [None]:
# Keep only the rows whose GEOID2 contains a comma.  `.copy()` makes the
# filtered frame independent, so adding a column below is not an assignment on
# a slice of map_dta.
map_dta_lm = map_dta[map_dta['GEOID2'].str.contains(',')].copy()

# The state label is the second comma-separated field of GEOID2.
# (This statement used to appear twice; once is enough.)
map_dta_lm['state'] = map_dta_lm['GEOID2'].apply(lambda x: x.split(',')[1])

In [None]:
map_dta_lm

In [None]:
map_dta['state']

In [None]:
list(map_dta['GEOID2'])

In [None]:
master_map0.head()

In [None]:
##test to see what is happening with temperature

In [None]:
# df_tmp[df_tmp['state']=='washington']

In [None]:
# NOTE(review): the `master_map` built in this notebook only aggregates
# share_it12 and share_b (plus state and living_flag), so there is no 'share'
# column to sort by and this raises KeyError unless `master_map` comes from
# somewhere else -- confirm the intended column.
master_map.sort_values(by='share',ascending=False).head(100)

In [None]:
master_map.head()

In [None]:
df_lm

In [None]:
master_map.head()

In [None]:
df_test_gre

In [None]:
df_tmp

In [None]:
df_test_gre

In [None]:
# df_m[(df_m['id']==3)& (df_m['state']=='california')][['msa','bpl','chosen','d_s','d_r1','d_r2','statefip','state']]




In [None]:
df_test_gre['diff']=abs(df_test_gre['share']-df_test_gre['chosen'])

In [None]:
df_test_gre['diff'].describe()

In [None]:
# df_tmp=df_m.groupby(['msa'],as_index=False).agg({'fexthot_28':sum,'fextcold':sum,'state':max,'hot':sum,'cold':sum})

In [None]:
# df_m[['msa','state','hot','cold','fexthot_28','fextcold']].head()

In [None]:
len(df[(df['id']==4100)]['msa'].unique())

In [None]:
list()

In [None]:
logit_ready[logit_ready['chosen']==1]['d_s'].describe()

In [None]:
df_lm=df[['msa','chosen','share']]

In [None]:
df[df['chosen']==1]['share'].describe()

#### Import total population and process percent changes for Windc counterfactual

In [None]:
#orig=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/dta/acs5yr_0610_clr.dta')

#orig.groupby(['ageid'],as_index=False)

In [None]:
###get shares by college graduates#impo
#educ_shares.to_csv('/Users/hannahkamen/Downloads/le0_shr2.csv',index=False)
# df['pct_col']=np.exp(df['lncoll'])
# df['pct_nc']=1-np.exp(df['lncoll'])
# df['coll_pop']=np.exp(df['lnpop'])*df['pct_col']
# df['population']=np.exp(df['lnpop'])

# df_tot=df.groupby(['statefip'],as_index=False).agg({'population':sum,'coll_pop':sum,'share':sum,'chosen':sum})
# df_tot['state_coll_shr']=df_tot['coll_pop']/df_tot['population']
# df_tot['pct_change']=(df_tot['share']-df_tot['chosen'])/df_tot['chosen']


In [None]:
df[df['share'].isnull()]

In [None]:
df

In [None]:
list(df)

In [None]:


# State totals: sums of share, population and chosen, plus the row count of id.
state_aggregations = {'share':sum,'population':sum,'chosen':sum,'id':'count'}
df_st = df.groupby('statefip', as_index=False).agg(state_aggregations)
# Sanity check: grand total of `chosen` across all states.
df_st['chosen'].sum()

In [None]:
list(df)

In [None]:
len(df['id'].unique())

In [None]:
df_st.head()

In [None]:
len(df_st['id'].unique())

In [None]:
44*265

In [None]:
df[df['id']==62973.0]

In [None]:
len(df)

In [None]:
df_st.head()

In [None]:
len(df['statefip'].unique())

In [None]:
df.groupby('id',as_index=False).agg({'share':sum})['share'].unique()

In [None]:
df['msa'].unique()

In [None]:
len(list(df['msa'].unique()))

In [None]:
df.head()

In [None]:
data.head()

In [None]:
st_lkup=data.groupby(['statefip','str_statefip'],as_index=False).sum()[['statefip','str_statefip']]

In [None]:
list(data)

In [None]:
data['str_statefip'].unique()

In [None]:
list(data)

In [None]:
st_lkup

In [None]:
data.head()

In [None]:
len(data)

In [None]:
list(data)

In [None]:
len(logit)

In [None]:
list(logit)

In [None]:
logit.head()

In [None]:
q

In [None]:
logit.head()

In [None]:
q.head()

In [None]:
for i in np.arange(2,8,1):
    print(i)

In [None]:
# Read the second-stage average files ...age2.dta through ...age7.dta.
# Each pass rebinds `df`, so after the loop `df` holds only the last file.
# NOTE(review): if all six frames are needed, collect them instead.
second_stage_template = '/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/results/temp/2nd_stage_avg_age%s.dta'
for i in np.arange(2,8,1):
    df = pd.read_stata(second_stage_template % i)
    




In [None]:
df

In [None]:
secondndstage2.head()

In [None]:
secondndstage3.head()

In [None]:
logit['_b_d_r1'].unique()

In [None]:
logit['d_s'].unique()

In [None]:
len(cols)

In [None]:
cols=[x for x in logit.columns if "_b" in x]

In [None]:
for x in [x for x in logit.columns if "_b" in x]:
    
    print(logit[x].unique())

In [None]:
logit['d_s']