In [1]:
import pandas as pd
import numpy as np
import scipy.stats
import math
import warnings
warnings.filterwarnings("ignore")


### This routine handles these steps:
#### 1) read in projection data
#### 2) merge with population data for each group
#### 3) translate age groups to total population
#### 4) get updated population shares
#### 5) read into GAMS, run GAMS
#### 6) re-import GAMS results (housing prices, wages, and updated population shares from logit)
#### 7) prep GAMS results for incorporation into logit routine
#### 8) re-run shares in Stata
#### 9) map out migration flows for top entry/exit


### Steps 1-5
##### (1) read in projection data, (2) merge pop data for each group, (3) translate age group and skill changes to total population pct change, (4) get updated GAMS shares, (5) read into GAMS and run the new counterfactual one time

In [2]:
# tmp0_gr=tmp0.groupby(['msa','statefip'],as_index=False).agg({'id':sum})
# bmk0_gr=bmk0.groupby(['msa'],as_index=False).agg({'id':sum})
# msaid_state_lookup=tmp0_gr[['msa','statefip']].merge(bmk0_gr[['msa']],on='msa').groupby(['msa','statefip'],as_index=False).sum()
# msaid_state_lookup.to_excel('/Users/hannahkamen/Downloads/msaid_state_lookup.xlsx')



In [3]:
#new_bmk=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/projection_data_age2_1_wbmk.dta')
    
    
    

In [4]:
### Get state age and education estimates, then build `master`: one block of
### state-level rows per (age group, education level) projection file.

state_split=pd.read_excel('/Users/hannahkamen/Downloads/state_pop_educ_shares.xlsx')
state_age_shares=pd.read_excel('/Users/hannahkamen/Downloads/state_age_shares.xlsx')
msaid_state_lookup=pd.read_excel('/Users/hannahkamen/Downloads/msaid_state_lookup.xlsx')

# Keep only the skill-by-age-group share columns plus the state key and population.
state_split=state_split[['skl_2','skl_3','skl_4','skl_5','skl_6','skl_7','unskl_2','unskl_3','unskl_4','unskl_5','unskl_6','unskl_7','state','state_pop']]

# Per-group frames are collected in a list and concatenated once after the loop.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a loop
# re-copies all accumulated rows on every pass.
group_frames=[]
for a in [2]:
    for i in [0,1]:
        # No explicit `del tmp0` is needed: rebinding the name below releases the
        # previously loaded frame.

        #(1) read in projection data, and benchmark population shares
        # The file name carries the age-group id `a` and the education id `i`.
        tmp0=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/projection_data_age%s_%s_wbmk_iter1.dta'%(a,i))

        #############merge state lookup onto both projections
        #sl=pd.read_excel('/Users/hannahkamen/Downloads/statelookup2.xlsx')
        #tmp0=tmp0.merge(sl,on='statefip',how='inner')

        #(2) merge pop share estimates from each group

        ######## group by state: max of the two temperature columns, sum of both share columns.
        # Aggregations are named by string; passing the builtins max/sum is deprecated in pandas.
        tmp=tmp0.groupby(['state'],as_index=False).agg({'fexthot_28':'max','fextcold':'max','share_it1':'sum','share':'sum'})

        ######## relative change of share_it1 against share
        # presumably share_it1 is the projected share and share the benchmark -- TODO confirm
        tmp['pct_change']=(tmp['share_it1']-tmp['share'])/tmp['share']

        ######## merge pct changes onto population by age
        tmp=tmp.merge(state_age_shares,on='state',how='inner')
        tmp['age_id']= a
        tmp['educ_id']=i
        ########SET AGE GROUP TWO TO 100 PERCENT OF POPULATION FOR NOW
        #tmp['contribution_to_total_change']=tmp['pct_change']*tmp['%s'%a]
        tmp['contribution_to_total_change']=tmp['pct_change']*1

        ######## collect this age / skill dataset
        group_frames.append(tmp)

# Stack every (age, education) frame; row indexes are kept as they were per frame.
master=pd.concat(group_frames) if group_frames else pd.DataFrame()






first loop


In [5]:

#(3) translate age group to total population pct change

# Sum the per-group contributions within each state / education level.
# The aggregation is given as the string 'sum': passing the builtin `sum` is
# deprecated in pandas and will stop being mapped to the groupby sum.
master_gr=master.groupby(['state','educ_id'],as_index=False).agg({'contribution_to_total_change':'sum'})


#####get MSA Identifier information
# Two Stata files from the estimation directory: the MSA identifier table and the
# second-stage dataset.

msa_id=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/msa_identifier.dta')
msa_vars=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/second_stage_dataset_cl.dta')




In [6]:
### Import the state lookup table into `sl`.
# It is merged on 'state' in the next cell; the same workbook is read into `sl`
# again in the GAMS re-import cell further down.

sl=pd.read_excel('/Users/hannahkamen/Downloads/statelookup2.xlsx')

In [7]:
#(4) export to gams format
# Three steps in one chain:
#   * turn educ_id into text and replace '0' with 'unskl', then '1' with 'skl'
#   * inner-join the state lookup on 'state'
#   * rename for the GAMS file: abbrev -> '' (empty header), educ_id -> sk,
#     contribution_to_total_change -> skill_shr
master_gr=(
    master_gr
    .assign(
        educ_id=lambda frame: frame['educ_id']
        .astype(str)
        .str.replace('0','unskl')
        .str.replace('1','skl')
    )
    .merge(sl,on='state',how='inner')
    .rename(columns={'abbrev':'','educ_id':'sk','contribution_to_total_change':'skill_shr'})
)


In [9]:
# Reduce master_gr to the three exported columns (the empty-named region column,
# the skill label and the skill share) and write them to CSV without the row index.
master_gr=master_gr.loc[:,['','sk','skill_shr']]
master_gr.to_csv('/Users/hannahkamen/Downloads/le0_shock_0_it1.csv',index=False)

In [8]:
master_gr['skill_shr'].describe()

count    88.000000
mean     -0.017610
std       0.111226
min      -0.327351
25%      -0.071204
50%      -0.005141
75%       0.040509
max       0.247840
Name: skill_shr, dtype: float64

In [32]:
master_gr['skill_shr'].describe()

count    88.000000
mean     -0.017610
std       0.111230
min      -0.327378
25%      -0.071209
50%      -0.005143
75%       0.040514
max       0.247864
Name: skill_shr, dtype: float64

In [None]:
#tmp['fexthot_28'].describe()
#tmp['fextcold'].describe()

In [None]:
# Largest entry in the 'value' column of npl_rpt.
# NOTE(review): among the visible cells npl_rpt is first assigned in the later
# "import GAMS results" cell, so on a fresh top-to-bottom run this cell would
# raise NameError -- move it below that cell or remove it.
npl_rpt['value'].max()

#### (6) re-import GAMS results, merge with pop changes

In [40]:
##### Import the three GAMS report CSVs (y, phou, npl), in that order.
y_rpt,phou_rpt,npl_rpt=[
    pd.read_csv(report_path)
    for report_path in (
        '/Users/hannahkamen/Downloads/y_rpt.csv',
        '/Users/hannahkamen/Downloads/phou_rpt.csv',
        '/Users/hannahkamen/Downloads/npl_rpt.csv',
    )
]

#### MSA population data (second-stage dataset)
msa=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/second_stage_dataset_cl.dta')

#### State lookup table
sl=pd.read_excel('/Users/hannahkamen/Downloads/statelookup2.xlsx')


In [41]:
#### Create a table with the share of its state's summed MSA population that each MSA contributes.

# .copy() makes the three-column selection an independent frame, so the column
# assignments below never write into a slice of the loaded dataset.
msa=msa[['statefip','msa','lnpop']].copy()

# lnpop is exponentiated to get the population level.
msa['msa_pop']=np.exp(msa['lnpop'])

# State totals of MSA population; 'sum' is passed by name because handing the
# builtin `sum` to agg is deprecated in pandas.
msa_tot=msa.groupby('statefip',as_index=False).agg({'msa_pop':'sum'})
msa_tot=msa_tot.rename(columns={'msa_pop':'msa_pop_total'})

# Attach the state total to every MSA row and take the ratio.
msa=msa.merge(msa_tot,on='statefip')
msa['pct_state_total']=msa['msa_pop']/msa['msa_pop_total']

In [42]:
len(msa['msa'].unique())

266

In [None]:
# Write the msa frame to a Stata .dta file in the estimation directory.
msa.to_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/msa_pop_pct.dta')



In [43]:
# Labour report: keep region / skill / value and relabel region -> abbrev, value -> pl.
npl_rpt=(
    npl_rpt
    .loc[:,['region','skill','value']]
    .rename(columns={'region':'abbrev','value':'pl'})
)

In [44]:
# Housing report: keep the 'pct' and 'region' columns and relabel them.
# NOTE(review): the mapping looks swapped ('region' -> ph, 'pct' -> abbrev). The
# later merge joins on 'abbrev' and 'ph' is cast to float downstream, so presumably
# the CSV headers are shifted relative to their contents -- confirm against phou_rpt.csv.
phou_rpt=(
    phou_rpt
    .loc[:,['pct','region']]
    .rename(columns={'region':'ph','pct':'abbrev'})
)

In [45]:
##### State population shares by skilled / unskilled.
# The workbook's own 'statefip' column is dropped before the inner join with the
# state lookup on 'state'; the final selection fixes the column set and order.
state_educ=(
    pd.read_excel('/Users/hannahkamen/Downloads/state_educ_shares.xlsx')
    .drop(columns='statefip')
    .merge(sl,on='state',how='inner')
    .loc[:,['statefip','skl','unskl','state','abbrev','state_pop']]
)

In [46]:
### Merge all fields into r_df, one join at a time (each is pandas' default inner join).
# housing prices + labour prices, matched on the region abbreviation
r_df=phou_rpt.merge(npl_rpt,on='abbrev')
# + skill share changes, matched on (abbrev, skill) against the ('', sk) columns of master_gr
r_df=r_df.merge(master_gr,left_on=['abbrev','skill'],right_on=['','sk'])
# + state education shares and population, matched on the region abbreviation
r_df=r_df.merge(state_educ,on='abbrev')



In [47]:
#### Reshape r_df from one row per (state, skill) to one row per state.

# Skill share changes: one column per skill label, renamed to *_pct_delta.
pop_changes=(
    r_df
    .pivot(index='state',columns='sk',values='skill_shr')
    .reset_index()
    .rename(columns={'skl':'skl_pct_delta','unskl':'unskl_pct_delta'})
)

# Labour prices: same reshape on 'pl', renamed to pl_skl / pl_unskl.
pl_changes=(
    r_df
    .pivot(index='state',columns='sk',values='pl')
    .reset_index()
    .rename(columns={'skl':'pl_skl','unskl':'pl_unskl'})
)

In [48]:
# Trim r_df to the reporting columns, then attach the per-state wide tables.
r_df=(
    r_df[['statefip','abbrev','state','skill','pl','ph','skl','unskl','state_pop']]
    .merge(pop_changes,on='state')
    .merge(pl_changes,on='state')
)

# Despite the "*_level_change" names, each column holds base + base * pct_delta,
# where base = share * state_pop; new_state_pop is the sum of the two.
r_df=r_df.assign(
    skilled_level_change=lambda d: (d['skl']*d['state_pop']) + (d['skl']*d['state_pop']*d['skl_pct_delta']),
    unskilled_level_change=lambda d: (d['unskl']*d['state_pop']) + (d['unskl']*d['state_pop']*d['unskl_pct_delta']),
    new_state_pop=lambda d: d['unskilled_level_change']+d['skilled_level_change'],
)



In [49]:
# Collapse to a single row per state, keeping the first occurrence of each.
r_df_lm=r_df.drop_duplicates(subset=['state'],keep='first')

In [None]:
# Write the one-row-per-state GAMS results (r_df_lm) to a Stata .dta file.
r_df_lm.to_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/gams_dta.dta')




In [55]:
# Spot check: the 'ph' value for arizona, cast to float and multiplied by 10000.
r_df_lm.loc[r_df_lm['state']=='arizona','ph'].astype(float)*10000

14    10001.135266
Name: ph, dtype: float64

In [None]:
####translates pop changes by skill into total population changes


In [None]:
# Exponentiate the 'inc_hat' column of tmp0.
# NOTE(review): in the visible cells tmp0 is only assigned inside the projection
# loop, so this shows whichever projection file that loop read last.
np.exp(tmp0['inc_hat'])

##### (9) map out migration flows

In [None]:
# For every origin state s:
#   * find the ids whose chosen location is s (the people living there now)
#   * sum those ids' shares over every destination state
#   * tag the result with the origin state in 'living_flag'
# Per-origin frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0.
origin_frames=[]
for s in df_m['state'].unique():
    df_lm=df_m[(df_m['state']==s) & (df_m['chosen']==1)]
    #### list of ALL ids currently living in state s. The earlier version kept only
    #### the first id, and raised IndexError for a state nobody had chosen.
    living_now=list(df_lm['id'].unique())
    ### limit dataframe to the ids of people living in state s
    tmp=df_m[df_m['id'].isin(living_now)]
    #### now group shares across all states ('sum' by name: the builtin is deprecated in agg)
    tmp2=tmp.groupby(['state'],as_index=False).agg({'share':'sum'})
    ### tag origin state
    tmp2['living_flag']=s
    origin_frames.append(tmp2)

# An empty df_m still yields a frame with the expected columns, so the merge below works.
if origin_frames:
    master_map=pd.concat(origin_frames)
else:
    master_map=pd.DataFrame(columns=['state','share','living_flag'])

###### merge with state population (joined on the origin state)
master_map0=master_map.merge(state_split,left_on='living_flag',right_on='state',how='inner')
# After the merge, 'state_x' is the destination state from master_map.
master_map0=master_map0.rename(columns={'state_x':'moving_to'})

#### get estimates for number of people leaving / going to
# NOTE(review): this multiplies only unskl_2 by state_pop and does not use 'share',
# so the value is the same for every destination of a given origin -- confirm intent.
master_map0['arrive_moving_to']=master_map0['unskl_2']*master_map0['state_pop']

#### merge in the msa codes

master_map0_lm=master_map0[master_map0['living_flag']=='texas']

### export texas test

master_map0_lm.to_csv('/Users/hannahkamen/Downloads/texas_out_unskl2.csv')
    
    

In [None]:
# Load the Texas census extract; its GEOID2 column is parsed in the next cell.
map_dta=pd.read_csv('/Users/hannahkamen/Downloads/census_texas.csv')

In [None]:
# Keep only rows whose GEOID2 contains a comma. na=False treats a missing GEOID2 as
# "no match" instead of producing a NaN mask that boolean indexing rejects.
# .copy() makes the result independent of map_dta, so adding a column below does
# not write into a slice.
map_dta_lm=map_dta[map_dta['GEOID2'].str.contains(',',na=False)].copy()

# 'state' is the second comma-separated field of GEOID2 (the assignment used to be
# written twice; once is enough).
map_dta_lm['state']=map_dta_lm['GEOID2'].apply(lambda x: x.split(',')[1])

In [None]:
map_dta_lm

In [None]:
map_dta['state']

In [None]:
list(map_dta['GEOID2'])

In [None]:
master_map0.head()

In [None]:
##test to see what is happening with temperature

In [None]:
# df_tmp[df_tmp['state']=='washington']

In [None]:
master_map.sort_values(by='share',ascending=False).head(100)

In [None]:
master_map.head()

In [None]:
df_lm

In [None]:
master_map.head()

In [None]:
df_test_gre

In [None]:
df_tmp

In [None]:
df_test_gre

In [None]:
# df_m[(df_m['id']==3)& (df_m['state']=='california')][['msa','bpl','chosen','d_s','d_r1','d_r2','statefip','state']]




In [None]:
# Absolute gap between the 'share' and 'chosen' columns, row by row.
df_test_gre['diff']=(df_test_gre['share']-df_test_gre['chosen']).abs()

In [None]:
df_test_gre['diff'].describe()

In [None]:
# df_tmp=df_m.groupby(['msa'],as_index=False).agg({'fexthot_28':sum,'fextcold':sum,'state':max,'hot':sum,'cold':sum})

In [None]:
# df_m[['msa','state','hot','cold','fexthot_28','fextcold']].head()

In [None]:
len(df[(df['id']==4100)]['msa'].unique())

In [None]:
list()

In [None]:
logit_ready[logit_ready['chosen']==1]['d_s'].describe()

In [None]:
df_lm=df[['msa','chosen','share']]

In [None]:
df[df['chosen']==1]['share'].describe()

#### Import total population and process percent changes for Windc counterfactual

In [None]:
#orig=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/dta/dta/acs5yr_0610_clr.dta')

#orig.groupby(['ageid'],as_index=False)

In [None]:
###get shares by college graduates#impo
#educ_shares.to_csv('/Users/hannahkamen/Downloads/le0_shr2.csv',index=False)
# df['pct_col']=np.exp(df['lncoll'])
# df['pct_nc']=1-np.exp(df['lncoll'])
# df['coll_pop']=np.exp(df['lnpop'])*df['pct_col']
# df['population']=np.exp(df['lnpop'])

# df_tot=df.groupby(['statefip'],as_index=False).agg({'population':sum,'coll_pop':sum,'share':sum,'chosen':sum})
# df_tot['state_coll_shr']=df_tot['coll_pop']/df_tot['population']
# df_tot['pct_change']=(df_tot['share']-df_tot['chosen'])/df_tot['chosen']


In [None]:
df[df['share'].isnull()]

In [None]:
df

In [None]:
list(df)

In [None]:


# State-level totals: summed share, population and chosen, plus a count of ids.
# Aggregations are named by string; passing the builtin `sum` to agg is deprecated.
# NOTE(review): no visible cell creates df['population'] outside commented-out code --
# confirm df carries that column before running.
df_st=df.groupby('statefip',as_index=False).agg({'share':'sum','population':'sum','chosen':'sum','id':'count'})
# Total of 'chosen' across all states.
df_st['chosen'].sum()

In [None]:
list(df)

In [None]:
len(df['id'].unique())

In [None]:
df_st.head()

In [None]:
len(df_st['id'].unique())

In [None]:
44*265

In [None]:
df[df['id']==62973.0]

In [None]:
len(df)

In [None]:
df_st.head()

In [None]:
len(df['statefip'].unique())

In [None]:
df.groupby('id',as_index=False).agg({'share':sum})['share'].unique()

In [None]:
df['msa'].unique()

In [None]:
len(list(df['msa'].unique()))

In [None]:
df.head()

In [None]:
data.head()

In [None]:
# Lookup of distinct (statefip, str_statefip) pairs: grouping by both keys gives one
# row per pair; .sum() only materialises the groupby, and the summed columns are
# discarded by the final two-column selection.
st_lkup=data.groupby(['statefip','str_statefip'],as_index=False).sum()[['statefip','str_statefip']]

In [None]:
list(data)

In [None]:
data['str_statefip'].unique()

In [None]:
list(data)

In [None]:
st_lkup

In [None]:
data.head()

In [None]:
len(data)

In [None]:
list(data)

In [None]:
len(logit)

In [None]:
list(logit)

In [None]:
logit.head()

In [None]:
q

In [None]:
logit.head()

In [None]:
q.head()

In [None]:
for i in np.arange(2,8,1):
    print(i)

In [None]:
# Read the second-stage average file for each age id 2..7.
# NOTE(review): every pass rebinds df, so only the last file (age 7) is left after
# the loop -- confirm whether the per-age frames were meant to be kept.
for i in np.arange(2,8,1):
    df=pd.read_stata('/Users/hannahkamen/Downloads/population-migration-master/estimation/1_main_specification/acs5yr0610/results/temp/2nd_stage_avg_age%s.dta'%i)
    




In [None]:
df

In [None]:
secondndstage2.head()

In [None]:
secondndstage3.head()

In [None]:
logit['_b_d_r1'].unique()

In [None]:
logit['d_s'].unique()

In [None]:
len(cols)

In [None]:
# Names of the logit columns whose label contains "_b".
cols=[column_name for column_name in logit.columns if "_b" in column_name]

In [None]:
# Print the distinct values of every logit column whose label contains "_b".
for x in filter(lambda label: "_b" in label, logit.columns):
    print(logit[x].unique())

In [None]:
logit['d_s']