In [1]:
import pandas as pd
import numpy as np
import os
import re

In [2]:
%run chart_theme.py

### Cleaning 1991-2011 CA Migration Data (IRS)

*Excluding foreign migration (b/c seems to be drastically underrepresented in IRS data after comparing to Census data- e.g. IRS 17/18 CA foreign mig inflow = 11311, whereas Census 2018 CA foreign inflow = )

## *Use only domestic migration for IRS & Census data- since IRS is missing a lot of int'l data and Census only included inbound int'l data

In [3]:
inmig_91_11 = os.listdir('CA In 91_11')
outmig_91_11 = os.listdir('CA Out 91_11')

In [4]:
master_df1 = pd.DataFrame()
folders = ['CA In 91_11', 'CA Out 91_11']
counter = 0
for folder in [inmig_91_11, outmig_91_11]:
    for file in folder:
        # Identifying year from different file names
        num = re.findall('\d+', file)[0][-2:]
        if int(num) > 20:
            yr = int('19' + num)
        else:
            yr = int('20' + num)
        
        # Extracting these cols: mig state code, mig state name, returns, exemptions
        if yr < 1993:
            cols = [0,2,3,5]
        elif yr > 2009:
            cols = [1,3,4,5]
        else:
            cols = [0,2,3,4]
            
        df = pd.read_excel(folders[counter] + '/' + file, usecols=cols)
        df.columns = ['State_Code', 'Mig_Type', 'Returns', 'Exemptions']
        df = df[df['State_Code'].isin(['06','96','63','57',6,96,63,57])]  # Including 63 for different mig state code in 1993 files (63 & 96=residents; 6=domestic migrants; 57=foreign migrants)
        if yr in [1994,1995]:
            df['State_Code'] = [96,57,6]
        
        df['Year'] = np.full(len(df), yr)
        master_df1 = pd.concat([master_df1, df])
    counter += 1

In [5]:
master_df1['Type'] = ['In']*63 + ['Out']*63
master_df1['State_Code'] = master_df1['State_Code'].astype(str)
master_df1 = master_df1.sort_values(['Type','Year'])

*Create col for foreign mig and col that subtracts it from total (to produce domestic mig col)

In [6]:
totals = master_df1[master_df1['State_Code'].isin(['6','06'])]
mig_tot = master_df1[master_df1['State_Code'].isin(['63','96'])]
mig_for = master_df1[master_df1['State_Code'].isin(['57'])]

In [7]:
mig_for.head()

Unnamed: 0,State_Code,Mig_Type,Returns,Exemptions,Year,Type
8,57,Foreign,25189,50814,1991,In
8,57,Foreign,30168,57708,1992,In
8,57,Foreign,30686,52872,1993,In
8,57,Foreign,23180,39476,1994,In
8,57,Foreign,22871,38215,1995,In


In [8]:
mig_91_11 = pd.DataFrame({'Year':master_df1.Year.unique(),
                          #'Returns':totals.Returns[totals.Type == 'In'].values,
                          'Non_Mig':totals.Exemptions[totals.Type == 'In'].values,
                          #'Inmig_Returns':mig_tot.Returns[mig_tot.Type == 'In'].values,
                          #'Outmig_Returns':mig_tot.Returns[mig_tot.Type == 'Out'].values,
                          #'In_Foreign_Returns':mig_for.Returns[mig_for.Type == 'In'].values,
                          #'Out_Foreign_Returns':mig_for.Returns[mig_for.Type == 'Out'].values,
                          'Inmig_Exemp':mig_tot.Exemptions[mig_tot.Type == 'In'].values,
                          'Outmig_Exemp':mig_tot.Exemptions[mig_tot.Type == 'Out'].values,
                          'In_Foreign_Exemp':mig_for.Exemptions[mig_for.Type == 'In'].values,
                          'Out_Foreign_Exemp':mig_for.Exemptions[mig_for.Type == 'Out'].values})

In [9]:
mig_91_11['In_Dom_Exemp'] = mig_91_11.Inmig_Exemp - mig_91_11.In_Foreign_Exemp
mig_91_11['Out_Dom_Exemp'] = mig_91_11.Outmig_Exemp - mig_91_11.Out_Foreign_Exemp
mig_91_11 = mig_91_11[['Year', 'Non_Mig', 'Inmig_Exemp', 'In_Dom_Exemp', 'Outmig_Exemp', 'Out_Dom_Exemp']]

In [10]:
mig_91_11.head()

Unnamed: 0,Year,Non_Mig,Inmig_Exemp,In_Dom_Exemp,Outmig_Exemp,Out_Dom_Exemp
0,1991,23010999,448258,397444,571032,531946
1,1992,23340798,429962,372254,577023,542349
2,1993,23236233,372838,319966,659933,625119
3,1994,22841596,344116,304640,674796,643480
4,1995,22414007,353108,314893,628896,600334


In [6]:
"""
mig_91_11 = pd.DataFrame({'Year':master_df1.Year.unique(), 
                          'Returns':master_df1['Returns'][(master_df1['Mig_Type'].str.contains('Non-Migrant')) & (master_df1.Type == 'In')].values, 
                          'Exemptions':master_df1['Exemptions'][(master_df1['Mig_Type'].str.contains('Non-Migrant')) & (master_df1.Type == 'In')].values, 
                          'Inmig_Returns':master_df1['Returns'][~(master_df1['Mig_Type'].str.contains('Non-Migrant')) & (master_df1.Type == 'In')].values, 
                          'Inmig_Exempt':master_df1['Exemptions'][~(master_df1['Mig_Type'].str.contains('Non-Migrant')) & (master_df1.Type == 'In')].values, 
                          'Outmig_Returns':master_df1['Returns'][~(master_df1['Mig_Type'].str.contains('Non-Migrant')) & (master_df1.Type == 'Out')].values,
                          'Outmig_Exempt':master_df1['Exemptions'][~(master_df1['Mig_Type'].str.contains('Non-Migrant')) & (master_df1.Type == 'Out')].values}).sort_values('Year')"""

### Cleaning 2012-2018 CA Migration Data (IRS)

In [11]:
inmig_12_18 = os.listdir('CA In 12_18')
outmig_12_18 = os.listdir('CA Out 12_18')

In [12]:
master_df2 = pd.DataFrame()
folders = ['CA In 12_18', 'CA Out 12_18']
counter = 0
for folder in [inmig_12_18, outmig_12_18]:
    for file in folder:
        yr = int('20'+ file[-6:-4])
        df = pd.read_csv(folders[counter] + '/' + file, usecols=[0,1,4,5])
        df.columns = ['State', 'Mig_Type', 'Returns', 'Exemptions']
        df = df[(df.State == 6) & (df.Mig_Type.isin([6,96,97]))].drop_duplicates(subset='Mig_Type')  # Code 97 used for foreign & same state (only want the first one, corresponding to foreign migrants)
        
        df['Year'] = np.full(len(df), yr)
        master_df2 = pd.concat([master_df2, df])
    counter += 1

In [13]:
len(master_df2)

42

In [14]:
master_df2['Type'] = ['In']*21 + ['Out']*21
mig_12_18 = pd.DataFrame({'Year':master_df2.Year.unique(), 
                          #'Returns':master_df2[2][(master_df2[1] == 6) & (master_df2.Type == 'In')].values, 
                          'Non_Mig':master_df2.Exemptions[(master_df2.Mig_Type == 6) & (master_df2.Type == 'In')].values, 
                          'Inmig_Exemp':master_df2.Exemptions[(master_df2.Mig_Type == 96) & (master_df2.Type == 'In')].values, 
                          'In_Dom_Exemp':master_df2.Exemptions[(master_df2.Mig_Type == 97) & (master_df2.Type == 'In')].values, 
                          'Outmig_Exemp':master_df2.Exemptions[(master_df2.Mig_Type == 96) & (master_df2.Type == 'Out')].values, 
                          'Out_Dom_Exemp':master_df2.Exemptions[(master_df2.Mig_Type == 97) & (master_df2.Type == 'Out')].values}).sort_values('Year')

In [15]:
mig_12_18

Unnamed: 0,Year,Non_Mig,Inmig_Exemp,In_Dom_Exemp,Outmig_Exemp,Out_Dom_Exemp
0,2012,30080010,476184,455202,496085,468496
1,2013,30139423,485161,463373,532619,505991
2,2014,30251074,453621,432409,512732,487994
3,2015,30666470,344232,322677,388394,365561
4,2016,30665600,453736,432443,546608,521161
5,2017,30323418,571204,560173,750592,738242
6,2018,30682756,441167,429856,592703,583053


### Combining Data

In [24]:
comb = pd.concat([mig_91_11, mig_12_18]).reset_index(drop=True)
for col in comb.columns:
    comb[col] = comb[col].astype(int)

comb.Year = comb.Year - 1  # Since tax reflects prev year's migration data
comb['Zero'] = np.full(len(comb),0)  # For gray area chart

In [25]:
comb.head()

Unnamed: 0,Year,Non_Mig,Inmig_Exemp,In_Dom_Exemp,Outmig_Exemp,Out_Dom_Exemp,Zero
0,1990,23010999,448258,397444,571032,531946,0
1,1991,23340798,429962,372254,577023,542349,0
2,1992,23236233,372838,319966,659933,625119,0
3,1993,22841596,344116,304640,674796,643480,0
4,1994,22414007,353108,314893,628896,600334,0


Peak Years:
- 92-94
- 04-06
- 16-19

*Exemptions represent all individuals (filers & dependents) 
- https://www.irs.gov/newsroom/five-things-to-remember-about-exemptions-and-dependents-for-tax-year-2017

*Adjust mig values based on difference b/w exemptions & state population (since not all residents are accounted for in tax filings- as the IRS clarifies in their accompanying data guide)

In [18]:
# CA 1990-2019 Population (source: http://www.dof.ca.gov/Forecasting/Demographics/Estimates/)
pops = '29558000	30143555	30722998	31150786	31418940	31617770	31837399	32207869	32657877	33140771	33721583	34256789	34725516	35163609	35570847	35869173	36116202	36399676	36704375	36966713	37223900	37594781	37971427	38321459	38622301	38952462	39214803	39504609	39740508	39927315'
pops = re.sub('\s',',',pops).split(',')[:-2]  # Excluding 18 & 19

In [26]:
comb['Population'] = [int(p) for p in pops]
adj_cols = ['Inmig_Exemp', 'In_Dom_Exemp', 'Outmig_Exemp', 'Out_Dom_Exemp']  # Adjusting mig cols based on pop/exemptions diff
for col in adj_cols:
    comb[col] = (comb[col] * comb.Population / comb.Non_Mig).round().astype(int)

In [27]:
comb.head()

Unnamed: 0,Year,Non_Mig,Inmig_Exemp,In_Dom_Exemp,Outmig_Exemp,Out_Dom_Exemp,Zero,Population
0,1990,23010999,575795,510523,733500,683293,0,29558000
1,1991,23340798,555276,480749,745198,700419,0,30143555
2,1992,23236233,492967,423060,872565,826534,0,30722998
3,1993,22841596,469297,415460,920270,877562,0,31150786
4,1994,22414007,494971,441403,881558,841521,0,31418940


In [28]:
comb['Pop_Exemp_Mult'] = comb.Population / comb.Non_Mig
comb['Net_Mig_Dom_Exemp'] = comb.In_Dom_Exemp - comb.Out_Dom_Exemp
#comb['Net_Mig_Exemp'] = comb.Inmig_Exemp - comb.Outmig_Exemp
comb['Rel_Mig_Dom_Exemp'] = round(comb.Net_Mig_Dom_Exemp / comb.Non_Mig, 4)
#comb['Rel_Mig_Exemp'] = round(comb.Net_Mig_Exemp / comb.Exemptions, 4)
comb['Inmig_Exemp_pct_chg'] = abs(comb.Inmig_Exemp.pct_change())
comb['Outmig_Exemp_pct_chg'] = abs(comb.Outmig_Exemp.pct_change())

In [29]:
comb.head()

Unnamed: 0,Year,Non_Mig,Inmig_Exemp,In_Dom_Exemp,Outmig_Exemp,Out_Dom_Exemp,Zero,Population,Pop_Exemp_Mult,Net_Mig_Dom_Exemp,Rel_Mig_Dom_Exemp,Inmig_Exemp_pct_chg,Outmig_Exemp_pct_chg
0,1990,23010999,575795,510523,733500,683293,0,29558000,1.284516,-172770,-0.0075,,
1,1991,23340798,555276,480749,745198,700419,0,30143555,1.291453,-219670,-0.0094,0.035636,0.015948
2,1992,23236233,492967,423060,872565,826534,0,30722998,1.322202,-403474,-0.0174,0.112213,0.170917
3,1993,22841596,469297,415460,920270,877562,0,31150786,1.363774,-462102,-0.0202,0.048015,0.054672
4,1994,22414007,494971,441403,881558,841521,0,31418940,1.401755,-400118,-0.0179,0.054707,0.042066


In [30]:
comb.to_csv('IRS_CA_mig_pop_9017.csv', index=False)

In [285]:
comb.Net_Mig_Dom_Exemp.min()

-462102

In [286]:
comb.Net_Mig_Dom_Exemp.max()

-16616

In [207]:
comb.Inmig_Exemp_pct_chg.mean()*100

8.051697652445382

In [208]:
comb.Outmig_Exemp_pct_chg.mean()*100

10.603041688119243

In [287]:
comb[comb.Year < 2008].Inmig_Exemp_pct_chg.mean()*100

4.1377091399891786

In [210]:
comb[comb.Year < 2008].Outmig_Exemp_pct_chg.mean()*100

6.8624478247063445

In [211]:
comb[comb.Year >= 2008].Inmig_Exemp_pct_chg.mean()*100

14.70547812362093

In [212]:
comb[comb.Year >= 2008].Outmig_Exemp_pct_chg.mean()*100

16.96205125592117

In [302]:
alt.Chart(comb, title='CA Outmig Totals').mark_line(size=5, interpolate='basis').encode(x='Year:O', y='Outmig_Exemp')

In [303]:
alt.Chart(comb, title='CA Inmig Totals').mark_line(size=5, interpolate='basis').encode(x='Year:O', y='Inmig_Exemp')

#### *Inmig is more stable up to 2008 (no severe peaks/valleys)

In [215]:
#comb.to_csv('IRS_CA_mig_9017.csv')
comb = pd.read_csv('IRS_CA_mig_9017.csv')

\*Comparing this IRS state mig data w/ IRS county mig data (irs_mig_91to2018.csv) reveals that the ***net*** migration figures are identical (so can use the cleaned county data for comparisons of different states)- the raw inmig & outmig counts are higher because they include migration between counties in the same state

In [77]:
one = alt.Chart(comb[comb.Year < 1998]
                 ).mark_line(size=5, strokeCap='round'
                            ).encode(x=alt.X('Year:O', axis=alt.Axis(values=list(range(1992,2018,4)))), 
                                     y=alt.Y('Rel_Mig_Exemptions', axis=alt.Axis(format='%', labelPadding=40)))

two = alt.Chart(comb[comb.Year.isin([1997,1998,1999,2000,2001])]
                 ).mark_line(color='red', size=5, strokeCap='round', interpolate='basis'
                            ).encode(x=alt.X('Year:O', axis=alt.Axis(values=list(range(1992,2018,4)))), 
                                     y=alt.Y('Rel_Mig_Exemptions', axis=alt.Axis(format='%', labelPadding=40)))

three = alt.Chart(comb[comb.Year > 2000]
                 ).mark_line(size=5, strokeCap='round'
                            ).encode(x=alt.X('Year:O', axis=alt.Axis(values=list(range(1992,2018,4)))), 
                                     y=alt.Y('Rel_Mig_Exemptions', axis=alt.Axis(format='%', labelPadding=40)))

horizontal = alt.Chart(pd.DataFrame({'y':[0]})).mark_rule(strokeCap='round', color='gray').encode(y='y')

one + two + three + horizontal

In [None]:
#title='With the exception of 1997-2000, California has experienced net negative domestic migration every year',
#title='*Note: Total CA population has steadily increased due to high annual birth rates', 

In [360]:
%run chart_theme.py

In [275]:
line = alt.Chart(comb, title='California Net Domestic Migration'
                 ).mark_line(color='#006aac', size=5, strokeCap='round', interpolate='basis'
                            ).encode(x=alt.X('Year:O', axis=alt.Axis(labelOpacity=.8, labelFontSize=22, values=[1990,2000,2010,2017])), 
                                     y=alt.Y('Net_Mig_Dom_Exemp', 
                                             title='Every year since 1990 more Californians moved out than residents',
                                             axis=alt.Axis(values=list(range(0,-400000,-100000)),
                                                           format='~s', titleFontSize=24, titleOpacity=.8,  titleX=-50, titleY=-50,
                                                           labelOpacity=.8, labelFontSize=22, labelPadding=50),
                                             scale=alt.Scale(domain=[-360000,0])))

title_line2 = alt.Chart(pd.DataFrame({'x':[1994],'y':[30000],'text':['from other states moved in.']})).mark_text(dx=-6, size=24, font='calibri', opacity=.9).encode(x='x:O',y='y',text='text')
#y_label = alt.Chart(pd.DataFrame({'x':[1990],'y':[0],'text':['0']})).mark_text(dx=-50, size=24, opacity=.8, font='calibri').encode(x='x:O',y='y',text='text')
area = alt.Chart(comb).mark_area(color='lightgray', opacity=.3, interpolate='basis').encode(x='Year:O',y='Net_Mig_Dom_Exemp',y2='Zero')
text = alt.Chart(pd.DataFrame({'x':[1993,2005,2016.5],'y':[-350000,-250000,-190000],'text':['\'93 peak', '\'05 peak', 'Next Peak?']})).mark_text(size=22, font='calibri', fontWeight='bold', opacity=.8).encode(x='x:O',y='y',text='text')

chart = (area + line + text + title_line2).properties(height=500)
chart

In [369]:
line = alt.Chart(comb, title='California Net Domestic Migration'
                 ).mark_line(color='#006aac', size=5, strokeCap='round', interpolate='basis'
                            ).encode(x=alt.X('Year:O', axis=alt.Axis(labelOpacity=.8, labelFontSize=22, values=[1990,2000,2010,2017])), 
                                     y=alt.Y('Net_Mig_Dom_Exemp', 
                                             title='Every year since 1990 more Californians moved out than residents',
                                             axis=alt.Axis(values=list(range(0,-500000,-100000)),
                                                           format='~s', titleFontSize=24, titleOpacity=.8,  titleX=-50, titleY=-5,
                                                           labelOpacity=.8, labelFontSize=22, labelPadding=50)))

title_line2 = alt.Chart(pd.DataFrame({'x':[1994],'y':[40000],'text':['from other states moved in.']})).mark_text(dx=-6, size=24, font='calibri', opacity=.9).encode(x='x:O',y='y',text='text')
#y_label = alt.Chart(pd.DataFrame({'x':[1990],'y':[0],'text':['0']})).mark_text(dx=-50, size=24, opacity=.8, font='calibri').encode(x='x:O',y='y',text='text')
area = alt.Chart(comb).mark_area(color='lightgray', opacity=.3, interpolate='basis').encode(x='Year:O',y='Net_Mig_Dom_Exemp',y2='Zero')
text = alt.Chart(pd.DataFrame({'x':[1993,2005,2016.5],'y':[-470000,-335000,-240000],'text':['\'93 low', '\'05 low', 'Next low?']})).mark_text(size=22, font='calibri', fontWeight='bold', opacity=.8).encode(x='x:O',y='y',text='text')

chart = (area + line + text + title_line2).properties(height=450)
chart

In [370]:
chart.save('ca_dom_mig_irs_9017.svg')

In [14]:
inmig = alt.Chart(comb, title='CA Migration (In & Out)').mark_bar().encode(x='Year:O',y=alt.Y('Inmig_Total', axis=alt.Axis(format='~s', labelPadding=40,titleX=-40)))
outmig = alt.Chart(comb).mark_bar(color='#ffc417').encode(x='Year:O',y='Outmig_Total')
outmig + inmig

In [315]:
comb.head(1)

Unnamed: 0,Year,Non_Mig,Inmig_Exemp,In_Dom_Exemp,Outmig_Exemp,Out_Dom_Exemp,Zero,Population,Net_Mig_Dom_Exemp,Rel_Mig_Dom_Exemp,Inmig_Exemp_pct_chg,Outmig_Exemp_pct_chg
0,1990,23010999,575795,510523,733500,683293,0,29558000,-172770,-0.0075,,


In [373]:
inmig = alt.Chart(comb, title='California Domestic Migration Trends'
                 ).mark_line(size=5, strokeCap='round', interpolate='basis', color='#8ac3ff'
                            ).encode(x=alt.X('Year:O', axis=alt.Axis(values=[1990,2000,2010,2017], labelOpacity=.8, labelFontSize=20)),
                                     y=alt.Y('In_Dom_Exemp', title='migration trends were characterized by an inverse',
                                             axis=alt.Axis(values=list(range(200000,1000000,200000)), 
                                                           titleFontSize=22, titleOpacity=.8, titleX=145, titleY=28.5, labelPadding=35, labelOpacity=0, labelFontSize=20)))
outmig = alt.Chart(comb).mark_line(size=5, strokeCap='round', interpolate='basis',color='#ffc417').encode(x='Year:O',y='Out_Dom_Exemp')
area = alt.Chart(comb).mark_area(interpolate='basis', color='lightgray', opacity=.15).encode(x='Year:O', y='In_Dom_Exemp', y2='Out_Dom_Exemp')

title_line2 = alt.Chart(pd.DataFrame({'x':[2002],'y':[1100000],'text':[' relationship from 1990 to 2007 but have since shifted to a direct one.']})).mark_text(dx=9, size=22, font='calibri', opacity=.9).encode(x='x:O',y='y',text='text')

pattern_text = alt.Chart(pd.DataFrame({'x':[1999,2013], 'y':[330000]*2, 
                               'text':['Inverse Relationship', 'Direct Relationship']})).mark_text(font='calibri', size=24, opacity=.8).encode(x='x:O',y='y',text='text')
line_df = pd.DataFrame({'x':[1999,1990,1990,2007,2013,2007.1,2007.1,2017], 
                        'x2':[1999,1990,2007,2007,2013,2007.1,2017,2017], 
                        'y':[400000,400000,400000,430000,400000,430000,400000,400000], 
                        'y2':[370000,430000,400000,400000,370000,400000,400000,430000]})

lines = alt.Chart(line_df).mark_rule(opacity=.5).encode(x='x:O',x2='x2',y='y',y2='y2')
y_line = line = alt.Chart(pd.DataFrame({'x':[2004],'y':[1000000],'text':['_'*90]})).mark_text(dx=13, dy=-4, opacity=.09, fontSize=11.5, fontWeight='bold').encode(x='x:O', y='y', text='text')
y_label_mil = alt.Chart(pd.DataFrame({'x':[1990],'y':[1000000],'text':['1 million']})).mark_text(font='calibri', size=20, opacity=.8, dx=-15).encode(x='x:O',y='y',text='text')
y_label_mig = alt.Chart(pd.DataFrame({'x':[1990],'y':[950000],'text':['migrants']})).mark_text(font='calibri', size=20, opacity=.8, dx=-14).encode(x='x:O',y='y',text='text')
y_labels = alt.Chart(pd.DataFrame({'x':[1990]*4,'y':list(range(200000,1000000,200000)),'text':[str(i) +'k' for i in range(200,1000,200)]})).mark_text(font='calibri', size=20, opacity=.8, dx=-28).encode(x='x:O',y='y',text='text')
#horizontal_dash = alt.Chart(pd.DataFrame({'x':[1990],'x2':[2007.1],'y':[565000]})).mark_rule(opacity=.5, strokeDash=[5,5]).encode(x='x:O',x2='x2',y='y')
#white_box1 = alt.Chart(pd.DataFrame({'x':[1990],'x2':[1996],'y':[1200000],'y2':[1150000]})).mark_rect(color='black').encode(x='x:O',y='y',y2='y2')
label_inward = alt.Chart(pd.DataFrame(pd.DataFrame({'x':[1990],'y':[1160000],'text':['Inward']}))).mark_text(color='#8ac3ff', size=22, font='calibri', dx=-20).encode(x='x:O',y='y',text='text')
label_and = alt.Chart(pd.DataFrame(pd.DataFrame({'x':[1991],'y':[1160000],'text':['and']}))).mark_text(size=22, font='calibri', dx=12, opacity=.8).encode(x='x:O',y='y',text='text')
label_outward = alt.Chart(pd.DataFrame(pd.DataFrame({'x':[1993],'y':[1160000],'text':['outward']}))).mark_text(color='#ffc417', size=22, font='calibri', dx=30).encode(x='x:O',y='y',text='text')

in_out = (area + outmig + inmig + title_line2 + pattern_text + lines + y_label_mil + y_label_mig + y_line + y_labels + label_inward + label_and + label_outward).properties(height=500)
in_out

In [374]:
in_out.save('CA_in_out_mig_9017.svg')

In [357]:
inmig = alt.Chart(comb, title='California Inward and Outward Migration'
                 ).mark_line(size=5, strokeCap='round', interpolate='basis', color='#8ac3ff'
                            ).encode(x=alt.X('Year:O', axis=alt.Axis(values=[1990,2000,2010,2017], labelOpacity=.8, labelFontSize=20)),
                                     y=alt.Y('In_Dom_Exemp', title='migration totals based on IRS tax filing data',
                                             axis=alt.Axis(values=list(range(200000,1000000,200000)), 
                                                           titleFontSize=22, titleOpacity=.8, titleX=148, titleY=20, labelPadding=35, labelOpacity=0, labelFontSize=20)))
outmig = alt.Chart(comb).mark_line(size=5, strokeCap='round', interpolate='basis',color='#ffc417').encode(x='Year:O',y='Out_Dom_Exemp')
area = alt.Chart(comb).mark_area(interpolate='basis', color='lightgray', opacity=.15).encode(x='Year:O', y='In_Dom_Exemp', y2='Out_Dom_Exemp')
#title_line2 = alt.Chart(pd.DataFrame({'x':[2002],'y':[1100000],'text':[' relationship from 1990 to 2007 but have since shifted to a direct one.']})).mark_text(dx=0, size=22, font='calibri', opacity=.9).encode(x='x:O',y='y',text='text')

y_line = line = alt.Chart(pd.DataFrame({'x':[2004],'y':[1000000],'text':['_'*89]})).mark_text(dx=10, dy=-4, opacity=.09, fontSize=11.5, fontWeight='bold').encode(x='x:O', y='y', text='text')
y_label_mil = alt.Chart(pd.DataFrame({'x':[1990],'y':[1000000],'text':['1 million']})).mark_text(font='calibri', size=20, opacity=.8, dx=-15).encode(x='x:O',y='y',text='text')
y_label_mig = alt.Chart(pd.DataFrame({'x':[1990],'y':[950000],'text':['migrants']})).mark_text(font='calibri', size=20, opacity=.8, dx=-14).encode(x='x:O',y='y',text='text')
y_labels = alt.Chart(pd.DataFrame({'x':[1990]*4,'y':list(range(200000,1000000,200000)),'text':[str(i) +'k' for i in range(200,1000,200)]})).mark_text(font='calibri', size=20, opacity=.8, dx=-28).encode(x='x:O',y='y',text='text')
label_inward = alt.Chart(pd.DataFrame(pd.DataFrame({'x':[1990],'y':[1080000],'text':['Inward']}))).mark_text(color='#8ac3ff', size=22, font='calibri', dx=-20).encode(x='x:O',y='y',text='text')
label_and = alt.Chart(pd.DataFrame(pd.DataFrame({'x':[1991],'y':[1080000],'text':['and']}))).mark_text(size=22, font='calibri', dx=12, opacity=.8).encode(x='x:O',y='y',text='text')
label_outward = alt.Chart(pd.DataFrame(pd.DataFrame({'x':[1993],'y':[1080000],'text':['outward']}))).mark_text(color='#ffc417', size=22, font='calibri', dx=30).encode(x='x:O',y='y',text='text')

in_out = (area + outmig + inmig +  y_label_mil + y_label_mig + y_line + y_labels + label_inward + label_and + label_outward).properties(height=500)
in_out

- Migration to and from CA 

In [332]:
in_out.save('CA_in_out_mig_9017.png', scale_factor=3)

In [380]:
inmig = alt.Chart(comb, title='CA Migration (In & Out)').mark_area(size=5, strokeCap='round', interpolate='basis').encode(x='Year:O',y=alt.Y('Inmig_Total', axis=alt.Axis(format='~s', labelPadding=40,titleX=-40)))
outmig = alt.Chart(comb).mark_area(size=5, strokeCap='round', interpolate='basis',color='#ffc417').encode(x='Year:O',y='Outmig_Total')
outmig + inmig

In [240]:
chart = alt.Chart(comb, title='California Net Migration').mark_bar().encode(x=alt.X('Year:O', axis=alt.Axis(labelOpacity=.8, values=list(range(1992,2018,4)))), 
                                  y=alt.Y('Rel_Mig', axis=alt.Axis(labelPadding=45, labelOpacity=.8, format='%')),
                                  color=alt.condition(alt.datum.Rel_Mig > 0, alt.value('#efcd55'), alt.value('#5688c4')))

text = alt.Chart(pd.DataFrame({'x':[1995,2005,2016],'y':[-.0128,-.0065,-.0054],'text':['1993 peak', '2005 peak', '2016 peak?']})).mark_text(size=16, font='arial').encode(x='x:O',y='y',text='text')
chart + text

In [382]:
chart = alt.Chart(comb, title='California Net Migration').mark_bar().encode(x=alt.X('Year:O', axis=alt.Axis(labelOpacity=.8, values=list(range(1992,2018,4)))), 
                                  y=alt.Y('Net_Mig_Total', axis=alt.Axis(labelPadding=45, labelOpacity=.8, format='%')),
                                  color=alt.condition(alt.datum.Rel_Mig > 0, alt.value('#efcd55'), alt.value('#5688c4')))

text = alt.Chart(pd.DataFrame({'x':[1995,2005,2016],'y':[-.0128,-.0065,-.0054],'text':['1993 peak', '2005 peak', '2016 peak?']})).mark_text(size=16, font='arial').encode(x='x:O',y='y',text='text')
chart + text

In [106]:
alt.Chart(comb).mark_bar(size=20).encode(x=alt.X('Year:O', axis=alt.Axis(values=list(range(1992,2018,4)))), 
                                  y=alt.Y('Net_Mig', axis=alt.Axis(labelPadding=60)),
                                  color=alt.condition(alt.datum.Net_Mig > 0, alt.value('#7eadeb'), alt.value('#e6cd85')))