In [None]:
# Data Source
# https://apps.bea.gov/regional/downloadzip.cfm
# The files are from the GDP section (CAGDP1: GDP Summary by County and MSA) and the 
# Personal Income section (CAINC30: Economic Profile By County)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt


In [None]:
def _load_bea_county_table(csv_path, n_leading_rows, n_trailing_rows=4):
    """Read one BEA regional CSV and keep only its county-level rows.

    Parameters
    ----------
    csv_path : str
        Location of the BEA download (CAGDP1 or CAINC30 file).
    n_leading_rows : int
        Number of rows at the top of the file to discard (the CA totals).
    n_trailing_rows : int, default 4
        Number of rows at the bottom of the file to discard (the footer).

    Returns
    -------
    pandas.DataFrame
        The remaining rows, with double quotes stripped from ``GeoFIPS`` and a
        new ``county_code`` column holding the last three characters of
        ``GeoFIPS``. The original row index is preserved.
    """
    table = pd.read_csv(csv_path)
    # Compute the stop position explicitly so n_trailing_rows=0 keeps every row
    # (a bare ``-0`` stop would produce an empty slice).
    stop = len(table) - n_trailing_rows
    # .copy() detaches the slice from the raw frame, so the column assignments
    # below modify an independent frame instead of raising SettingWithCopyWarning.
    table = table.iloc[n_leading_rows:stop, :].copy()
    table['GeoFIPS'] = table['GeoFIPS'].str.replace('"', '', regex=False)  # get rid of quotes
    table['county_code'] = table['GeoFIPS'].str[-3:]  # isolate county code
    return table


# CAINC30 (Economic Profile): the first 31 rows are the CA totals.
earnings_df = _load_bea_county_table('/work/assets/CAINC30_CA_1969_2020.csv', n_leading_rows=31)
# CAGDP1 (GDP Summary): the first 3 rows are the CA totals.
county_gdp = _load_bea_county_table('/work/assets/CAGDP1_CA_2001_2019.csv', n_leading_rows=3)


In [None]:
# Show the distinct line-item labels present in the earnings table.
earnings_df.Description.unique()

array(['Personal income (thousands of dollars)',
       ' Net earnings by place of residence',
       ' Personal current transfer receipts',
       '  Income maintenance benefits 1/',
       '  Unemployment insurance compensation', '  Retirement and other',
       ' Dividends, interest, and rent 2/', ' Population (persons) 3/',
       ' Per capita personal income 4/', ' Per capita net earnings 4/',
       ' Per capita personal current transfer receipts 4/',
       '  Per capita income maintenance benefits 4/',
       '  Per capita unemployment insurance compensation 4/',
       '  Per capita retirement and other 4/',
       ' Per capita dividends, interest, and rent 4/',
       ' Earnings by place of work', '  Wages and salaries',
       '  Supplements to wages and salaries',
       '   Employer contributions for employee pension and insurance funds 5/',
       '   Employer contributions for government social insurance',
       "  Proprietors' income", "   Farm proprietors' income",
  

In [None]:
# Only the final expression of a cell is rendered, so the label list has to be
# printed explicitly; otherwise its result is silently discarded.
print(county_gdp['Description'].unique())
county_gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,county_code
3,6001,"Alameda, CA",8.0,CAGDP1,1.0,...,Real GDP (thousands of chained 2012 dollars),Thousands of chained 2012 dollars,81266600.0,82555390.0,...,94801980.0,97203584.0,100766100.0,105506400.0,113194400.0,117516300.0,122983900.0,127165200.0,129708800.0,1
4,6001,"Alameda, CA",8.0,CAGDP1,2.0,...,Chain-type quantity indexes for real GDP,Quantity index,83.605,84.93,...,97.529,100.0,103.665,108.542,116.451,120.897,126.522,130.824,133.44,1
5,6001,"Alameda, CA",8.0,CAGDP1,3.0,...,Current-dollar GDP (thousands of current dollars),Thousands of dollars,66992940.0,69145650.0,...,92823600.0,97203584.0,102651300.0,109296300.0,119856800.0,126100400.0,133468800.0,140288600.0,146210700.0,1
6,6003,"Alpine, CA",8.0,CAGDP1,1.0,...,Real GDP (thousands of chained 2012 dollars),Thousands of chained 2012 dollars,74997.0,77453.0,...,98962.0,98033.0,97738.0,106196.0,110513.0,108329.0,104377.0,103549.0,114254.0,3
7,6003,"Alpine, CA",8.0,CAGDP1,2.0,...,Chain-type quantity indexes for real GDP,Quantity index,76.502,79.007,...,100.948,100.0,99.699,108.327,112.73,110.503,106.471,105.626,116.546,3


In [None]:
# Reformat GDP data to features with one row per county per year.
# NOTE: this cell overwrites `county_gdp` with the reshaped frame (which no
# longer has a 'Description' column), so re-run the load cell before running
# this cell a second time.
mapper = {
    'Real GDP (thousands of chained 2012 dollars)': 'real_gdp',
    'Chain-type quantity indexes for real GDP': 'chain_idx_real_gdp',
    'Current-dollar GDP (thousands of current dollars)': 'curr_dollar_real_gdp',
}

# Year columns (stored as strings in the BEA file) carried into the output.
gdp_years = ['2014', '2015', '2016', '2017', '2018', '2019']

county_gdp = (
    county_gdp
    # Assign the relabelled column explicitly. The previous
    # `df["Description"].replace(..., inplace=True)` mutated a temporary Series,
    # which warns on current pandas and is a no-op under Copy-on-Write.
    .assign(Description=county_gdp['Description'].replace(mapper))
    .loc[:, ['GeoFIPS', 'Description'] + gdp_years]
    # Long format: one row per (county, measure, year).
    .melt(id_vars=['GeoFIPS', 'Description'])
    # Wide format: one row per (county, year), one column per measure.
    .pivot(index=['GeoFIPS', 'variable'], columns=['Description'], values='value')
    .reset_index()
    .rename(columns={'variable': 'year'})
)
county_gdp

Description,GeoFIPS,year,chain_idx_real_gdp,curr_dollar_real_gdp,real_gdp
0,06001,2014,108.542,109296260.0,105506384.0
1,06001,2015,116.451,119856797.0,113194353.0
2,06001,2016,120.897,126100394.0,117516318.0
3,06001,2017,126.522,133468782.0,122983926.0
4,06001,2018,130.824,140288623.0,127165177.0
...,...,...,...,...,...
343,06115,2015,102.252,2967775.0,2787647.0
344,06115,2016,107.910,3160242.0,2941894.0
345,06115,2017,113.501,3392823.0,3094336.0
346,06115,2018,114.059,3485836.0,3109541.0


In [None]:
# Reformat earnings data to features with one row per county per year.
# The trailing notes record whether each measure is kept in, or dropped from,
# the final merged dataset.
# NOTE: this cell overwrites `earnings_df` with the reshaped frame (which no
# longer has a 'Description' column), so re-run the load cell before running
# this cell a second time.
mapper = {
    'Personal income (thousands of dollars)': 'personal_income',  # dropped
    ' Net earnings by place of residence': 'net_earn_place_of_residence',  # dropped
    ' Personal current transfer receipts': 'pers_curr_transfer_receipts',  # dropped; sum of the next 3
    '  Income maintenance benefits 1/': 'income_maint_benefits',  # dropped
    '  Unemployment insurance compensation': 'unemploy_ins_comp',  # dropped
    '  Retirement and other': 'retirement_and_other',  # dropped
    ' Dividends, interest, and rent 2/': 'dividends_interest_and_rent',  # dropped
    ' Population (persons) 3/': 'bea_number_of_people',  # kept
    ' Per capita personal income 4/': 'per_capita_personal_income',  # kept
    ' Per capita net earnings 4/': 'per_capita_net_earnings',  # dropped
    ' Per capita personal current transfer receipts 4/': 'per_capita_pers_curr_transfer_receipts',  # dropped; sum of the next 3
    '  Per capita income maintenance benefits 4/': 'per_capita_inc_maint_benefits',  # dropped
    '  Per capita unemployment insurance compensation 4/': 'per_capita_unemploy_ins_comp',  # kept
    '  Per capita retirement and other 4/': 'per_capita_retirement_and_other',  # kept
    ' Per capita dividends, interest, and rent 4/': 'per_capita_dividends_interest_and_rent',  # dropped
    # Sum of wages and salaries, supplements to wages and salaries, and proprietors' income
    ' Earnings by place of work': 'earn_by_place_of_work',  # dropped
    '  Wages and salaries': 'wages_and_salaries',  # dropped
    '  Supplements to wages and salaries': 'supp_wages_and_salaries',  # dropped; sum of the next 2
    '   Employer contributions for employee pension and insurance funds 5/': 'empl_contrib_pension_and_ins',
    '   Employer contributions for government social insurance': 'empl_contrib_govt_social_ins',
    "  Proprietors' income": 'proprietors_income',  # dropped; sum of farm and nonfarm
    "   Farm proprietors' income": 'farm_proprieters_income',  # dropped
    "   Nonfarm proprietors' income": 'non_farm_proprieters_income',  # dropped
    'Total employment (number of jobs)': 'total_num_jobs',  # kept; sum of wage-and-salary and proprietors employment
    ' Wage and salary employment': 'wage_and_salary_num_jobs',
    ' Proprietors employment': 'proprietors_num_jobs',  # sum of farm and nonfarm proprietors employment
    '  Farm proprietors employment 6/': 'farm_proprietors_num_jobs',  # kept
    '  Nonfarm proprietors employment': 'nonfarm_proprietors_num_jobs',  # kept
    'Average earnings per job (dollars)': 'avg_earn_per_job',  # dropped
    ' Average wages and salaries': 'avg_wages_and_salaries',  # dropped
    " Average nonfarm proprietors' income": 'avg_nonfarm_proprietors_inc',  # dropped
}

# Year columns (stored as strings in the BEA file) carried into the output.
earnings_years = ['2014', '2015', '2016', '2017', '2018', '2019']

earnings_df = (
    earnings_df
    # Assign the relabelled column explicitly. The previous
    # `df["Description"].replace(..., inplace=True)` mutated a temporary Series,
    # which warns on current pandas and is a no-op under Copy-on-Write.
    .assign(Description=earnings_df['Description'].replace(mapper))
    .loc[:, ['GeoFIPS', 'Description'] + earnings_years]
    # Long format: one row per (county, measure, year).
    .melt(id_vars=['GeoFIPS', 'Description'])
    # Wide format: one row per (county, year), one column per measure.
    .pivot(index=['GeoFIPS', 'variable'], columns=['Description'], values='value')
    .reset_index()
    .rename(columns={'variable': 'year'})
)

In [None]:
# Join the earnings measures onto the GDP frame, one row per county and year.
# A left join keeps every GDP row even when no earnings row matches.
bea_df = county_gdp.merge(earnings_df, on=['GeoFIPS', 'year'], how='left')

# Derive per-person ratios by dividing each source column by the BEA population.
# The GDP source is in thousands of current dollars, so its ratio is in
# thousands of dollars per person.
population = bea_df['bea_number_of_people']
per_capita_sources = {
    'per_capita_curr_dollar_real_gdp': 'curr_dollar_real_gdp',
    'per_capita_num_jobs': 'total_num_jobs',
    'per_capita_farm_proprieter_jobs': 'farm_proprietors_num_jobs',
    'per_capita_nonfarm_proprieter_jobs': 'nonfarm_proprietors_num_jobs',
}
for ratio_col, source_col in per_capita_sources.items():
    bea_df[ratio_col] = bea_df[source_col] / population

# Columns that are not part of the final dataset, including the raw inputs of
# the ratios computed above.
unused_columns = [
    'real_gdp',
    'chain_idx_real_gdp',
    'personal_income',
    'net_earn_place_of_residence',
    'pers_curr_transfer_receipts',
    'income_maint_benefits',
    'unemploy_ins_comp',
    'retirement_and_other',
    'dividends_interest_and_rent',
    'per_capita_net_earnings',
    'per_capita_pers_curr_transfer_receipts',
    'per_capita_inc_maint_benefits',
    'per_capita_dividends_interest_and_rent',
    'earn_by_place_of_work',
    'wages_and_salaries',
    'supp_wages_and_salaries',
    'empl_contrib_pension_and_ins',
    'empl_contrib_govt_social_ins',
    'proprietors_income',
    'farm_proprieters_income',
    'non_farm_proprieters_income',
    'wage_and_salary_num_jobs',
    'proprietors_num_jobs',
    'avg_earn_per_job',
    'avg_wages_and_salaries',
    'avg_nonfarm_proprietors_inc',
    'curr_dollar_real_gdp',
    'farm_proprietors_num_jobs',
    'nonfarm_proprietors_num_jobs',
    'total_num_jobs',
]
bea_df = bea_df.drop(columns=unused_columns)






In [None]:
# Confirm which columns remain after the unused measures were dropped.
bea_df.columns

Index(['GeoFIPS', 'year', 'bea_number_of_people', 'per_capita_personal_income',
       'per_capita_retirement_and_other', 'per_capita_unemploy_ins_comp',
       'per_capita_curr_dollar_real_gdp', 'per_capita_num_jobs',
       'per_capita_farm_proprieter_jobs',
       'per_capita_nonfarm_proprieter_jobs'],
      dtype='object', name='Description')

In [None]:
# Save the cleaned county-year dataset for the rest of the project.
# index=False leaves the row index out of the file.
output_csv_path = '/work/cleaned-csvs/ca_economy.csv'
bea_df.to_csv(output_csv_path, index=False)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=f6c76417-5fde-42f3-8920-755838dec3fa' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>