# Business dynamics

> How different slices of the economy evolve over time.

In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fastparquet

from rurec import resources, rurality, ers_codes
#%matplotlib inline

# Rurality

Business dynamics in rural areas, based on different definitions of rurality.

In [None]:
# Load the rurality table for the two comparison states, restricted to the
# columns needed to build the rural indicators below.
rurality_cols = ['YEAR', 'STATE', 'EMPLOYEES', 'RURAL_OUTSIDE_UA', 'UI_CODE',
                 'RUC_CODE', 'RUCA_CODE', 'RURAL_HRSA', 'FAR_LEVEL']
df = rurality.get_df(cols=rurality_cols, states=['WI', 'CT'])

# Assign the results back instead of passing `inplace=True`: the categorical
# accessor has no `inplace` argument in pandas 2, and an in-place `replace`
# on `df[col]` works on a temporary object that may never reach `df`.
df['STATE'] = df['STATE'].cat.remove_unused_categories()
df['RURAL_OUTSIDE_UA'] = df['RURAL_OUTSIDE_UA'].replace({'0': False, '1': True})

# One boolean "is rural" indicator per rurality definition.
df['RURAL_UI'] = df['UI_CODE'].isin(['6', '7', '8', '9', '10', '11', '12'])
# NOTE(review): '10' looks out of range for RUC codes -- confirm against the code list.
df['RURAL_RUC'] = df['RUC_CODE'].isin(['5', '7', '9', '10'])
# RUCA codes are matched on their leading digits: 7, 8, 9 or 10.
df['RURAL_RUCA'] = df['RUCA_CODE'].str[:1].isin(['7', '8', '9']) | (df['RUCA_CODE'].str[:2] == '10')
df['RURAL_FAR'] = (df['FAR_LEVEL'] > 0)

In [None]:
# For every rurality definition, compute by YEAR and STATE the share of rows
# (labelled 'estab') and the share of EMPLOYEES (labelled 'emp') that fall in
# the rural (True) group.
tab = {}
for rural_col in ['RURAL_OUTSIDE_UA', 'RURAL_HRSA', 'RURAL_UI', 'RURAL_RUC', 'RURAL_RUCA', 'RURAL_FAR']:
    x = (
        df.groupby(['YEAR', 'STATE', rural_col])['EMPLOYEES']
        .agg(['size', 'sum'])
        .stack()
        .unstack(rural_col)
    )
    # Rural value divided by the total over all values of the indicator.
    x = x[True] / x.sum(axis=1)
    x = x.unstack().rename(columns={'size': 'estab', 'sum': 'emp'})
    tab[rural_col] = x
# `axis` has to be given by keyword: pandas 2 no longer accepts it positionally.
tab = pd.concat(tab, axis=1).fillna(0)

## Share of rural establishments

In [None]:
# Keep only the 'estab' columns of `tab`, drop that now-constant column level
# and pivot the innermost row level into the columns before formatting the
# shares as percentages.
idx = pd.IndexSlice
t = (
    tab.loc[:, idx[:, 'estab']]
    .droplevel(1, axis=1)
    .unstack()
)
t.style.format('{:.1%}')

In [None]:
# One colour per rurality definition, one line style per state.
rural_defs = t.columns.levels[0]
state_linestyles = {'WI': 'solid', 'CT': 'dashed'}

fig, ax = plt.subplots(figsize=(12, 8))
for color_no, rur in enumerate(rural_defs):
    for st, linestyle in state_linestyles.items():
        ax.plot(t[(rur, st)], color=f'C{color_no}', linestyle=linestyle, label=f'{rur}, {st}')

ax.set_xticks(t.index)
fig.legend(loc='lower center', ncol=len(rural_defs));

## Share of rural employment

In [None]:
# Keep only the 'emp' columns of `tab`, drop that now-constant column level
# and pivot the innermost row level into the columns before formatting the
# shares as percentages.
idx = pd.IndexSlice
t = (
    tab.loc[:, idx[:, 'emp']]
    .droplevel(1, axis=1)
    .unstack()
)
t.style.format('{:.1%}')

In [None]:
# One colour per rurality definition, one line style per state.
rural_defs = t.columns.levels[0]
state_linestyles = {'WI': 'solid', 'CT': 'dashed'}

fig, ax = plt.subplots(figsize=(12, 8))
for color_no, rur in enumerate(rural_defs):
    for st, linestyle in state_linestyles.items():
        ax.plot(t[(rur, st)], color=f'C{color_no}', linestyle=linestyle, label=f'{rur}, {st}')

ax.set_xticks(t.index)
fig.legend(loc='lower center', ncol=len(rural_defs));

# FAI

This section is old code that used InfoGroup data stored in BigQuery.

### NAICS codes of FAI industries

In [None]:
%cd ..

In [None]:
# Read the two FAI code files; paths are relative to the working directory.
with open('data/fai.json') as f:
    fai_naics = json.load(f)
with open('data/fai_subsectors.json') as f:
    fai_subsectors = json.load(f)

# Flatten the per-subsector mappings into a single list of their keys.
fai_subs_codes = [
    code
    for subsector_codes in fai_subsectors.values()
    for code in subsector_codes.keys()
]

# "fai.json" has 168 codes and includes farming and 12 other industries, "fai_subsectors.json" has 100 codes
fai_naics_narrow = fai_subs_codes

# Tom's 4-digit codes
fai_naics_broad = list(map(str, [
    2362, 2382, 3111, 3112, 3114, 3115, 3116, 3117, 3119, 3222, 3251, 3253, 3261, 3272, 3332, 3333,
    3339, 3352, 3369, 3371, 4234, 4238, 4241, 4244, 4246, 4249, 4451, 4452, 4461, 4471, 4529, 4543,
    5413, 5417, 6242, 7223, 7225, 8113, 9231, 9261, 1151, 1152, 2371, 2379, 3219, 3254, 3322, 3323,
    3324, 3326, 3331, 3399, 4245, 4442, 4842, 4931, 5222, 5324, 5419, 7121, 8129, 8134, 8139,
]))

In [None]:
# Distinct (6-character NAICS prefix, description) pairs for years after 2002.
query = '''
SELECT DISTINCT
  substr(naics, 1, 6) as naics, 
  naics_desc
FROM
  `original.data`
WHERE
  year > 2002
ORDER BY
  naics
'''

# Run the query, then discard rows with a missing value in any column.
naics_desc = pd.read_gbq(query, dialect='standard', project_id='info-group-162919')
naics_desc = naics_desc.dropna()

In [None]:
# Flag every NAICS code by membership in the narrow (6-digit) and broad
# (4-digit prefix) FAI lists. `df` is the same object as `naics_desc`, so the
# new columns are added to it as well.
df = naics_desc
df['naics4'] = df['naics'].str[:4]
df['fai_narrow'] = df['naics'].isin(fai_naics_narrow)
df['fai_broad'] = df['naics4'].isin(fai_naics_broad)

# Keep only codes that belong to at least one of the two definitions.
in_any_fai = df['fai_narrow'] | df['fai_broad']
df = df.loc[in_any_fai, ['naics', 'naics4', 'naics_desc', 'fai_narrow', 'fai_broad']]
df.shape

In [None]:
# 6-digit codes: cross-tabulate broad vs. narrow membership, then show the
# column totals and the row totals.
df1 = df.groupby(['fai_broad', 'fai_narrow']).size().unstack()
df1, df1.sum(axis=0), df1.sum(axis=1)

### Load InfoGroup employment data

In [None]:
# Row counts (est) and summed employees (emp) per year, 6-character NAICS
# prefix and CBSA level, for years after 2002.
query = '''
SELECT
  year,
  naics,
  cbsa_level,
  count(*) as est,
  sum(employees) as emp
FROM
  (select employees, year, cbsa_level, substr(naics, 1, 6) as naics from `original.data`)
WHERE
  year > 2002
GROUP BY
  year,
  naics,
  cbsa_level
ORDER BY
  year,
  naics
'''

df_by_year_naics = pd.read_gbq(query, dialect='standard', project_id='info-group-162919')
# Filters that were tried and left disabled:
# df = df[df.naics.notnull()]
# df = df[df.naics.str.startswith('1151') | ~df.naics.str.startswith('11')]
df = df_by_year_naics

In [None]:
# Check that the employment share of rows with a missing NAICS code is negligible.
# At this point a missing code may be NULL/NaN as well as an empty string
# (NaNs are only filled with '' in a later cell), so both are counted as missing.
df['naics_na'] = df['naics'].isna() | (df['naics'] == '')
df1 = df.groupby('naics_na')['emp'].sum()
df1 / df1.sum()

In [None]:
# Check that the farm employment share is negligible.
# "Farm" here means a code starting with '11' but not with '1151'.
df['naics'] = df['naics'].fillna('')
starts_with_11 = df['naics'].str.startswith('11')
starts_with_1151 = df['naics'].str.startswith('1151')
df['farm'] = starts_with_11 & ~starts_with_1151
df1 = df.groupby('farm')['emp'].sum()
df1 / df1.sum()

### Industry classification: FAI and subsectors

In [None]:
# Label every (year, naics, cbsa_level) row with its area type, its FAI
# membership flags and its FAI subsector.
df = df_by_year_naics
df['cbsa'] = df['cbsa_level'].replace([None, '1', '2'], ['rural', 'micro', 'metro'])
df['fai_narrow'] = df['naics'].isin(fai_naics_narrow)
df['fai_broad'] = df['naics'].str[:4].isin(fai_naics_broad)

# Rows matching no subsector keep a missing value; if a code were listed
# under several subsectors, the last one iterated wins.
df['subsector'] = None
for subsector, codes in fai_subsectors.items():
    in_subsector = df['naics'].isin(codes)
    df.loc[in_subsector, 'subsector'] = subsector
df['subsector'] = df['subsector'].astype('category')

In [None]:
# Total 'est' by year and CBSA type, plus the share of it that falls in each
# FAI definition.
est_total = df.groupby(['year', 'cbsa'])['est'].sum().unstack()
df1 = {'total': est_total}
for fai_col in ['fai_narrow', 'fai_broad']:
    fai_est = df[df[fai_col]].groupby(['year', 'cbsa'])['est'].sum().unstack()
    df1[fai_col] = fai_est / est_total
pd.options.display.precision = 3
# `axis` has to be given by keyword: pandas 2 no longer accepts it positionally.
df1 = pd.concat(df1, axis=1)
print(df1.to_string())

In [None]:
# Total 'emp' by year and CBSA type (shown in thousands), plus the share of it
# that falls in each FAI definition. Shares use the unscaled totals.
emp_total = df.groupby(['year', 'cbsa'])['emp'].sum().unstack()
df1 = {'total': (emp_total / 1000).astype('int')}
for fai_col in ['fai_narrow', 'fai_broad']:
    fai_emp = df[df[fai_col]].groupby(['year', 'cbsa'])['emp'].sum().unstack()
    df1[fai_col] = fai_emp / emp_total
pd.options.display.precision = 3
# `axis` has to be given by keyword: pandas 2 no longer accepts it positionally.
df1 = pd.concat(df1, axis=1)
print(df1.to_string())

### Space classification: rural, micropolitan, metropolitan

In [None]:
# Split `df` by CBSA level: missing -> rural, '1' -> micropolitan,
# '2' -> metropolitan.
cbsa_level = df['cbsa_level']
area_df = {
    'Rural': df[cbsa_level.isnull()],
    'Micropolitan': df[cbsa_level == '1'],
    'Metropolitan': df[cbsa_level == '2'],
}

### Plot employment share in FAI

In [None]:
# Yearly employment share of the narrow FAI definition in each area type.
# The flag column is 'fai_narrow'; the frames have no 'fai' column, so the
# previous lookup raised KeyError.
fig, ax = plt.subplots()
ax.set_ylabel('Employment share')
for area, adf in area_df.items():
    emp_by_year = adf.groupby('year')['emp'].sum()
    share = adf[adf['fai_narrow']].groupby('year')['emp'].sum() / emp_by_year
    years = share.index
    ax.plot(years, share, label=area, alpha=0.7)
    ax.set_xlim(years.min(), years.max())
lgd = ax.legend(loc='upper right');
#fig.savefig('fig/fai_dynamics.png', bbox_extra_artists=(lgd,), bbox_inches='tight')

In [None]:
# Yearly employment share of the broad FAI definition (Tom's 4-digit codes)
# in each area type. The flag column is 'fai_broad'; the frames have no
# 'fai_tom' column, so the previous lookup raised KeyError.
fig, ax = plt.subplots()
ax.set_ylabel('Employment share')
for area, adf in area_df.items():
    emp_by_year = adf.groupby('year')['emp'].sum()
    share = adf[adf['fai_broad']].groupby('year')['emp'].sum() / emp_by_year
    years = share.index
    ax.plot(years, share, label=area, alpha=0.7)
    ax.set_xlim(years.min(), years.max())
lgd = ax.legend(loc='upper right');
#fig.savefig('fig/fai_dynamics.png', bbox_extra_artists=(lgd,), bbox_inches='tight')

### Plot breakdown of FAI employment

In [None]:
def fai_emp_breakdown(df):
    """Return each subsector's share of summed employment within every year.

    Sums the 'emp' column by ('year', 'subsector'), pivots the subsectors
    into columns and divides every row by its row total.

    Parameters
    ----------
    df : DataFrame with 'year', 'subsector' and 'emp' columns.

    Returns
    -------
    DataFrame indexed by year with one column per subsector.
    """
    emp_by_subsector = df.groupby(['year', 'subsector'])['emp'].sum().unstack()
    yearly_total = emp_by_subsector.sum(axis=1)
    # Row-wise division, aligned on the year index.
    return emp_by_subsector.div(yearly_total, axis=0)

In [None]:
# Stacked-area view of how FAI employment splits across subsectors, with one
# panel per area type. Subsectors are paired with colours in order; zip()
# stops at the shorter of the two sequences.
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
fig, axes = plt.subplots(ncols=3, figsize=(17, 5))
axes[0].set_ylabel('Share of FAI')
lines = []
for (area, adf), ax in zip(area_df.items(), axes):
    shares = fai_emp_breakdown(adf)
    years = shares.index
    # Running upper edge of the stack; the lower edge is a snapshot taken
    # before each subsector is added.
    cum_share1 = np.zeros(len(shares))
    for subsector, color in zip(fai_subsectors, colors):
        cum_share0 = cum_share1.copy()
        cum_share1 += shares[subsector]
        drawn = ax.plot(years, cum_share1, color, alpha=0.5, label=subsector)
        lines.append(drawn[0])
        ax.fill_between(years, cum_share0, cum_share1, facecolor=color, alpha=0.3)
    ax.set_title(area)
    ax.set_ylim(-0.0001, 1)
    ax.set_xlim(years.min(), years.max())
# Every panel repeats the same subsectors, so keep just the leading handles
# for the shared legend.
lines = lines[:len(fai_subsectors)]
lgd = fig.legend(handles=lines, ncol=len(lines), loc='upper center', bbox_to_anchor=(0.44, 0.08))
fig.savefig('fig/fai_breakdown.png', bbox_extra_artists=(lgd,), bbox_inches='tight')