In [1]:
import pandas as pd
# four-digit years (as strings) covered by the analysis: 2012 through 2018 inclusive
years_to_include = tuple(str(year) for year in range(2012, 2019))

In [66]:
def unemployment_rate(year):
    """
    Return the unemployment rate of each Connecticut county for one year.

    Parameters
    ----------
    year : str
        Two-digit year (e.g. "12"); it is spliced into the workbook path
        './data/labor_force_xls/laucnty<year>.xlsx'.

    Returns
    -------
    pandas.DataFrame
        One row per row of the workbook whose label ends in "CT", indexed by
        "County" (the label with its trailing 11 characters removed), with a
        single column "Percent Unemployed".
    """

    # import US BLS labor stats; header=4 ignores the noise rows at file start
    raw = pd.read_excel(
        './data/labor_force_xls/laucnty' + year + '.xlsx',
        header=4,
        usecols=["County Name/State Abbreviation", "(%)"],
    )

    # improve column names; renaming by name (rather than positionally) keeps
    # the labels correct regardless of the column order in the workbook
    raw = raw.rename(columns={
        "County Name/State Abbreviation": "County",
        "(%)": "Percent Unemployed",
    })

    # find only CT counties; rows with a missing label compare False and drop out.
    # .copy() gives an independent frame so the assignment below does not write
    # into a slice of `raw` (which would raise SettingWithCopyWarning)
    ct_rows = raw.loc[raw["County"].str[-2:] == "CT"].copy()

    # strip to county name: drops the last 11 characters
    # (the length of a " County, CT" suffix) and set index
    ct_rows["County"] = ct_rows["County"].str[:-11]
    return ct_rows.set_index("County")
    

In [69]:
def create_composite(years):
    """
    Return a composite of population, economic, and unemployment data for
    each jurisdiction, stacked year by year.

    Parameters
    ----------
    years : iterable of str
        Four-digit years (e.g. "2012"). Each year must be a column of
        'data/census_CT.csv'; its last two digits select that year's
        income/poverty CSV and are passed to unemployment_rate().

    Returns
    -------
    pandas.DataFrame
        The per-year tables concatenated in the order given, with a numbered
        index. "Year" is moved to the front of the data columns and the
        year's census column is relabelled "Population". For an empty
        `years` the result is an empty frame (as before).
    """

    # load population data for all counties; the first row's label is
    # overwritten with "Connecticut" before the labels become the index
    census_CT = pd.read_csv('data/census_CT.csv')
    census_CT.loc[0, "County"] = "Connecticut"
    census_CT = census_CT.set_index("County")

    # collect one frame per year and concatenate once at the end, instead of
    # re-copying an ever-growing frame on every iteration
    yearly_frames = []

    for year in years:
        short_year = year[-2:]

        # load economic indicators for given year
        econ = pd.read_csv('data/income_poverty_csv/est' + short_year + '-ct.csv')
        econ = econ.set_index('Jurisdiction')

        # join unemployment and population data on jurisdiction/county name
        econ = econ.join(unemployment_rate(short_year))
        econ = econ.join(census_CT[year])

        # relabel population column
        econ = econ.rename(columns={year: "Population"})

        # add new column "Year" with current year, then move the last column
        # (the freshly added "Year") to the front
        econ["Year"] = year
        ordered = list(econ.columns)
        econ = econ[ordered[-1:] + ordered[:-1]]

        yearly_frames.append(econ)

    # pd.concat raises on an empty list; keep the original empty-input result
    if not yearly_frames:
        return pd.DataFrame().reset_index()

    # return composite with numbered index
    return pd.concat(yearly_frames).reset_index()
                

In [70]:
# build the composite table for every year listed in years_to_include (set in the first cell)
CT_econ_2012_through_2018 = create_composite(years_to_include)
# bare last expression so the notebook renders the resulting frame
CT_econ_2012_through_2018

Unnamed: 0,Jurisdiction,Year,MedianHouseholdIncome,%inPoverty,TotalInPoverty,%MinorsInPoverty,TotalMinorsInPoverty,Percent Unemployed,Population
0,Connecticut,2012,67275,10.6,370537,14.6,114613,,3594547
1,Fairfield,2012,79536,8.9,81756,11.1,24854,7.8,935099
2,Hartford,2012,63374,12.2,106357,16.7,32889,8.5,897706
3,Litchfield,2012,67746,7.0,12975,8.6,3272,7.6,187570
4,Middlesex,2012,74588,6.4,10376,7.5,2470,7.2,165634
...,...,...,...,...,...,...,...,...,...
58,Middlesex,2018,88709,6.7,10556,8.1,2314,3.5,162870
59,New Haven,2018,67720,11.6,96563,16.7,28530,4.4,856971
60,New London,2018,67790,9.8,25063,12.5,6388,4.0,266285
61,Tolland,2018,89584,8.0,10835,6.4,1652,3.6,150689
