In [1]:
import pandas as pd
# four-digit years, as strings, covered by the composite: 2012 through 2018 inclusive
years_to_include = tuple(str(calendar_year) for calendar_year in range(2012, 2019))

In [94]:
def unemployment_rate(year):
    """
    Return the unemployment rate of each Connecticut county for one year.

    Reads the labor-force workbook './data/labor_force_xls/laucnty<year>.xlsx',
    keeps the rows whose county label ends in "CT", and trims the trailing
    " County, CT" from each label so only the bare county name remains.

    Parameters
    ----------
    year : str
        Two-digit year used to build the workbook file name, e.g. "12".

    Returns
    -------
    pandas.DataFrame
        Indexed by "County", with a single "%Unemployed" column.
    """

    # import US BLS labor stats; rows above the header row (header=4) are noise
    labor = pd.read_excel(
        './data/labor_force_xls/laucnty' + year + '.xlsx',
        header=4,
        usecols=["County Name/State Abbreviation", "(%)"],
    )

    # rename by label, not by position, so the result does not depend on the
    # order in which the two columns appear in the workbook
    labor = labor.rename(columns={
        "County Name/State Abbreviation": "County",
        "(%)": "%Unemployed",
    })

    # find only CT counties; rows with a missing label are treated as non-CT
    is_ct = labor["County"].str.endswith("CT", na=False)

    # take an explicit copy so the edit below acts on an independent frame
    # rather than on a slice of `labor` (avoids chained-assignment warnings)
    ct_counties = labor.loc[is_ct].copy()

    # strip to county name: drop as many trailing characters as the suffix has
    suffix = " County, CT"
    ct_counties["County"] = ct_counties["County"].str[:-len(suffix)]

    return ct_counties.set_index("County")
    

In [141]:
def create_composite(years):
    """
    Build a composite of population, economic, and unemployment data for
    each jurisdiction, stacked year by year.

    Parameters
    ----------
    years : iterable of str
        Four-digit years, e.g. ("2012", "2013"). The full string selects the
        population column in 'data/census_CT.csv'; its last two characters
        select the per-year economic file and the unemployment workbook.

    Returns
    -------
    pandas.DataFrame
        One row per jurisdiction per year with a fresh numbered index. Each
        year contributes the economic columns plus "Year", "%Unemployed",
        "Population", "pov100", "povMinors100" and "unemp100". When `years`
        is empty an empty frame is returned.
    """

    # load population data for all counties; the first row is relabeled so
    # that it joins against the "Connecticut" jurisdiction row
    census_CT = pd.read_csv('data/census_CT.csv')
    census_CT.loc[0, "County"] = "Connecticut"
    census_CT = census_CT.set_index("County")

    # build every yearly frame first and concatenate once, instead of
    # re-copying a growing frame on each pass of the loop
    yearly_frames = [_composite_for_year(year, census_CT) for year in years]

    if not yearly_frames:
        # pd.concat rejects an empty list; mirror the empty result instead
        return pd.DataFrame().reset_index()

    # return composite with numbered index
    return pd.concat(yearly_frames).reset_index()


def _composite_for_year(year, census_CT):
    """Assemble the joined and state-normalized rows for one four-digit year string."""

    short_year = year[-2:]

    # load economic indicators for given year
    econ = pd.read_csv('data/income_poverty_csv/est' + short_year + '-ct.csv')
    econ = econ.set_index('Jurisdiction')

    # join unemployment and population data on jurisdiction/county name
    econ = econ.join(unemployment_rate(short_year))
    econ = econ.join(census_CT[year])

    # relabel population column
    econ = econ.rename(columns={year: "Population"})

    # the joined unemployment data has no statewide row, so derive it
    econ.loc["Connecticut", "%Unemployed"] = _statewide_unemployment(econ)

    # add state-normalized poverty and unemployment levels
    econ = _add_state_normalized(econ)

    # add new column "Year" with current year, then move the last column
    # (the newly appended "Year") to the front
    econ["Year"] = year
    ordered = list(econ.columns)
    return econ[ordered[-1:] + ordered[:-1]]


def _statewide_unemployment(econ):
    """Return the population-weighted mean of the non-state rows' "%Unemployed"."""

    counties = econ[econ.index != "Connecticut"]

    # skipna=False keeps a missing county value visible as NaN rather than
    # silently dropping that county from the numerator only
    weighted_sum = (counties["Population"] * counties["%Unemployed"]).sum(skipna=False)

    return weighted_sum / econ.loc["Connecticut", "Population"]


def _add_state_normalized(econ):
    """Add integer columns expressing each rate as a percentage of the "Connecticut" row's rate."""

    normalized = (
        ("%inPoverty", "pov100"),
        ("%MinorsInPoverty", "povMinors100"),
        ("%Unemployed", "unemp100"),
    )
    for source, target in normalized:
        econ[target] = (econ[source] / econ.loc["Connecticut", source] * 100).astype('int')

    return econ
                

In [143]:
# build the composite for every configured year ...
CT_econ_2012_through_2018 = create_composite(years=years_to_include)
# ... and persist it as a CSV file
CT_econ_2012_through_2018.to_csv(path_or_buf='data/CT_composite.csv')