In [1]:
import pandas as pd
import warnings
# Silences every warning for the rest of the session, including pandas
# deprecation notices. NOTE(review): consider filtering only the specific
# warning categories that are known to be noisy.
warnings.filterwarnings('ignore')

In [116]:
def households(file, year):
    """Build a one-row household summary for a single survey year.

    Reads a "households by type" table, turns every percentage estimate into
    a household count (percentage x total households) and returns the result
    as a single row labelled with ``year``.

    Parameters
    ----------
    file : str, path object or file-like object
        Passed straight to ``pandas.read_csv``. The table must contain the
        columns ``Label``, ``Estimate`` and ``Margin of Error``. The estimate
        in the first row is the total household count (e.g. ``"24,310"``) and
        the estimates in the last two rows are the average household size and
        the average family size, in that order.
    year : hashable
        Index label given to the returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``year``. The first three columns are
        ``Average Household Size``, ``Average Family Size`` and
        ``Total Household Count``; the remaining columns are the estimated
        household counts per (renamed) label, ending with the two combined
        "Non Family Households (...)" columns.

    Raises
    ------
    KeyError
        If an expected column or row label is missing from the table.
    """

    def percent_to_fraction(value):
        # Blank cells arrive as NaN (a float), so check the type before
        # looking for the percent sign.
        if isinstance(value, str) and '%' in value:
            return float(value.rstrip('%')) / 100
        return None

    table = pd.read_csv(file)

    # Strip leading whitespace from the labels so they can be matched by name.
    table['Label'] = table['Label'].str.lstrip()

    estimates = table['Estimate']

    # Read the summary figures by position (first row, last two rows) so the
    # lookup does not depend on the table having an exact number of rows.
    average_household_size = float(estimates.iloc[-2])
    average_family_size = float(estimates.iloc[-1])
    total_households = int(estimates.iloc[0].replace(',', ''))

    # Convert percentage estimates to fractions; everything else becomes missing.
    table['Percentage'] = estimates.apply(percent_to_fraction)

    # dropna() removes every row with a missing value in any column, which
    # includes the rows whose estimate is not a percentage (the total and the
    # two averages).
    table = table.dropna()

    # Percentage -> estimated number of households, then one column per label.
    counts = (
        table
        .drop(columns=['Estimate', 'Margin of Error'])
        .assign(**{'Population Estimate': lambda t: t['Percentage'] * total_households})
        .drop(columns=['Percentage'])
        .set_index('Label')
        .T
    )

    # Put the summary figures in front of the per-label counts.
    counts.insert(0, 'Average Household Size', average_household_size)
    counts.insert(1, 'Average Family Size', average_family_size)
    counts.insert(2, 'Total Household Count', total_households)

    # The "own children" rows appear under one of two wordings; use the longer
    # one only when the shorter one is absent.
    own_children_label = 'With own children under 18 years'
    if own_children_label not in counts.columns:
        own_children_label = 'With own children of the householder under 18 years'
    counts = counts.drop(columns=[own_children_label, 'Male householder', 'Female householder'])

    counts = counts.rename(columns={
        'Family households': 'Multi Generational Family Households',
        'Married-couple family': 'Single Generation Married Couple',
        'Female householder, no husband present, family': 'Multi Generational Divorced Family Households',
        'Nonfamily households': 'Non Family Households',
        'Female householder, no spouse present, family': 'Multi Generational Divorced Family Households',
    })

    # "Living alone" and "Not living alone" each occur more than once
    # (presumably once per householder sex -- verify against the export).
    # Add the same-named columns together by label instead of by hard-coded
    # position; skipna=False keeps the NaN propagation of a plain "+".
    living_alone = counts.loc[:, counts.columns == 'Living alone'].sum(axis=1, skipna=False)
    not_living_alone = counts.loc[:, counts.columns == 'Not living alone'].sum(axis=1, skipna=False)
    counts['Non Family Households (Living Alone)'] = living_alone
    counts['Non Family Households (Not Living Alone)'] = not_living_alone

    # The per-label originals are no longer needed (raises KeyError if absent).
    counts = counts.drop(columns=['Living alone', 'Not living alone'])

    # Label the single row with the survey year.
    counts.index = [year]

    return counts


In [117]:
def concat_data(years=None, data_dir='data'):
    """Combine the per-year household summaries into a single table.

    Each year is read from ``{data_dir}/{year}/HOUSEHOLDS_BY_TYPE.CSV`` via
    ``households``. Loading is best effort: a year whose file is missing or
    cannot be processed is skipped, and a one-line notice is printed so the
    gap is visible.

    Parameters
    ----------
    years : iterable of int, optional
        Survey years to load, in output order. Defaults to 2010 through 2022.
    data_dir : str, optional
        Directory holding one sub-directory per year. Defaults to ``'data'``.

    Returns
    -------
    pandas.DataFrame
        The rows returned by ``households`` for every year that loaded,
        stacked in the order the years were given.

    Raises
    ------
    ValueError
        If no year could be loaded.
    """
    if years is None:
        years = range(2010, 2023)

    frames = []
    for year in years:
        path = f'{data_dir}/{year}/HOUSEHOLDS_BY_TYPE.CSV'
        try:
            frames.append(households(path, year))
        except Exception as exc:
            # Keep going so one bad year does not abort the whole load, but
            # report it instead of dropping the year silently.
            print(f'Skipping {year}: {type(exc).__name__}: {exc}')

    if not frames:
        raise ValueError(f'No household data could be loaded from {data_dir!r}')

    return pd.concat(frames)
    

In [118]:
# Build the combined per-year table once; the bare name on the last line makes
# it the cell's displayed output.
HOUSEHOLDS_BY_TYPE = concat_data()
HOUSEHOLDS_BY_TYPE

Label,Average Household Size,Average Family Size,Total Household Count,Multi Generational Family Households,Single Generation Married Couple,Multi Generational Divorced Family Households,Non Family Households,Non Family Households (Living Alone),Non Family Households (Not Living Alone)
2010,2.81,3.4,24310,17211.48,13394.81,3233.23,7098.52,6028.88,1093.95
2011,3.09,3.82,22701,15096.165,11827.221,2519.811,7604.835,5788.755,1816.08
2012,2.97,3.48,23724,17176.176,12407.652,3819.564,6547.824,4910.868,1613.232
2013,2.86,3.38,26959,18844.341,13182.951,3936.014,8114.659,5957.939,2129.761
2014,3.17,3.82,22226,16002.72,12624.368,2756.024,6223.28,5178.658,1066.848
2015,2.85,3.48,25286,17523.198,13730.298,2983.748,7762.802,6144.498,1618.304
2016,2.97,3.52,23580,16317.36,12143.7,3419.1,7262.64,5305.5,1933.56
2017,2.99,3.55,26139,18192.744,12311.469,4339.074,7946.256,5855.136,2091.12
2018,2.78,3.33,28119,19120.92,14425.047,3289.923,8998.08,7226.583,1771.497
2019,2.93,3.54,26642,18303.054,14519.89,2371.138,8338.946,6394.08,1944.866
