In [108]:
import pandas as pd
import warnings
# NOTE(review): this silences every warning for the whole kernel session,
# including pandas warnings about chained assignment -- consider narrowing the
# filter to a specific category once the notebook runs cleanly.
warnings.filterwarnings('ignore')

In [109]:
def housing_tenure(file, year):
    """Build a one-row housing-tenure summary for a single year.

    The CSV is expected to provide ``Label``, ``Estimate`` and
    ``Margin of Error`` columns. The first row's ``Estimate`` is read as the
    total number of housing units (a string such as ``"24,310"``); every row
    whose ``Estimate`` is a percentage string (contains ``%``) is converted
    into an estimated count by multiplying that percentage by the total.

    Parameters
    ----------
    file : str, path object or file-like
        Anything accepted by ``pandas.read_csv``.
    year : hashable
        Value used as the index label of the single returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``year``. The first column is
        ``'Total Household Units'``; the remaining columns are the
        (left-stripped) labels of the percentage rows, holding the estimated
        counts as floats.

    Raises
    ------
    AttributeError, ValueError
        If the first row's ``Estimate`` is not a string that parses as an
        integer once commas are removed.
    """
    raw = pd.read_csv(file)

    # Sub-category labels are indented in the export; strip the leading
    # whitespace so they can serve as clean column names.
    raw['Label'] = raw['Label'].apply(lambda label: label.lstrip())

    # The first row's estimate is treated as the total number of housing units.
    total_housing_units = int(raw.iloc[0]['Estimate'].replace(',', ''))

    def _as_fraction(estimate):
        # Only percentage strings become fractions. Counts and blank (NaN)
        # cells map to None so they are dropped below instead of raising.
        if isinstance(estimate, str) and '%' in estimate:
            return float(estimate.rstrip('%')) / 100
        return None

    # Keep only the percentage rows. Restricting dropna to 'Percentage' stops a
    # blank 'Margin of Error' cell from silently discarding a valid row.
    tenure = (
        raw.assign(Percentage=raw['Estimate'].apply(_as_fraction))
        .dropna(subset=['Percentage'])
    )

    # Turn each percentage into an estimated number of units.
    estimates = tenure['Percentage'] * total_housing_units

    # One row per year: labels become columns.
    summary = (
        tenure.assign(**{'Population Estimate': estimates})
        .loc[:, ['Label', 'Population Estimate']]
        .set_index('Label')
        .T
    )

    # Carry the total along as the first column and index the row by year.
    summary.insert(0, 'Total Household Units', total_housing_units)
    summary.index = [year]

    return summary.rename(
        columns={'Renter-occupied housing units': 'Renter-Occupied Housing Units'}
    )

In [110]:
def ownership_characteristics(file, year):
    """Build a one-row owner-characteristics summary for a single year.

    The CSV is expected to provide ``Label`` and ``Estimate`` columns (plus
    any further columns, which are discarded). The returned row holds the
    values of the first non-label column -- presumably ``Estimate``; confirm
    against the export layout -- exactly as read from the file, i.e. they are
    NOT converted to numbers.

    Parameters
    ----------
    file : str, path object or file-like
        Anything accepted by ``pandas.read_csv``.
    year : hashable
        Value used as the index label (index name ``'Year'``) of the row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``year`` with one column per (left-stripped) label;
        the four known labels are renamed to their title-cased forms.

    Raises
    ------
    AttributeError, ValueError, IndexError
        If any of the first four ``Estimate`` cells is missing, is not a
        string, or does not parse as a number once commas are removed.
    """
    raw = pd.read_csv(file)

    # Sub-category labels are indented in the export; strip the leading
    # whitespace so the rename below can match them.
    raw['Label'] = raw['Label'].apply(lambda label: label.lstrip())

    # Fail fast on malformed files: the first four estimates (owner-occupied
    # units, median value, median monthly costs with / without a mortgage) must
    # be comma-formatted numbers. The parsed values themselves are not used.
    for position in range(4):
        float(raw.iloc[position]['Estimate'].replace(',', ''))

    # Labels become columns; keep only the first remaining row. The explicit
    # copy avoids assigning into a slice of the transposed frame.
    estimates = raw.set_index('Label').T.iloc[[0]].copy()
    estimates.index = pd.Index([year], name='Year')

    return estimates.rename(
        columns={
            'Owner-occupied housing units': 'Owner-Occupied Housing Units',
            'Median value (dollars)': 'Median Value (Dollars)',
            'Median selected monthly owner costs with a mortgage (dollars)': 'Median Monthly Owner Costs with Mortgage (dollars)',
            'Median selected monthly owner costs without a mortgage (dollars)': 'Median Monthly Owner Costs without Mortgage (dollars)',
        }
    )

In [111]:
def combine_frames(df_housing_tenure, df_ownership_characteristics):
    """Join the tenure and ownership frames side by side.

    The two frames are concatenated along the column axis, so rows are
    aligned on their index labels and the tenure columns come first.
    """
    frames = [df_housing_tenure, df_ownership_characteristics]
    return pd.concat(frames, axis=1)

In [112]:
def concat_data(years=None, data_dir='data'):
    """Assemble the per-year tenure/ownership rows into one table.

    For each year, ``{data_dir}/{year}/HOUSING_TENURE.CSV`` and
    ``{data_dir}/{year}/OWNER_CHARACTERISTICS.CSV`` are loaded, joined side by
    side, and stacked. Loading is best-effort: a year whose files are missing
    or malformed is skipped, and the skipped years are printed together with
    the error that caused them to be skipped.

    Parameters
    ----------
    years : iterable of int, optional
        Years to load. Defaults to 2010 through 2022 inclusive.
    data_dir : str, optional
        Directory containing one sub-directory per year. Defaults to
        ``'data'``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully loaded year, with the year in a ``'Year'``
        column.

    Raises
    ------
    ValueError
        If no year could be loaded at all.
    """
    if years is None:
        years = range(2010, 2023)

    yearly_frames = []
    skipped = {}
    for year in years:
        try:
            tenure = housing_tenure(f'{data_dir}/{year}/HOUSING_TENURE.CSV', year)
            ownership = ownership_characteristics(
                f'{data_dir}/{year}/OWNER_CHARACTERISTICS.CSV', year
            )
            combined = combine_frames(tenure, ownership)
            # Drop the second column of the joined frame -- presumably the
            # percentage-derived owner-occupied estimate, which the ownership
            # file supersedes with an actual count. TODO confirm.
            yearly_frames.append(combined.drop(combined.columns[1], axis=1))
        except Exception as exc:
            # Stay best-effort, but remember why the year was skipped and let
            # KeyboardInterrupt / SystemExit propagate.
            skipped[year] = exc

    # Printed rather than issued through the warnings module, which this
    # notebook silences globally.
    for year, exc in skipped.items():
        print(f'concat_data: skipped {year}: {exc!r}')

    if not yearly_frames:
        raise ValueError('No objects to concatenate: no year could be loaded')

    return (
        pd.concat(yearly_frames)
        .reset_index()
        .rename(columns={'index': 'Year'})
    )

In [113]:
# Build the combined tenure/ownership table from the per-year CSV files.
TENURE_AND_OWNERSHIP = concat_data()
# Bare last expression so the notebook renders the resulting frame.
TENURE_AND_OWNERSHIP

Label,Year,Total Household Units,Renter-Occupied Housing Units,Owner-Occupied Housing Units,Median Value (Dollars),Median Monthly Owner Costs with Mortgage (dollars),Median Monthly Owner Costs without Mortgage (dollars)
0,2010,24310,7900.75,16419,263800,2189,645
1,2011,22701,9352.812,13358,270300,2238,713
2,2012,23724,8991.396,14724,244700,2130,819
3,2013,26959,11915.878,15049,230800,2112,665
4,2014,22226,7579.066,14641,257700,2228,741
5,2015,25286,10392.546,14902,267200,2035,853
6,2016,23580,9974.34,13595,279700,2048,757
7,2017,26139,10769.268,15362,284200,2056,804
8,2018,28119,9841.65,18285,296200,2072,797
9,2019,26642,10203.886,16448,286000,2054,837


In [114]:
# Persist the combined table; index=False leaves the positional row index out of the file.
TENURE_AND_OWNERSHIP.to_csv('data/final/HOUSING_TENURE_AND_OWNERSHIP.CSV', index=False)
