# Housing Tenure Script
Run all of these code blocks, from top to bottom, to create the HOUSING_TENURE data table.

### Import Statements

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Function to create the single-row table for one year

In [2]:
def _estimate_to_number(text, total_units):
    """Convert one raw ``Estimate`` cell into a number.

    Args:
        text: The cell as read from the CSV, e.g. ``'63.9%'``, ``'32,190'``
            or ``'2.94'``.
        total_units: The count that percentage cells are a share of.

    Returns:
        A float. Cells containing ``%`` are turned into a unit count
        (share of ``total_units``); every other cell is parsed as a plain
        number after removing thousands separators.
    """
    # Decide from the '%' sign itself rather than from the magnitude of the
    # parsed value, so that a share of exactly 100% is still scaled.
    if '%' in text:
        return float(text.rstrip('%')) / 100 * total_units
    return float(text.replace(',', ''))


def housing_tenure(file, year):
    """Build a one-row dataframe of housing-tenure figures for one year.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``. The
            CSV must have ``Label`` and ``Estimate`` columns; any other
            column (such as ``Margin of Error``) is ignored.
        year: Value used as the index label of the returned row.

    Returns:
        A dataframe indexed by ``year`` with the columns
        ``owner_occupied_units``, ``avg_hh_size_owner``,
        ``renter_occupied_units``, ``avg_hh_size_renter`` and
        ``total_occupied_units``, in that order.

    Raises:
        KeyError: If a required column or one of the expected labels is
            missing from the CSV.
    """
    # Read in file
    raw = pd.read_csv(file)

    # Labels are indented in the file; strip the surrounding whitespace so
    # they match the names used in the rename below
    labels = raw['Label'].str.strip()

    # The first row holds the total that the percentage rows are a share of
    # (assumed to be 'Occupied housing units' - verify against the input file)
    total_units = int(raw['Estimate'].iloc[0].replace(',', ''))

    # Turn every estimate into a number: percentages become unit counts,
    # everything else (totals, average household sizes) is kept as is
    values = [_estimate_to_number(text, total_units) for text in raw['Estimate']]

    # One row for this year, one column per label
    row = pd.DataFrame([values], index=[year], columns=pd.Index(labels, name='Label'))

    # Rename all columns with abbreviated underscored naming conventions
    row = row.rename(columns={
        'Occupied housing units': 'total_occupied_units',
        'Owner-occupied housing units': 'owner_occupied_units',
        'Renter-occupied housing units': 'renter_occupied_units',
        'Average household size of owner-occupied unit': 'avg_hh_size_owner',
        'Average household size of renter-occupied unit': 'avg_hh_size_renter',
    })

    # Rearrange columns
    return row[[
        'owner_occupied_units',
        'avg_hh_size_owner',
        'renter_occupied_units',
        'avg_hh_size_renter',
        'total_occupied_units',
    ]]

### Test that function works

In [3]:
# Smoke test: build the one-row table for 2022 so that the notebook displays it
housing_tenure(file='data/2022/HOUSING_TENURE.CSV', year=2022)

Label,owner_occupied_units,avg_hh_size_owner,renter_occupied_units,avg_hh_size_renter,total_occupied_units
2022,20569.41,2.94,11620.59,2.26,32190.0


### Function to create single dataframe containing all years

In [4]:
def concat_data(years=range(2010, 2023), path_template='data/{year}/HOUSING_TENURE.CSV'):
    """Stack the one-row tables of every loadable year into one dataframe.

    Args:
        years: Iterable of years to load. Defaults to 2010 through 2022.
        path_template: Format string with a ``{year}`` placeholder that gives
            the CSV path for each year.

    Returns:
        A dataframe with a ``year`` column followed by the columns produced
        by ``housing_tenure``, one row per year that could be loaded.

    Raises:
        ValueError: If none of the requested years could be loaded.
    """
    yearly_rows = []
    for year in years:
        path = path_template.format(year=year)
        try:
            yearly_rows.append(housing_tenure(path, year))
        except Exception as exc:
            # Best effort: a year whose file is missing or cannot be parsed is
            # skipped. It is reported with print() because warnings are
            # silenced at the top of this notebook, and a silent skip would
            # make an incomplete table look complete.
            print(f'Skipping {year}: could not process {path} ({type(exc).__name__}: {exc})')

    if not yearly_rows:
        raise ValueError('No HOUSING_TENURE data could be loaded for any of the requested years')

    combined = pd.concat(yearly_rows)

    # The year is carried in the index; move it into a regular 'year' column
    combined.reset_index(inplace=True)
    combined.rename(columns={'index': 'year'}, inplace=True)
    return combined

### Create single dataframe containing all years

In [5]:
# Build the combined table from every year that concat_data() manages to load
HOUSING_TENURE = concat_data()
# Bare expression so that the notebook displays the table
HOUSING_TENURE

Label,year,owner_occupied_units,avg_hh_size_owner,renter_occupied_units,avg_hh_size_renter,total_occupied_units
0,2010,16409.25,3.02,7900.75,2.36,24310.0
1,2011,13348.188,3.52,9352.812,2.47,22701.0
2,2012,14732.604,3.02,8991.396,2.88,23724.0
3,2013,15043.122,2.98,11915.878,2.7,26959.0
4,2014,14646.934,3.45,7579.066,2.64,22226.0
5,2015,14893.454,3.2,10392.546,2.34,25286.0
6,2016,13605.66,3.27,9974.34,2.57,23580.0
7,2017,15369.732,3.25,10769.268,2.63,26139.0
8,2018,18277.35,2.93,9841.65,2.5,28119.0
9,2019,16438.114,3.25,10203.886,2.42,26642.0


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [6]:
from pathlib import Path

# Create the 'final' folder first: to_csv does not create missing folders and
# would otherwise fail on a fresh checkout
output_path = Path('data/final/HOUSING_TENURE.CSV')
output_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the positional row index out of the saved file
HOUSING_TENURE.to_csv(output_path, index=False)

### Check that dataframe was saved properly as CSV

In [7]:
# Read the saved file back in to confirm that it was written as expected
df = pd.read_csv('data/final/HOUSING_TENURE.CSV')
# Bare expression so that the notebook displays the reloaded table
df

Unnamed: 0,year,owner_occupied_units,avg_hh_size_owner,renter_occupied_units,avg_hh_size_renter,total_occupied_units
0,2010,16409.25,3.02,7900.75,2.36,24310.0
1,2011,13348.188,3.52,9352.812,2.47,22701.0
2,2012,14732.604,3.02,8991.396,2.88,23724.0
3,2013,15043.122,2.98,11915.878,2.7,26959.0
4,2014,14646.934,3.45,7579.066,2.64,22226.0
5,2015,14893.454,3.2,10392.546,2.34,25286.0
6,2016,13605.66,3.27,9974.34,2.57,23580.0
7,2017,15369.732,3.25,10769.268,2.63,26139.0
8,2018,18277.35,2.93,9841.65,2.5,28119.0
9,2019,16438.114,3.25,10203.886,2.42,26642.0
