# Gross Rent Script
Run all of these code blocks in order to create the GROSS_RENT data table.

### Import Statements

In [2]:
import pandas as pd
import warnings
# NOTE(review): with no category or module given, this hides every warning
# raised after this point (including pandas deprecation notices) — confirm
# that blanket suppression is intended rather than a targeted filter.
warnings.filterwarnings('ignore')

### Function to create row for each year

In [4]:
def rent(file1,file2,year):
    """Build the one-row gross-rent summary table for a single year.

    Parameters
    ----------
    file1 : str or path-like
        CSV of gross-rent estimates with ``Label`` and ``Estimate`` columns.
        It must contain rows labelled ``Median gross rent (dollars)`` and
        ``Occupied units paying rent``.
    file2 : str or path-like
        CSV of gross rent as a percentage of household income (GRAPI) with
        ``Label`` and ``Estimate`` columns. The estimate in its first row is
        read as the unit count that the percentages apply to; rows whose
        estimate contains ``%`` are treated as shares of that count. It must
        contain rows labelled ``Less than 30 percent`` and
        ``30 percent or more``.
    year : int
        Value used as the index label of the returned row.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by ``year`` with the columns ``med_gross_rent``,
        ``hh_rent_lt_30pct_income``, ``hh_rent_gte_30pct_income`` and
        ``occupied_units_rented``. The two ``hh_rent_*`` values are the
        percentage shares multiplied by the GRAPI unit count.

    Raises
    ------
    FileNotFoundError
        If either CSV does not exist (raised by ``pandas.read_csv``).
    KeyError
        If a required column or row label is missing.
    ValueError
        If an estimate cannot be parsed as a number.
    """
    def _parse_number(value):
        # pandas returns real numbers instead of strings when no estimate in
        # the column has a thousands separator, so normalise through str()
        # before removing the commas.
        return float(str(value).replace(',', ''))

    def _parse_share(value):
        # Only "NN%" cells are shares; counts and missing cells map to None so
        # they can be dropped afterwards.
        text = str(value)
        return float(text.rstrip('%')) / 100 if '%' in text else None

    column_names = {
        'Median gross rent (dollars)': 'med_gross_rent',
        'Less than 30 percent': 'hh_rent_lt_30pct_income',
        '30 percent or more': 'hh_rent_gte_30pct_income',
        'Occupied units paying rent': 'occupied_units_rented',
    }

    # Read in files
    gr = pd.read_csv(file1)
    grapi = pd.read_csv(file2)

    # Gross rent: strip leading whitespace from the labels, convert the
    # estimates to float, then turn the labels into columns of a single row.
    gr_row = (
        gr.assign(
            Label=gr['Label'].apply(lambda label: label.lstrip()),
            Estimate=gr['Estimate'].apply(_parse_number),
        )
        .set_index('Label')[['Estimate']]
        .T
    )

    # GRAPI: the first row carries the unit count the percentages refer to.
    total_units = int(_parse_number(grapi['Estimate'].iloc[0]))

    # Keep only the percentage rows. Filtering on 'Percentage' alone means a
    # blank margin of error no longer discards an otherwise valid row.
    shares = grapi.assign(
        Label=grapi['Label'].apply(lambda label: label.lstrip()),
        Percentage=grapi['Estimate'].apply(_parse_share),
    ).dropna(subset=['Percentage'])

    # Calculate population estimates from the shares
    shares = shares.assign(**{'Population Estimate': shares['Percentage'] * total_units})
    grapi_row = shares.set_index('Label')[['Population Estimate']].T

    # Change index to year so the two rows line up in the merge
    gr_row.index = [year]
    grapi_row.index = [year]

    # Merge dataframes
    merged = gr_row.merge(grapi_row, left_index=True, right_index=True)

    # Select the output columns in their final order and rename them
    return merged[list(column_names)].rename(columns=column_names)

### Test that function works

In [5]:
# Smoke test: build the 2022 row and show it as the cell output.
rent(
    'data/2022/GROSS_RENT.CSV',
    'data/2022/GROSS_RENT_AS_A_PERCENTAGE_OF_HOUSEHOLD_INCOME_IN_THE_PAST_12_MONTHS.CSV',
    2022,
)

Label,med_gross_rent,hh_rent_lt_30pct_income,hh_rent_gte_30pct_income,occupied_units_rented
2022,1426.0,7303.0,3597.0,11078.0


### Test that function works for each year
Expected output: the only line printed should be "No file for year '2020'".

In [6]:
# Dry run of rent() for every year from 2010 through 2022; the results are
# discarded, only failures are reported.
for year in range(2010,2023):
    file1 = 'data/'+str(year)+'/GROSS_RENT.CSV'
    file2 = 'data/'+str(year)+'/GROSS_RENT_AS_A_PERCENTAGE_OF_HOUSEHOLD_INCOME_IN_THE_PAST_12_MONTHS.CSV'

    try:
        rent(file1,file2,year)
    except FileNotFoundError:
        # pandas.read_csv raises this when one of the two inputs is absent.
        print(f"No file for year '{year}'")
    except Exception as err:
        # The files exist but could not be processed; report the real cause
        # instead of mislabelling it as a missing file.
        print(f"Could not process year '{year}': {type(err).__name__}: {err}")

No file for year '2020'


### Run function for each year

In [7]:
def _rent_for_year(year):
    # Both input files for a year sit in data/<year>/ under fixed file names.
    folder = 'data/' + str(year)
    return rent(
        folder + '/GROSS_RENT.CSV',
        folder + '/GROSS_RENT_AS_A_PERCENTAGE_OF_HOUSEHOLD_INCOME_IN_THE_PAST_12_MONTHS.CSV',
        year,
    )

# One single-row frame per year, newest first. 2020 is left out: the per-year
# check reports that no file exists for it.
GROSS_RENT_2022 = _rent_for_year(2022)
GROSS_RENT_2021 = _rent_for_year(2021)
GROSS_RENT_2019 = _rent_for_year(2019)
GROSS_RENT_2018 = _rent_for_year(2018)
GROSS_RENT_2017 = _rent_for_year(2017)
GROSS_RENT_2016 = _rent_for_year(2016)
GROSS_RENT_2015 = _rent_for_year(2015)
GROSS_RENT_2014 = _rent_for_year(2014)
GROSS_RENT_2013 = _rent_for_year(2013)
GROSS_RENT_2012 = _rent_for_year(2012)
GROSS_RENT_2011 = _rent_for_year(2011)
GROSS_RENT_2010 = _rent_for_year(2010)

### Combine each year into single dataframe containing all years

In [8]:
# Stack the per-year rows, newest year first, into one table indexed by year.
yearly_rows = [
    GROSS_RENT_2022,
    GROSS_RENT_2021,
    GROSS_RENT_2019,
    GROSS_RENT_2018,
    GROSS_RENT_2017,
    GROSS_RENT_2016,
    GROSS_RENT_2015,
    GROSS_RENT_2014,
    GROSS_RENT_2013,
    GROSS_RENT_2012,
    GROSS_RENT_2011,
    GROSS_RENT_2010,
]
GROSS_RENT = pd.concat(yearly_rows)
GROSS_RENT

Label,med_gross_rent,hh_rent_lt_30pct_income,hh_rent_gte_30pct_income,occupied_units_rented
2022,1426.0,7303.0,3597.0,11078.0
2021,1330.0,6371.46,2943.54,9746.0
2019,1272.0,6730.506,3196.494,10104.0
2018,1179.0,5863.088,3654.912,9652.0
2017,1246.0,7312.074,2928.926,10293.0
2016,1132.0,6316.661,3446.339,9802.0
2015,1185.0,6788.726,3389.274,10178.0
2014,1050.0,5250.145,2154.855,7405.0
2013,1026.0,7863.84,3566.16,11478.0
2012,989.0,6570.585,2018.415,8783.0


### Reset index on combined dataframe

In [9]:
# Move the year labels out of the index into a 'year' column.
# Guarded so that re-running this cell is harmless: without the check, a
# second run would reset the new positional index, rename it to 'year' as
# well, and leave the table with two 'year' columns.
if 'year' not in GROSS_RENT.columns:
    GROSS_RENT = GROSS_RENT.reset_index().rename(columns={'index':'year'})
GROSS_RENT

Label,year,med_gross_rent,hh_rent_lt_30pct_income,hh_rent_gte_30pct_income,occupied_units_rented
0,2022,1426.0,7303.0,3597.0,11078.0
1,2021,1330.0,6371.46,2943.54,9746.0
2,2019,1272.0,6730.506,3196.494,10104.0
3,2018,1179.0,5863.088,3654.912,9652.0
4,2017,1246.0,7312.074,2928.926,10293.0
5,2016,1132.0,6316.661,3446.339,9802.0
6,2015,1185.0,6788.726,3389.274,10178.0
7,2014,1050.0,5250.145,2154.855,7405.0
8,2013,1026.0,7863.84,3566.16,11478.0
9,2012,989.0,6570.585,2018.415,8783.0


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [10]:
# index=False keeps the positional row index out of the saved file.
GROSS_RENT.to_csv(
    path_or_buf='data/final/GROSS_RENT.CSV',
    index=False,
)

### Check that dataframe was saved properly as CSV

In [11]:
# Read the saved file back and display it for a visual comparison with the
# table built above.
df = pd.read_csv(
    'data/final/GROSS_RENT.CSV',
)
df

Unnamed: 0,year,med_gross_rent,hh_rent_lt_30pct_income,hh_rent_gte_30pct_income,occupied_units_rented
0,2022,1426.0,7303.0,3597.0,11078.0
1,2021,1330.0,6371.46,2943.54,9746.0
2,2019,1272.0,6730.506,3196.494,10104.0
3,2018,1179.0,5863.088,3654.912,9652.0
4,2017,1246.0,7312.074,2928.926,10293.0
5,2016,1132.0,6316.661,3446.339,9802.0
6,2015,1185.0,6788.726,3389.274,10178.0
7,2014,1050.0,5250.145,2154.855,7405.0
8,2013,1026.0,7863.84,3566.16,11478.0
9,2012,989.0,6570.585,2018.415,8783.0
