# Income in the Past 12 Months in Inflation Adjusted Dollars Script
Run all of these code blocks in order to create the INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS data table.

### Import Statements

In [6]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

### Function to create row for each year

In [65]:
def _parse_estimate(raw):
    """Parse one raw 'Estimate' cell into a ``(value, is_percent)`` pair.

    Percent strings such as '88.4%' come back as fractions (0.884) with
    ``is_percent`` set to True. Thousands separators are removed from plain
    numbers, so both '1,316' and '950' parse. Blank cells and text that is
    not a number yield NaN.
    """
    if pd.isna(raw):
        return float('nan'), False
    text = str(raw).strip()
    is_percent = text.endswith('%')
    try:
        value = float(text.rstrip('%').replace(',', ''))
    except ValueError:
        # Non-numeric placeholder text is treated as a missing value
        return float('nan'), is_percent
    return (value / 100 if is_percent else value), is_percent


def _segment_values(cells, segment):
    """Convert one segment's raw cells to numbers.

    The first cell is the segment total; every percent cell is multiplied by
    that total so it becomes an estimated count. Non-percent cells are kept
    as they are.
    """
    parsed = [_parse_estimate(cell) for cell in cells]
    total = parsed[0][0]
    if pd.isna(total):
        # Without the total, none of the percent rows can be scaled
        raise ValueError(f"Total for segment '{segment}' is missing or not numeric")
    return [value * total if is_percent else value for value, is_percent in parsed]


def income(file,year):
    """Build a one-row income table for a single year.

    The CSV is read positionally and must contain an 'Estimate' column with
    exactly 32 data rows:

    * rows 0-12  : households (row 0 is the household total)
    * rows 13-20 : families (row 13 is the family total)
    * rows 21-31 : individuals (21 total, 22 per capita income,
      23 header, 24/25 male/female full-time year-round workers,
      26 header, 27/28 male/female mean earnings, 29-31 unused)

    Percent estimates in the household and family segments are converted to
    counts by multiplying by the segment total. The two blank header rows in
    the individuals segment are filled with the male+female worker total and
    the worker-weighted mean of the male/female mean earnings.

    Parameters
    ----------
    file : path or buffer accepted by ``pd.read_csv``
    year : label used as the index of the returned row

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``year`` with 25 float columns.

    Raises
    ------
    ValueError
        If the file does not have 32 data rows, or a segment total cannot
        be parsed.
    """
    # Read in file
    df = pd.read_csv(file)

    # Every position below depends on the fixed 32-row layout
    expected_rows = 32
    if len(df) != expected_rows:
        raise ValueError(f"{file}: expected {expected_rows} data rows, found {len(df)}")

    cells = df['Estimate'].tolist()

    # Households and families: percent rows become population estimates
    households = _segment_values(cells[:13], 'households')
    families = _segment_values(cells[13:21], 'families')

    # Individuals: rows 21-28 are needed, rows 29-31 are discarded
    people = [_parse_estimate(cell)[0] for cell in cells[21:29]]
    individuals, per_capita = people[0], people[1]
    male_workers, female_workers = people[3], people[4]
    male_mean, female_mean = people[6], people[7]

    # Total full-time, year-round workers replaces the blank header row
    workers = male_workers + female_workers
    # Overall mean earnings is the worker-weighted average of the two means
    if workers:
        mean_earnings = ((male_mean*male_workers)+(female_mean*female_workers))/workers
    else:
        mean_earnings = float('nan')

    # Abbreviated underscored column names, in CSV row order.
    # NOTE(review): the family names pair each median with the family type on
    # the FOLLOWING row (row 14 -> med_married_couple_fam_inc, row 20 ->
    # med_fam_inc). If the source table lists each median AFTER its family
    # type, these names are shifted by one - confirm against the CSV labels.
    col_names = ['households', 'med_hh_inc', 'hh_with_earnings', 'mean_earnings', 'hh_with_soc_sec_inc', 'mean_soc_sec_inc', 'hh_with_sup_sec_inc', 'mean_sup_sec_inc', 'hh_with_cash_pub_assist_inc', 'mean_cash_pub_assist_inc', 'hh_with_retire_inc', 'mean_retire_inc', 'hh_with_food_stamp_snap', 'families', 'med_married_couple_fam_inc', 'married_couple_fam', 'med_male_hh_no_spouse_inc', 'male_hh_no_spouse_fam', 'med_female_hh_no_spouse_inc', 'female_hh_no_spouse_fam', 'med_fam_inc', 'individuals', 'indiv_per_cap_inc', 'indiv_with_earnings_ft_yr', 'indiv_ft_yr_mean_earnings']

    row = households + families + [individuals, per_capita, workers, mean_earnings]
    merged_df = pd.DataFrame([row], index=[year], columns=col_names)

    return merged_df

### Test that function works

In [66]:
# Spot-check the function on the 2022 file; the resulting row is displayed
sample_csv_2022 = 'data/2022/INCOME_IN_THE_PAST_12_MONTHS_IN_2022_INFLATIONADJUSTED_DOLLARS.CSV'
income(sample_csv_2022, 2022)

Unnamed: 0,households,med_hh_inc,hh_with_earnings,mean_earnings,hh_with_soc_sec_inc,mean_soc_sec_inc,hh_with_sup_sec_inc,mean_sup_sec_inc,hh_with_cash_pub_assist_inc,mean_cash_pub_assist_inc,...,married_couple_fam,med_male_hh_no_spouse_inc,male_hh_no_spouse_fam,med_female_hh_no_spouse_inc,female_hh_no_spouse_fam,med_fam_inc,individuals,indiv_per_cap_inc,indiv_with_earnings_ft_yr,indiv_ft_yr_mean_earnings
2022,32190.0,107149.0,28455.96,138669.0,8111.88,22259.0,836.94,10556.0,579.42,1316.0,...,16531.515,131647.0,1641.591,66976.0,4947.894,79306.0,90566.0,48992.0,38713.0,88935.325291


### Test that function works for each year
Should print only "No file for year '2020'", because there is no data file for 2020.

In [67]:
# Run income() on every year's CSV and report which years cannot be processed
for year in range(2010,2023):
    file = 'data/'+str(year)+'/INCOME_IN_THE_PAST_12_MONTHS_IN_'+str(year)+'_INFLATIONADJUSTED_DOLLARS.CSV'

    try:
        income(file,year)
    except FileNotFoundError:
        # Only a genuinely missing CSV is reported as "No file"
        print(f"No file for year '{year}'")
    except Exception as err:
        # Any other failure is a processing problem, so report it as such
        print(f"income() failed for year '{year}': {err!r}")

No file for year '2020'


### Run function for each year

In [68]:
# Build one single-row table per year, newest first (no 2020 entry)
def _income_for(year):
    """Run income() on the CSV stored under data/<year>/."""
    path = 'data/{0}/INCOME_IN_THE_PAST_12_MONTHS_IN_{0}_INFLATIONADJUSTED_DOLLARS.CSV'.format(year)
    return income(path, year)

INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2022 = _income_for(2022)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2021 = _income_for(2021)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2019 = _income_for(2019)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2018 = _income_for(2018)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2017 = _income_for(2017)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2016 = _income_for(2016)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2015 = _income_for(2015)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2014 = _income_for(2014)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2013 = _income_for(2013)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2012 = _income_for(2012)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2011 = _income_for(2011)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2010 = _income_for(2010)

### Combine each year into single dataframe containing all years

In [69]:
# Stack the per-year rows, newest year first, into one table
yearly_tables = [
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2022,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2021,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2019,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2018,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2017,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2016,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2015,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2014,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2013,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2012,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2011,
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS_2010,
]
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS = pd.concat(yearly_tables)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS

Unnamed: 0,households,med_hh_inc,hh_with_earnings,mean_earnings,hh_with_soc_sec_inc,mean_soc_sec_inc,hh_with_sup_sec_inc,mean_sup_sec_inc,hh_with_cash_pub_assist_inc,mean_cash_pub_assist_inc,...,married_couple_fam,med_male_hh_no_spouse_inc,male_hh_no_spouse_fam,med_female_hh_no_spouse_inc,female_hh_no_spouse_fam,med_fam_inc,individuals,indiv_per_cap_inc,indiv_with_earnings_ft_yr,indiv_ft_yr_mean_earnings
2022,32190.0,107149.0,28455.96,138669.0,8111.88,22259.0,836.94,10556.0,579.42,1316.0,...,16531.515,131647.0,1641.591,66976.0,4947.894,79306.0,90566.0,48992.0,38713.0,88935.325291
2021,29191.0,100098.0,25658.889,114198.0,7852.379,21673.0,1080.067,7721.0,1605.505,5881.0,...,16658.094,123980.0,1094.026,52813.0,2889.88,72290.0,88410.0,41632.0,37257.0,74621.441984
2019,26642.0,99311.0,23977.8,107921.0,6820.352,22222.0,532.84,13075.0,666.05,2642.0,...,14515.072,110962.0,1427.712,,2361.216,98438.0,90604.0,37248.0,37340.0,68987.518265
2018,28119.0,90519.0,24604.125,115456.0,7564.011,23222.0,1546.545,9181.0,,,...,14430.315,111997.0,1395.249,103879.0,3287.436,85308.0,88862.0,40071.0,36815.0,72488.738422
2017,26139.0,80633.0,23002.32,107240.0,7031.391,18420.0,914.865,9327.0,653.475,2282.0,...,12301.848,103922.0,1565.028,111429.0,4349.322,65261.0,83710.0,35506.0,34865.0,67545.535351
2016,23580.0,85467.0,21670.02,101973.0,5446.98,22845.0,1108.26,10650.0,518.76,,...,12124.274,104882.0,766.946,71106.0,3410.462,66060.0,75230.0,36278.0,30886.0,65346.317943
2015,25286.0,85545.0,23288.406,100788.0,5208.916,22370.0,1036.726,7439.0,632.15,2198.0,...,13727.556,115315.0,824.004,58906.0,2980.44,46875.0,81632.0,35065.0,33455.0,67200.706143
2014,22226.0,90424.0,20359.016,97772.0,5089.754,19799.0,777.91,7285.0,533.424,3467.0,...,12626.367,99299.0,624.117,90305.0,2752.516,91497.0,79608.0,31084.0,31611.0,60176.819209
2013,26959.0,75459.0,24236.141,91695.0,6820.627,18454.0,1159.237,6401.0,512.221,2231.0,...,13198.5,97686.0,1734.66,50118.0,3921.84,67023.0,83472.0,31108.0,32393.0,61210.763529
2012,23724.0,86429.0,21826.08,98921.0,4673.628,17122.0,687.996,7130.0,332.136,3208.0,...,12403.238,99789.0,962.024,86275.0,3813.738,73228.0,78113.0,34397.0,33616.0,62310.865302


### Reset index on combined dataframe

In [70]:
# Move the year labels out of the index into a regular 'year' column
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS = (
    INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS
    .reset_index()
    .rename(columns={'index':'year'})
)
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS

Unnamed: 0,year,households,med_hh_inc,hh_with_earnings,mean_earnings,hh_with_soc_sec_inc,mean_soc_sec_inc,hh_with_sup_sec_inc,mean_sup_sec_inc,hh_with_cash_pub_assist_inc,...,married_couple_fam,med_male_hh_no_spouse_inc,male_hh_no_spouse_fam,med_female_hh_no_spouse_inc,female_hh_no_spouse_fam,med_fam_inc,individuals,indiv_per_cap_inc,indiv_with_earnings_ft_yr,indiv_ft_yr_mean_earnings
0,2022,32190.0,107149.0,28455.96,138669.0,8111.88,22259.0,836.94,10556.0,579.42,...,16531.515,131647.0,1641.591,66976.0,4947.894,79306.0,90566.0,48992.0,38713.0,88935.325291
1,2021,29191.0,100098.0,25658.889,114198.0,7852.379,21673.0,1080.067,7721.0,1605.505,...,16658.094,123980.0,1094.026,52813.0,2889.88,72290.0,88410.0,41632.0,37257.0,74621.441984
2,2019,26642.0,99311.0,23977.8,107921.0,6820.352,22222.0,532.84,13075.0,666.05,...,14515.072,110962.0,1427.712,,2361.216,98438.0,90604.0,37248.0,37340.0,68987.518265
3,2018,28119.0,90519.0,24604.125,115456.0,7564.011,23222.0,1546.545,9181.0,,...,14430.315,111997.0,1395.249,103879.0,3287.436,85308.0,88862.0,40071.0,36815.0,72488.738422
4,2017,26139.0,80633.0,23002.32,107240.0,7031.391,18420.0,914.865,9327.0,653.475,...,12301.848,103922.0,1565.028,111429.0,4349.322,65261.0,83710.0,35506.0,34865.0,67545.535351
5,2016,23580.0,85467.0,21670.02,101973.0,5446.98,22845.0,1108.26,10650.0,518.76,...,12124.274,104882.0,766.946,71106.0,3410.462,66060.0,75230.0,36278.0,30886.0,65346.317943
6,2015,25286.0,85545.0,23288.406,100788.0,5208.916,22370.0,1036.726,7439.0,632.15,...,13727.556,115315.0,824.004,58906.0,2980.44,46875.0,81632.0,35065.0,33455.0,67200.706143
7,2014,22226.0,90424.0,20359.016,97772.0,5089.754,19799.0,777.91,7285.0,533.424,...,12626.367,99299.0,624.117,90305.0,2752.516,91497.0,79608.0,31084.0,31611.0,60176.819209
8,2013,26959.0,75459.0,24236.141,91695.0,6820.627,18454.0,1159.237,6401.0,512.221,...,13198.5,97686.0,1734.66,50118.0,3921.84,67023.0,83472.0,31108.0,32393.0,61210.763529
9,2012,23724.0,86429.0,21826.08,98921.0,4673.628,17122.0,687.996,7130.0,332.136,...,12403.238,99789.0,962.024,86275.0,3813.738,73228.0,78113.0,34397.0,33616.0,62310.865302


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [71]:
# Write the combined table without the positional index column
final_csv_path = 'data/final/INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS.CSV'
INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS.to_csv(final_csv_path, index=False)

### Check that dataframe was saved properly as CSV

In [72]:
# Read the saved file back and display it for a visual check
saved_csv_path = 'data/final/INCOME_IN_THE_PAST_12_MONTHS_IN_INFLATIONADJUSTED_DOLLARS.CSV'
test = pd.read_csv(saved_csv_path)
test

Unnamed: 0,year,households,med_hh_inc,hh_with_earnings,mean_earnings,hh_with_soc_sec_inc,mean_soc_sec_inc,hh_with_sup_sec_inc,mean_sup_sec_inc,hh_with_cash_pub_assist_inc,...,married_couple_fam,med_male_hh_no_spouse_inc,male_hh_no_spouse_fam,med_female_hh_no_spouse_inc,female_hh_no_spouse_fam,med_fam_inc,individuals,indiv_per_cap_inc,indiv_with_earnings_ft_yr,indiv_ft_yr_mean_earnings
0,2022,32190.0,107149.0,28455.96,138669.0,8111.88,22259.0,836.94,10556.0,579.42,...,16531.515,131647.0,1641.591,66976.0,4947.894,79306.0,90566.0,48992.0,38713.0,88935.325291
1,2021,29191.0,100098.0,25658.889,114198.0,7852.379,21673.0,1080.067,7721.0,1605.505,...,16658.094,123980.0,1094.026,52813.0,2889.88,72290.0,88410.0,41632.0,37257.0,74621.441984
2,2019,26642.0,99311.0,23977.8,107921.0,6820.352,22222.0,532.84,13075.0,666.05,...,14515.072,110962.0,1427.712,,2361.216,98438.0,90604.0,37248.0,37340.0,68987.518265
3,2018,28119.0,90519.0,24604.125,115456.0,7564.011,23222.0,1546.545,9181.0,,...,14430.315,111997.0,1395.249,103879.0,3287.436,85308.0,88862.0,40071.0,36815.0,72488.738422
4,2017,26139.0,80633.0,23002.32,107240.0,7031.391,18420.0,914.865,9327.0,653.475,...,12301.848,103922.0,1565.028,111429.0,4349.322,65261.0,83710.0,35506.0,34865.0,67545.535351
5,2016,23580.0,85467.0,21670.02,101973.0,5446.98,22845.0,1108.26,10650.0,518.76,...,12124.274,104882.0,766.946,71106.0,3410.462,66060.0,75230.0,36278.0,30886.0,65346.317943
6,2015,25286.0,85545.0,23288.406,100788.0,5208.916,22370.0,1036.726,7439.0,632.15,...,13727.556,115315.0,824.004,58906.0,2980.44,46875.0,81632.0,35065.0,33455.0,67200.706143
7,2014,22226.0,90424.0,20359.016,97772.0,5089.754,19799.0,777.91,7285.0,533.424,...,12626.367,99299.0,624.117,90305.0,2752.516,91497.0,79608.0,31084.0,31611.0,60176.819209
8,2013,26959.0,75459.0,24236.141,91695.0,6820.627,18454.0,1159.237,6401.0,512.221,...,13198.5,97686.0,1734.66,50118.0,3921.84,67023.0,83472.0,31108.0,32393.0,61210.763529
9,2012,23724.0,86429.0,21826.08,98921.0,4673.628,17122.0,687.996,7130.0,332.136,...,12403.238,99789.0,962.024,86275.0,3813.738,73228.0,78113.0,34397.0,33616.0,62310.865302
