In [2]:
# Dependencies and Setup
import csv
import pandas as pd
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress 
import seaborn as sns 
from scipy import stats
import scipy.stats as sts


Alicia's analysis 

* Households as of March of the following year. Income in current and 2019 CPI-U-RS adjusted dollars (see footnote 28). Beginning in 2010, standard errors were calculated using replicate weights.
* Table H-8. Median Household Income by State: 1984 to 2019
* Source: U.S. Census Bureau, Current Population Survey, Annual Social and Economic Supplements (CPS ASEC). For information on confidentiality protection, sampling error, nonsampling error, and definitions, see <https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar20.pdf>.
* Footnotes are available at <https://www.census.gov/topics/income-poverty/income/guidance/cps-historic-footnotes.html>.

In [3]:
# Relative location of the median-household-income CSV read by the next cell.
# Being relative, it is resolved against the notebook's working directory.
median_income_path = "../Data/Income/Median-Household-Income-5y.csv"

In [4]:
# Load the median-income CSV into a DataFrame and preview its first five rows.
# The non-"State" columns are cast to float by the conversion loop in a later cell.
median_income_data = pd.read_csv(median_income_path)
median_income_data.head()


Unnamed: 0,State,2019 Median income,2019 Standard error,2018 Median income,2018 Standard error,2017 Median income,2017 Standard error,2016 Median income,2016 Standard error,2015 Median income,2015 Standard error,2014 Median income,2014 Standard error
0,United States,68703,550,63179,420,61372,335,59039,436,56516,321,53657,392
1,Alabama,56200,2512,49936,2423,51113,845,47221,2301,44509,3419,42278,1529
2,Alaska,78394,6685,68734,3390,72231,2719,75723,4086,75112,3485,67629,3153
3,Arizona,70674,3391,62283,2291,61125,2642,57100,1971,52248,2008,49254,2304
4,Arkansas,54539,2384,49781,2108,48829,2642,45907,2165,42798,1572,44922,2546


In [5]:
# Cast every column except "State" to float.
# Columns that still hold text have their "," characters (presumably thousands
# separators) removed first; columns that are already numeric are cast directly.
# This keeps the cell idempotent: the original called `.str.replace` on every
# column unconditionally, which raises once the columns are float64 (i.e. on any
# re-run of this cell) or when read_csv has already parsed a column as numeric.
for x in median_income_data.columns:
    if x != "State":
        values = median_income_data[x]
        if not pd.api.types.is_numeric_dtype(values):
            # regex=False: treat "," as a literal character, not a pattern.
            values = values.str.replace(",", "", regex=False)
        median_income_data[x] = values.astype(float)
    print(x)  # echo each column name as it is visited

State
2019 Median income
2019 Standard error
2018 Median income
2018 Standard error
2017 Median income
2017 Standard error
2016 Median income
2016 Standard error
2015 Median income
2015 Standard error
2014 Median income
2014 Standard error


In [6]:
# Inspect the column dtypes: every column other than "State" should report float64.
median_income_data.dtypes

State                   object
2019 Median income     float64
2019 Standard error    float64
2018 Median income     float64
2018 Standard error    float64
2017 Median income     float64
2017 Standard error    float64
2016 Median income     float64
2016 Standard error    float64
2015 Median income     float64
2015 Standard error    float64
2014 Median income     float64
2014 Standard error    float64
dtype: object

In [7]:
# Same dtype listing as the previous cell, emitted as plain text through print().
print(median_income_data.dtypes)

State                   object
2019 Median income     float64
2019 Standard error    float64
2018 Median income     float64
2018 Standard error    float64
2017 Median income     float64
2017 Standard error    float64
2016 Median income     float64
2016 Standard error    float64
2015 Median income     float64
2015 Standard error    float64
2014 Median income     float64
2014 Standard error    float64
dtype: object


In [8]:
# Slim the table down to the state name plus the median household income of each
# year from 2019 back to 2014, relabelling every income column with its bare year.
year_labels = {
    "2019 Median income": "2019",
    "2018 Median income": "2018",
    "2017 Median income": "2017",
    "2016 Median income": "2016",
    "2015 Median income": "2015",
    "2014 Median income": "2014",
}
# Select "State" followed by the income columns (in mapping order), then rename.
selected_columns = ["State", *year_labels]
household_income_df = median_income_data[selected_columns].rename(columns=year_labels)
household_income_df.head()

Unnamed: 0,State,2019,2018,2017,2016,2015,2014
0,United States,68703.0,63179.0,61372.0,59039.0,56516.0,53657.0
1,Alabama,56200.0,49936.0,51113.0,47221.0,44509.0,42278.0
2,Alaska,78394.0,68734.0,72231.0,75723.0,75112.0,67629.0
3,Arizona,70674.0,62283.0,61125.0,57100.0,52248.0,49254.0
4,Arkansas,54539.0,49781.0,48829.0,45907.0,42798.0,44922.0


In [10]:
# Keep only the state-level rows by dropping the national "United States" aggregate.
# The row is matched by its label rather than assumed to sit at position 0, so the
# result stays correct if the source rows are reordered. The assertion makes a
# missing or duplicated aggregate row fail loudly instead of silently leaking into
# the per-state table.
is_national_row = household_income_df["State"].str.strip() == "United States"
assert is_national_row.sum() == 1, "expected exactly one 'United States' row"
# .copy() gives an independent frame rather than a slice of household_income_df,
# which avoids SettingWithCopyWarning on any later assignment.
allstates_df = household_income_df.loc[~is_national_row].copy()
allstates_df.head(49)

Unnamed: 0,State,2019,2018,2017,2016,2015,2014
1,Alabama,56200.0,49936.0,51113.0,47221.0,44509.0,42278.0
2,Alaska,78394.0,68734.0,72231.0,75723.0,75112.0,67629.0
3,Arizona,70674.0,62283.0,61125.0,57100.0,52248.0,49254.0
4,Arkansas,54539.0,49781.0,48829.0,45907.0,42798.0,44922.0
5,California,78105.0,70489.0,69759.0,66637.0,63636.0,60487.0
6,Colorado,72499.0,73034.0,74172.0,70566.0,66596.0,60940.0
7,Connecticut,87291.0,72812.0,72780.0,75923.0,72889.0,70161.0
8,Delaware,74194.0,65012.0,62318.0,58046.0,57756.0,57522.0
9,Florida,58368.0,54644.0,53681.0,51176.0,48825.0,46140.0
10,Georgia,56628.0,55821.0,57016.0,53527.0,50768.0,49555.0


In [11]:
# Narrow the state-level table to the state name and its 2019 figure only.
latest_data = allstates_df.loc[:, ["State", "2019"]]
latest_data.head()


Unnamed: 0,State,2019
1,Alabama,56200.0
2,Alaska,78394.0
3,Arizona,70674.0
4,Arkansas,54539.0
5,California,78105.0


In [12]:
# Relabel the "2019" column as "Average_Income" and display the result.
# Note: despite the label, these values originate from the "2019 Median income"
# column, i.e. they are medians rather than means.
avg_income_df = latest_data.rename(columns={"2019": "Average_Income"})
avg_income_df


Unnamed: 0,State,Average_Income
1,Alabama,56200.0
2,Alaska,78394.0
3,Arizona,70674.0
4,Arkansas,54539.0
5,California,78105.0
6,Colorado,72499.0
7,Connecticut,87291.0
8,Delaware,74194.0
9,Florida,58368.0
10,Georgia,56628.0


In [13]:
# Write the per-state table to CSV.
# to_csv does not create missing folders, so the target directory is created first;
# this lets the cell succeed on a fresh checkout where "../Data/Alicia" is absent.
# The DataFrame index is written as the first (unnamed) column, as before.
avg_income_path = "../Data/Alicia/avg_income_df.csv"
os.makedirs(os.path.dirname(avg_income_path), exist_ok=True)
avg_income_df.to_csv(avg_income_path)