# Imports and Reads

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("data.csv")
df.drop(["Latitude", "Longitude", "Geo County"], inplace=True, axis=1)
df.head()

Unnamed: 0,Taxable Year,County,Population,All Returns,AGI,Median Income,Median Rank,Joint Returns,Joint Returns Median Income,Joint Returns Median Rank,Tax Assessed
0,2011,Plumas,19953.0,7234,326683000,33509,28.0,3584,54112,36.0,8800000
1,2009,Nonresident,0.0,502103,180161996000,59342,0.0,268250,108722,0.0,1387232000
2,2007,Modoc,9659.0,3069,118770000,28083,52.0,1545,44018,56.0,3666000
3,2005,Nonresident,0.0,522584,138452361000,58612,0.0,281732,98575,0.0,1773706000
4,2003,Nonresident,0.0,436493,129987919000,54532,0.0,234008,91267,0.0,1068795000


# Pre-processing

In [3]:
unique_years = np.sort(df["Taxable Year"].unique())
unique_counties = df["County"].unique()

In [4]:
counties_years_df = pd.DataFrame(index=unique_counties, columns=unique_years)
for year in unique_years:
    for county in unique_counties:
        current_query = df.loc[(df["Taxable Year"] == year) & (df["County"] == county), "Median Income"]
        if current_query.shape[0] > 0:
            current_median = current_query.values[0]
            counties_years_df.loc[county, year] = current_median

counties_years_df.head()

Unnamed: 0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
Plumas,22217,23643,26151,26196,27913,29612,29717,29872,30306,31353,...,34484,35679,35966,34590,35751.0,37791,38411,38241,37225,41837
Nonresident,38300,45830,49514,53026,55563,57766,54721,54467,54532,56182,...,70160,69875,71228,71107,,14600,16109,19434,22021,26804
Modoc,19401,19764,20771,21499,23075,24390,24927,25216,25749,25939,...,29423,29789,31073,30412,30809.0,31446,31493,31193,32627,35157
Sacramento,26523,27277,28255,29231,30343,31939,33012,33498,34087,34772,...,37051,37495,37925,38606,39882.0,40803,41920,42776,43348,47538
San Diego,23327,24329,25938,27528,28660,30405,31267,31813,32410,33485,...,37371,37998,38568,39515,41013.0,42009,43195,44267,43442,48759


In [5]:
drop_rows = counties_years_df[counties_years_df.isna().any(axis=1)].index
counties_years_df.drop(drop_rows, axis=0, inplace=True)
counties_years_df.head()

Unnamed: 0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
Plumas,22217,23643,26151,26196,27913,29612,29717,29872,30306,31353,...,34484,35679,35966,34590,35751,37791,38411,38241,37225,41837
Modoc,19401,19764,20771,21499,23075,24390,24927,25216,25749,25939,...,29423,29789,31073,30412,30809,31446,31493,31193,32627,35157
Sacramento,26523,27277,28255,29231,30343,31939,33012,33498,34087,34772,...,37051,37495,37925,38606,39882,40803,41920,42776,43348,47538
San Diego,23327,24329,25938,27528,28660,30405,31267,31813,32410,33485,...,37371,37998,38568,39515,41013,42009,43195,44267,43442,48759
El Dorado,30211,31752,33499,35333,36701,39104,39978,40488,41609,42819,...,45735,47590,47956,48826,47741,49180,51008,51895,51785,58958


In [6]:
pct_change_df = counties_years_df.pct_change(axis=1)
pct_change_df.drop(1995, axis=1, inplace=True)
pct_change_df.head()

  pct_change_df = counties_years_df.pct_change(axis=1)


Unnamed: 0,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
Plumas,0.064185,0.106078,0.001721,0.065544,0.060868,0.003546,0.005216,0.014529,0.034548,0.080311,...,0.029097,0.034654,0.008044,-0.038258,0.033565,0.057061,0.016406,-0.004426,-0.026568,0.123895
Modoc,0.01871,0.050951,0.035049,0.073306,0.056988,0.022017,0.011594,0.021137,0.007379,0.019083,...,0.019967,0.012439,0.043103,-0.021272,0.013054,0.020676,0.001495,-0.009526,0.045972,0.077543
Sacramento,0.028428,0.035854,0.034543,0.038042,0.052599,0.033595,0.014722,0.017583,0.020096,0.018118,...,0.027425,0.011983,0.011468,0.017956,0.033052,0.023093,0.027375,0.02042,0.013372,0.09666
San Diego,0.042955,0.066135,0.0613,0.041122,0.060886,0.028351,0.017463,0.018766,0.033169,0.030312,...,0.028031,0.016778,0.015001,0.024554,0.03791,0.024285,0.028232,0.024818,-0.018637,0.122393
El Dorado,0.051008,0.05502,0.054748,0.038717,0.065475,0.022351,0.012757,0.027687,0.02908,0.026063,...,0.034167,0.04056,0.007691,0.018142,-0.022222,0.030142,0.03717,0.017389,-0.00212,0.138515


# Testing Assumptions