In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Import inflation function from GDP Housing Analysis
import common_functions

In [2]:
# get inflation data
inf_data = pd.read_csv("../dataset/1995inflation.csv")
inf_data.dtypes

year                int64
amount            float64
inflation rate    float64
cumulative        float64
dtype: object

In [3]:
# get GDP data
gdp_data = pd.read_csv("../DataSetDiscovery/GDP PDF Scraper/yearlyGDPbyState.csv")

gdp_data

Unnamed: 0.1,Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,0,1995,Alaska,2001,24791.0,Far West
1,1,1995,Alabama,2001,95514.0,Southeast
2,2,1995,Arkansas,2001,53809.0,Southeast
3,3,1995,Arizona,2001,104586.0,Southwest
4,4,1995,California,2001,925931.0,Far West
...,...,...,...,...,...,...
913,913,2013,Vermont,2014,28838.0,New England
914,914,2013,Washington,2014,407160.0,Far West
915,915,2013,Wisconsin,2014,284728.0,Great Lakes
916,916,2013,West Virginia,2014,70595.0,Southeast


In [4]:
# filter data for just virginia
gdp_data = gdp_data[gdp_data['state']=="Virginia"]

# limit to after 2000
gdp_data = gdp_data[gdp_data['year']>=2000]

# add inflation adjusted column
common_functions.inflation_adjust(gdp_data)

gdp_data

Unnamed: 0.1,Unnamed: 0,year,state,current dollars,GDP,GDP_area,inf-adjusted
300,300,2000,Virginia,2001,260837.0,Southeast,377764.0
351,351,2001,Virginia,2004,277214.0,Southeast,375580.0
402,402,2002,Virginia,2004,288840.0,Southeast,391332.0
453,453,2003,Virginia,2004,304116.0,Southeast,412028.0
504,504,2004,Virginia,2007,324870.0,Southeast,401310.0
555,555,2005,Virginia,2007,350288.0,Southeast,432709.0
606,606,2006,Virginia,2007,368604.0,Southeast,455334.0
657,657,2007,Virginia,2010,389319.0,Southeast,457382.0
708,708,2008,Virginia,2010,402853.0,Southeast,473282.0
759,759,2009,Virginia,2010,409732.0,Southeast,481363.0


In [5]:
# get evictions data
evc_data = pd.read_csv(r'../DataSet/va_eviction_cities.csv',header=0,encoding = "ISO-8859-1")

# limit to before 2013
evc_data = evc_data[evc_data['year']<=2013]
evc_data

Unnamed: 0,GEOID,year,name,parent-location,population,poverty-rate,renter-occupied-households,pct-renter-occupied,median-gross-rent,median-household-income,...,pct-nh-pi,pct-multiple,pct-other,eviction-filings,evictions,eviction-rate,eviction-filing-rate,low-flag,imputed,subbed
0,5100148,2000,Abingdon,Virginia,7780.0,10.06,1398.75,41.94,440.0,30976.0,...,0.01,0.58,0.05,29.86,28.86,2.06,2.13,0,0,0
1,5100148,2001,Abingdon,Virginia,7780.0,10.06,1438.06,41.94,440.0,30976.0,...,0.01,0.58,0.05,38.50,33.80,2.35,2.68,0,0,0
2,5100148,2002,Abingdon,Virginia,7780.0,10.06,1474.88,41.94,440.0,30976.0,...,0.01,0.58,0.05,61.25,47.14,3.20,4.15,0,0,0
3,5100148,2003,Abingdon,Virginia,7780.0,10.06,1514.18,41.94,440.0,30976.0,...,0.01,0.58,0.05,53.00,31.00,2.05,3.50,0,0,0
4,5100148,2004,Abingdon,Virginia,7780.0,10.06,1552.00,41.94,440.0,30976.0,...,0.01,0.58,0.05,84.20,57.86,3.73,5.43,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10115,5188240,2009,Yorktown,Virginia,189.0,0.00,,61.07,770.0,42888.0,...,0.00,0.00,0.00,,,,,0,0,0
10116,5188240,2010,Yorktown,Virginia,195.0,0.00,44.10,50.46,861.0,58365.0,...,0.00,0.00,0.00,2.80,0.00,0.00,6.35,1,0,0
10117,5188240,2011,Yorktown,Virginia,130.0,0.00,44.80,51.06,1000.0,62750.0,...,0.00,11.54,0.00,4.20,4.20,9.38,9.38,1,0,0
10118,5188240,2012,Yorktown,Virginia,130.0,0.00,46.20,51.06,1000.0,62750.0,...,0.00,11.54,0.00,0.70,0.70,1.52,1.52,0,0,0


In [6]:
# create a second data set averaging accross all of virginia
evc_mean = evc_data.groupby(['year']).mean()
# drop GEOID and other field that don't make sense as an average
evc_mean = evc_mean.drop(['GEOID','evictions','eviction-filings'], axis=1)
evc_mean

Unnamed: 0_level_0,population,poverty-rate,renter-occupied-households,pct-renter-occupied,median-gross-rent,median-household-income,median-property-value,rent-burden,pct-white,pct-af-am,...,pct-am-ind,pct-asian,pct-nh-pi,pct-multiple,pct-other,eviction-rate,eviction-filing-rate,low-flag,imputed,subbed
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,11772.024259,11.87,1113.026717,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.211321,1.996819,0.028059,1.178113,0.120458,3.385601,4.37706,0.026891,0.0,0.0
2001,11772.024259,11.87,1204.411079,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.211321,1.996819,0.028059,1.178113,0.120458,3.669979,4.950373,0.045378,0.0,0.0
2002,11772.024259,11.87,1167.352432,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.211321,1.996819,0.028059,1.178113,0.120458,3.589514,6.551304,0.005042,0.0,0.0
2003,11772.024259,11.87,1220.029386,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.211321,1.996819,0.028059,1.178113,0.120458,3.528072,6.294249,0.114286,0.0,0.0
2004,11772.024259,11.87,1233.541124,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.211321,1.996819,0.028059,1.178113,0.120458,3.841005,8.018075,0.0,0.0,0.0
2005,12428.849057,9.82434,1250.299864,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.212372,2.537601,0.047332,1.329973,0.182183,3.48862,8.009455,0.0,0.0,0.0
2006,12428.849057,9.82434,1265.145945,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.212372,2.537601,0.047332,1.329973,0.182183,3.60322,8.424566,0.011765,0.0,0.0
2007,12428.849057,9.82434,887.566833,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.212372,2.537601,0.047332,1.329973,0.182183,3.779382,5.395299,0.047059,0.0,0.0
2008,12428.849057,9.82434,899.94496,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.212372,2.537601,0.047332,1.329973,0.182183,4.344841,5.281394,0.063866,0.0,0.0
2009,12428.849057,9.82434,926.120142,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.212372,2.537601,0.047332,1.329973,0.182183,3.619837,4.982114,0.047059,0.0,0.0


In [7]:
# create a third data set summing all of Virginia
evc_sum = evc_data.groupby(['year']).sum()
# note: this does not match the entire population of virginia.  Persumably there are some communities missing.
# note: Eviction filings and evictions are presented in fractions is this because they are in thousands or because they are estimates?
evc_sum

Unnamed: 0_level_0,GEOID,population,poverty-rate,renter-occupied-households,pct-renter-occupied,median-gross-rent,median-household-income,median-property-value,rent-burden,pct-white,...,pct-nh-pi,pct-multiple,pct-other,eviction-filings,evictions,eviction-rate,eviction-filing-rate,low-flag,imputed,subbed
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,3060320666,4367421.0,4403.77,518670.45,11967.96,208032.0,15137245.0,41157500.0,8903.8,28926.92,...,10.41,437.08,44.69,69268.97,35576.57,1577.69,2039.71,16,0,0
2001,3060320666,4367421.0,4403.77,580526.14,11967.96,208032.0,15137245.0,41157500.0,8903.8,28926.92,...,10.41,437.08,44.69,73799.37,38907.1,1768.93,2386.08,27,0,0
2002,3060320666,4367421.0,4403.77,600019.15,11967.96,208032.0,15137245.0,41157500.0,8903.8,28926.92,...,10.41,437.08,44.69,107198.46,42529.75,1845.01,3367.37,3,0,0
2003,3060320666,4367421.0,4403.77,714937.22,11967.96,208032.0,15137245.0,41157500.0,8903.8,28926.92,...,10.41,437.08,44.69,109694.0,44935.2,2067.45,3688.43,68,0,0
2004,3060320666,4367421.0,4403.77,724088.64,11967.96,208032.0,15137245.0,41157500.0,8903.8,28926.92,...,10.41,437.08,44.69,133841.38,47555.56,2254.67,4706.61,0,0,0
2005,3060320666,4611103.0,3644.83,733926.02,12418.4,284120.0,18860121.0,77252401.0,10358.0,27595.55,...,17.56,493.42,67.59,136024.47,43936.76,2047.82,4701.55,0,0,0
2006,3060320666,4611103.0,3644.83,742640.67,12418.4,284120.0,18860121.0,77252401.0,10358.0,27595.55,...,17.56,493.42,67.59,137974.3,43075.76,2115.09,4945.22,7,0,0
2007,3060320666,4611103.0,3644.83,445558.55,12418.4,284120.0,18860121.0,77252401.0,10358.0,27595.55,...,17.56,493.42,67.59,54867.08,30253.92,1897.25,2708.44,28,0,0
2008,3060320666,4611103.0,3644.83,451772.37,12418.4,284120.0,18860121.0,77252401.0,10358.0,27595.55,...,17.56,493.42,67.59,52620.3,33946.2,2181.11,2651.26,38,0,0
2009,3060320666,4611103.0,3644.83,455651.11,12418.4,284120.0,18860121.0,77252401.0,10358.0,27595.55,...,17.56,493.42,67.59,51015.08,30992.49,1780.96,2451.2,28,0,0


In [8]:
# sum both sets of data to ensure data was summed correctly (Is there a better way??)
evc_data.sum(axis=0)

GEOID                                                               42855323618
year                                                                   16718177
name                          AbingdonAbingdonAbingdonAbingdonAbingdonAbingd...
parent-location               VirginiaVirginiaVirginiaVirginiaVirginiaVirgin...
population                                                          6.72799e+07
poverty-rate                                                            65161.6
renter-occupied-households                                          8.53248e+06
pct-renter-occupied                                                      198309
median-gross-rent                                                   4.45311e+06
median-household-income                                             3.06837e+08
median-property-value                                               1.09032e+09
rent-burden                                                              156045
pct-white                               

In [9]:
evc_sum.sum(axis=0)

GEOID                         4.285532e+10
population                    6.727986e+07
poverty-rate                  6.516162e+04
renter-occupied-households    8.532478e+06
pct-renter-occupied           1.983087e+05
median-gross-rent             4.453113e+06
median-household-income       3.068373e+08
median-property-value         1.090323e+09
rent-burden                   1.560449e+05
pct-white                     4.523886e+05
pct-af-am                     9.479168e+04
pct-hispanic                  3.183282e+04
pct-am-ind                    1.221720e+03
pct-asian                     1.719703e+04
pct-nh-pi                     2.367600e+02
pct-multiple                  9.421590e+03
pct-other                     9.081400e+02
eviction-filings              1.384799e+06
evictions                     5.542605e+05
eviction-rate                 3.078098e+04
eviction-filing-rate          5.571554e+04
low-flag                      1.509000e+03
imputed                       0.000000e+00
subbed     

In [10]:
# add relevant columns from evc_sum to evc_mean createing evc_va
evc_va = evc_mean
evc_va.rename(columns={'population': 'mean-population', 'renter-occupied-households': 'mean-renter-occupied-households'})
evc_va['total population'] = evc_sum['population']
evc_va['renter-occupied-households'] = evc_sum['renter-occupied-households']
evc_va['evictions'] = evc_sum['evictions']

evc_va

Unnamed: 0_level_0,population,poverty-rate,renter-occupied-households,pct-renter-occupied,median-gross-rent,median-household-income,median-property-value,rent-burden,pct-white,pct-af-am,...,pct-nh-pi,pct-multiple,pct-other,eviction-rate,eviction-filing-rate,low-flag,imputed,subbed,total population,evictions
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,11772.024259,11.87,518670.45,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.028059,1.178113,0.120458,3.385601,4.37706,0.026891,0.0,0.0,4367421.0,35576.57
2001,11772.024259,11.87,580526.14,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.028059,1.178113,0.120458,3.669979,4.950373,0.045378,0.0,0.0,4367421.0,38907.1
2002,11772.024259,11.87,600019.15,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.028059,1.178113,0.120458,3.589514,6.551304,0.005042,0.0,0.0,4367421.0,42529.75
2003,11772.024259,11.87,714937.22,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.028059,1.178113,0.120458,3.528072,6.294249,0.114286,0.0,0.0,4367421.0,44935.2
2004,11772.024259,11.87,724088.64,32.258652,560.733154,40801.199461,110936.657682,23.999461,77.970135,14.954663,...,0.028059,1.178113,0.120458,3.841005,8.018075,0.0,0.0,0.0,4367421.0,47555.56
2005,12428.849057,9.82434,733926.02,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.047332,1.329973,0.182183,3.48862,8.009455,0.0,0.0,0.0,4611103.0,43936.76
2006,12428.849057,9.82434,742640.67,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.047332,1.329973,0.182183,3.60322,8.424566,0.011765,0.0,0.0,4611103.0,43075.76
2007,12428.849057,9.82434,445558.55,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.047332,1.329973,0.182183,3.779382,5.395299,0.047059,0.0,0.0,4611103.0,30253.92
2008,12428.849057,9.82434,451772.37,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.047332,1.329973,0.182183,4.344841,5.281394,0.063866,0.0,0.0,4611103.0,33946.2
2009,12428.849057,9.82434,455651.11,33.472776,800.338028,50835.90566,209925.002717,29.342776,74.381536,15.85,...,0.047332,1.329973,0.182183,3.619837,4.982114,0.047059,0.0,0.0,4611103.0,30992.49


In [11]:
# plot against evictions
plt.scatter(gdp_data['GDP'],evc_va['evictions'])

ValueError: x and y must be the same size

In [37]:
# testing for this file can be found contained in this block
import unittest

class sum(unittest.TestCase):
    def test_evc_sum_evc_data_match(self):
        # evc_sum and ecv_data should always have the same total population
        self.assertEqual(eevc_data,eevc_data)

unittest.main(argv=[''], verbosity=2, exit=False)

test_evc_sum_evc_data_match (__main__.sum) ... ERROR

ERROR: test_evc_sum_evc_data_match (__main__.sum)
----------------------------------------------------------------------
Traceback (most recent call last):
  File &quot;&lt;ipython-input-37-f43821724de1&gt;&quot;, line 7, in test_evc_sum_evc_data_match
    self.assertEqual(eevc_data,eevc_data)
NameError: name &#39;eevc_data&#39; is not defined

----------------------------------------------------------------------
Ran 1 test in 0.001s

FAILED (errors=1)


&lt;unittest.main.TestProgram at 0x7ffe8b5c9fd0&gt;