In [None]:
  #import libraries 
    
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS
import numpy as np
import datetime as dt
import re  
from shapely.geometry import Point
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
import seaborn as sns
plt.rcParams['figure.figsize'] = [12, 8]

In [None]:
# import datasets to be used

# Building-permit records. low_memory=False lets pandas settle each column's
# dtype from the whole file rather than chunk by chunk.
issued = pd.read_csv('Building_Permits_Issued.csv', low_memory=False)

# Single-family appraisal extracts, one file per assessment year.
SINGLE_FAMILY_2009, SINGLE_FAMILY_2013, SINGLE_FAMILY_2017 = [
    pd.read_csv(appraisal_file)
    for appraisal_file in (
        '2009 single family_final_with_sqft.txt',
        '2013 single family_final_with_sqft.txt',
        '2017 single family_final_with_sqft.txt',
    )
]

In [None]:
# check what the data looks likes

issued.head()

In [None]:
# Map the permit file's headers to short, identifier-friendly column names.
permit_column_names = {
    'Permit #': 'PermitNo',
    'Permit Type Description': 'PermTypeDesc',
    'Permit Subtype Description': 'PermSubtypeDesc',
    'Parcel': 'Parcel',
    'Date Entered': 'Entered',
    'Date Issued': 'Issued',
    'Const. Cost': 'ConstCost',
    'Address': 'Address',
    'City': 'City',
    'State': 'State',
    'Zip': 'Zip',
    'Subdivision/Lot': 'SubdLot',
    'Contact': 'Contact',
    'Permit Type': 'PermType',
    'Permit Subtype': 'PermSubtype',
    'IVR Trk#': 'IVRTrk#',
    'Purpose': 'Purpose',
    'Council Dist': 'CouncilDist',
    'Census Tract': 'CensusTract',
    'Mapped Location': 'MappedLoc',
}
issued = issued.rename(columns=permit_column_names)

In [None]:
#BuildingPermit['long_lat']=
# BuildingPermit['Mapped Location'].replace('\(.*?\)','',regex=True)


# Collect every "(...)" group found in each MappedLoc value; the first group
# is used further down as the coordinate text.
coords = re.compile(r'\(.*?\)')

# A missing MappedLoc is a float NaN, which re.findall cannot scan (TypeError).
# Such rows get an empty match list so that row positions stay aligned with
# `issued`.
matches = [
    coords.findall(mapped_loc) if isinstance(mapped_loc, str) else []
    for mapped_loc in issued.MappedLoc
]


In [None]:
# matches

In [None]:
# One row per permit, one column per "(...)" group that was found in MappedLoc.
matches=pd.DataFrame(matches)


In [None]:
matches.head()

In [None]:
# keep only the first column, i.e. the first parenthesised group of each row
# NOTE: this rebinds `matches` from a DataFrame to a Series, so the cell is
# not meant to be run twice.
matches=matches.iloc[:,0]



In [None]:
# Index-aligned join: adds the extracted text to `issued` as a new column
# labelled 0.
issued=issued.join(matches)

In [None]:
# rename column 0 (the extracted parenthesised text) to 'location'
issued.rename(columns={0:'location'},inplace =True)
issued.head()

In [None]:
# remove the opening parenthesis
# regex=False: a lone '(' is not a valid regular expression, and pandas
# versions before 2.0 interpret the pattern as a regex by default.
issued['location']=issued.location.str.replace('(','',regex=False)

In [None]:
# remove the closing parenthesis
# regex=False: a lone ')' is not a valid regular expression, and pandas
# versions before 2.0 interpret the pattern as a regex by default.
issued['location']=issued.location.str.replace(')','',regex=False)


In [None]:
# split each location string on ', ' into separate columns (expand=True);
# the first piece is later labelled latitude and the second longitude
location=issued.location.str.split(', ', expand=True)

In [None]:
# label the two split pieces: column 0 -> 'Lat', column 1 -> 'Long'
# (both columns still hold strings at this point)
location.rename(columns={0:'Lat',1:'Long'},inplace=True)


In [None]:
# issued_df is a new frame with Lat/Long appended; `issued` itself is not
# modified and does not get these columns.
issued_df=issued.join(location)

In [None]:
issued_df.head()

In [None]:
# drop unneccessary column 
#issued_df=issued_df.drop('location',axis=1)

In [None]:
#issued_df['Long']=pd.to_numeric(issued_df['Long'])

In [None]:
#issued_df['Lat']=pd.to_numeric(issued_df['Lat'])

In [None]:
#  split the location column  by comma
# issued['lat']=issued.location.str.split(',')

In [None]:
# Parse the Issued column into datetimes, then derive the calendar year of
# issue as its own column.
issued = issued.assign(Issued=pd.to_datetime(issued['Issued']))
issued = issued.assign(Year=issued['Issued'].dt.year)

In [None]:
# check data type of location column
issued.location.dtype

In [None]:
# look what the data looks like
# Land: Land value
# IMPR: Improvement Values
# Total APPR: Total Appraised value 

SINGLE_FAMILY_2009.head()

# Give the 2009 appraisal columns year-independent names so the three
# assessment years can be processed with the same code.
columns_2009 = {
    'APN': 'APN',
    'DistrictCode': 'DistrictCode',
    'Council District': 'DistrictNo',
    'AddressFullAddress': 'Address',
    'AddressCity': 'City',
    'AddressPostalCode': 'Zip',
    '2009 LAND': 'LandValue',
    '2009 IMPR': 'ImprovementValue',
    '2009 TOTAL APPR': 'AppraisedValue',
    '2009 TOTAL ASSD': '2009 TOTAL ASSD',
}
sf2009 = SINGLE_FAMILY_2009.rename(columns=columns_2009)

In [None]:
# to check if there are zero values
sf2009.LandValue.min()

In [None]:
# Same renaming as for 2009: strip the year from the value columns so that
# every year exposes LandValue / ImprovementValue / AppraisedValue.
columns_2013 = {
    'APN': 'APN',
    'DistrictCode': 'DistrictCode',
    'Council District': 'DistrictNo',
    'AddressFullAddress': 'Address',
    'AddressCity': 'City',
    'AddressPostalCode': 'Zip',
    '2013 LAND': 'LandValue',
    '2013 IMPR': 'ImprovementValue',
    '2013 TOTAL APPR': 'AppraisedValue',
    '2013 TOTAL ASSD': '2013 TOTAL ASSD',
}
sf2013 = SINGLE_FAMILY_2013.rename(columns=columns_2013)

columns_2017 = {
    'APN': 'APN',
    'DistrictCode': 'DistrictCode',
    'Council District': 'DistrictNo',
    'AddressFullAddress': 'Address',
    'AddressCity': 'City',
    'AddressPostalCode': 'Zip',
    '2017 LAND': 'LandValue',
    '2017 IMPR': 'ImprovementValue',
    '2017 TOTAL APPR': 'AppraisedValue',
    '2017 TOTAL ASSD': '2017 TOTAL ASSD',
}
sf2017 = SINGLE_FAMILY_2017.rename(columns=columns_2017)


In [None]:
# Zip-code lookup table. Whichever spelling the file uses for its zip column
# ('zipcode' or 'ZIP'), it ends up named 'Zip' to match the appraisal frames.
Lat_Long_Zip = pd.read_csv('US Zip Codes from 2013 Government Data').rename(
    columns={'zipcode': 'Zip', 'ZIP': 'Zip'}
)

In [None]:
# Attach the zip-code lookup columns to the 2009, 2013 and 2017 appraisal
# frames. Inner join: rows whose Zip is absent from either side are dropped.
sf2009 = Lat_Long_Zip.merge(sf2009, how='inner', on='Zip')
sf2013 = Lat_Long_Zip.merge(sf2013, how='inner', on='Zip')
sf2017 = Lat_Long_Zip.merge(sf2017, how='inner', on='Zip')

In [None]:
sf2013.head()

In [None]:
sf2013.AppraisedValue.dtype


In [None]:
sf2013.AppraisedValue.mean()

In [None]:
# Mortgage payment formula:
#   P = L * [r(1 + r)^m] / [(1 + r)^m - 1]
# P = monthly payment, L = amount financed, r = interest rate per month,
# m = number of monthly payments
r = 0.0475 / 12  # monthly interest rate
m = 30 * 12      # number of months in 30 years

amortization_numerator = r * (1 + r) ** m
amortization_denominator = (1 + r) ** m - 1

# Monthly spending per property: the amount financed is the appraised value
# minus 5% of it.
for appraisals in (sf2013, sf2009, sf2017):
    financed = appraisals.AppraisedValue - appraisals.AppraisedValue * 0.05
    appraisals['Monthly_Housing_Spending'] = (
        financed * amortization_numerator / amortization_denominator
    )

In [None]:
sf2013['Monthly_Housing_Spending'].head()

In [None]:
# Two-column view used for the per-zip averages below.
sf2013_Zip = sf2013[['Zip', 'Monthly_Housing_Spending']]

# 2013 Highest Housing Spending Zip Codes

* Zip codes 37205 and 37215 are the two areas with the highest housing spending (the highest-valued areas)
* These two zip codes are in South Nashville
* https://goo.gl/maps/G4b3xSGxrP12
*  https://goo.gl/maps/eep7Z48U1R12

In [None]:
# Mean monthly mortgage payment per zip code, most expensive first.
mean_spending_by_zip = (
    sf2013_Zip
    .groupby('Zip')
    .mean()
    .sort_values('Monthly_Housing_Spending', ascending=False)
)
# The ten zip codes with the highest mean payment.
highest_morgage_Zip = mean_spending_by_zip.head(10)
highest_morgage_Zip

# 2013 lowest Housing Spending Zip Codes

* The majority of low-valued/low-spending areas are in East Nashville and Antioch
* Zip code 37228 has the lowest yearly housing spending/mortgage payment
*  It is located in North Nashville, in a mainly warehouse area
* https://goo.gl/maps/oHs6b7oFQQT2 

In [None]:
# The ten zip codes with the lowest mean monthly mortgage payment: sort from
# most to least expensive and keep the last ten rows.
spending_ranked_by_zip = (
    sf2013_Zip
    .groupby('Zip')
    .mean()
    .sort_values('Monthly_Housing_Spending', ascending=False)
)
lowest_morgage_Zip = spending_ranked_by_zip.tail(10)
lowest_morgage_Zip


In [None]:
# sf2013_map is another name for the same DataFrame object as sf2013 (no copy
# is made), so the geometry column added below is visible through both names.
sf2013_map=sf2013
# Build one shapely Point per row, in (longitude, latitude) order.
sf2013['geometry'] = sf2013.apply(lambda x: Point((float(x.LNG), float(x.LAT))), axis=1)
sf2013_map.info()

In [None]:
# NOTE(review): this rebinds sf2013_map, which the previous cell bound to the
# sf2013 DataFrame, to a folium Map centred on [36.05223, -86.632515].
sf2013_map = folium.Map(location = [36.05223,-86.632515], zoom_start = 8)

In [None]:
# let's check type of permit

issued.loc[:,['PermTypeDesc','PermSubtypeDesc','PermType']].head()

In [None]:
issued.PermType.unique()

In [None]:
# to calculate how many permits issued per year?

# Columns needed for the permit counts and construction-cost statistics.
permit_summary_columns = ['Year', 'PermitNo', 'PermTypeDesc', 'PermSubtypeDesc', 'Issued', 'ConstCost', 'Zip']
issued_select1 = issued[permit_summary_columns]

In [None]:
# Year, zip code and permit number are enough for counting permits.
issued_select2 = issued_select1[['Year', 'Zip', 'PermitNo']]


In [None]:
issued_select2.head()

- Zip 37209 in 2017 has the highest number of permits issued

In [None]:
# Number of permits issued for each (zip code, year) pair.
issued_zip_yearly = (
    issued_select2
    .groupby(['Zip', 'Year'])
    .count()
    .rename(columns={'PermitNo': 'count'})
)
issued_zip_yearly.sort_values('count', ascending=False).head()

In [None]:
# Number of building permits issued per year, busiest year first.
# (count() runs on every non-grouping column, so 'Zip' is counted as well.)
issued_yearly = (
    issued_select2
    .groupby('Year')
    .count()
    .rename(columns={'PermitNo': 'count'})
)
issued_yearly.sort_values('count', ascending=False)

In [None]:
# the average construction cost $289901.5

In [None]:
# compute desctiptive statistics for coonstruction cost

issued_select1.ConstCost.describe()

In [None]:
2.899015*(10**5)

In [None]:
# Residential permits are those whose permit type code is CARN or CARR.
is_residential = issued['PermType'].isin(['CARN', 'CARR'])
residential = issued[is_residential]

In [None]:
# Year, zip code and permit number of every residential permit.
residential_Zip_Yearly = residential[['Year', 'Zip', 'PermitNo']]

In [None]:
residential_Zip_Yearly.head()

- 37209, 37206 and 37013 are the top 3 zip codes where highest number of residential permits were issued.

In [None]:
# Residential permit counts per (zip code, year), largest count first.
zip_year_groups = residential_Zip_Yearly.groupby(['Zip', 'Year'])
residential_Zip_Year = zip_year_groups.count()
residential_Zip_Year.sort_values('PermitNo', ascending=False)

In [None]:
# Year and permit number of every residential permit.
residential_Yearly = residential[['Year', 'PermitNo']]


In [None]:
# Residential permits issued per year (index: Year, column: PermitNo count).
year_groups = residential_Yearly.groupby('Year')
residential_by_year = year_groups.count()


In [None]:
# Same yearly counts, ordered from the busiest year down.
residential_by_year_sorted = residential_by_year.sort_values(
    by='PermitNo',
    ascending=False,
)
residential_by_year_sorted

In [None]:
# Bar chart of residential permits per year, labelled through the Axes that
# pandas returns.
permits_ax = residential_by_year.plot(kind='bar', color='darkred', legend=False)
permits_ax.set_ylabel('Counts of Permits Issued')
permits_ax.set_title('Number of permits issued between 2015-2018')

# The average cost for residential permits has been decreasing since 2015.

In [None]:
# Mean construction cost of residential permits per year, costliest year first.
mean_cost_by_year = residential[['Year', 'ConstCost']].groupby('Year').mean()
residential_select1 = mean_cost_by_year.sort_values('ConstCost', ascending=False)
residential_select1

In [None]:
# Bar chart of the yearly mean construction cost.
cost_ax = residential_select1.plot(kind='bar', color='lightblue')
cost_ax.set_ylabel('Average_Construction_Cost')
cost_ax.set_title('Average Cost of Construction Overtime')

In [None]:
# demo_residential=issued[(residential.Address) ISIN (demolished.Address)]

In [None]:
# Demolition permits are those whose permit type code is CADM.
is_demolition = issued['PermType'].eq('CADM')
demolished = issued[is_demolition]
demolished.head()

In [None]:
# Columns of interest for the demolition permits.
demolition_columns = ['Year', 'PermitNo', 'PermType', 'Address', 'PermTypeDesc', 'PermSubtypeDesc', 'Issued', 'ConstCost', 'Zip']
demolished_select1 = demolished[demolition_columns]

In [None]:
# Demolition permit counts per (zip code, year); show the 20 largest.
demolished_select2 = (
    demolished_select1[['Year', 'Zip', 'PermitNo']]
    .groupby(['Zip', 'Year'])
    .count()
)
demolished_select2.sort_values('PermitNo', ascending=False).head(20)

In [None]:
#actual new houses built sub type descritption

#['CAA01R301', 'CAA02R302', 'CAA03R298', 'CAA03R299', 'CAA03R398', 'CAA03R399']

# permits demolished and rebuilt

# Permit subtype codes treated here as newly built homes.
stypes = ['CAA01R301', 'CAA02R302', 'CAA03R298', 'CAA03R299', 'CAA03R398', 'CAA03R399']

is_new_home = issued['PermSubtype'].isin(stypes)
homes = pd.DataFrame(issued[is_new_home])
demos = issued[issued['PermType'] == 'CADM']
# New-home permits whose address also appears on a demolition permit.
rebuilt = homes[homes['Address'].isin(demos['Address'])]

https://github.com/nss-data-science-cohort-2/data-question-4-affordable-housing-plaid-platypi-plaidypi.git


# income groups based on Area Median income(AMI) =$ 68K (for family of 4)
AMI=68000

income_cat_1=0.30* AMI
income_cat_2=0.60*AMI
income_cat_3=0.80*AMI
income_cat_4=1.20*AMI
income_cat_5>=1.20*AMI


# the families should not spend more than 30 % of their income in all categories
# where to find income data: census tract
# maximum spending on housing per each category

cat1_spend=68000*0.3*0.3
cat2_spend=68000*0.6*0.3
cat3_spend=68000*0.8*0.3
cat4_spend=68000*1.2*0.3




In [None]:
# Annual income category limits, expressed as shares of the area median
# income (AMI) of each year.
AMI_2017 = 68000
AMI_2009 = 64900
AMI_2013 = 62300

# Category 1..4 upper limits as a fraction of AMI.
AMI_SHARES = (0.30, 0.60, 0.80, 1.20)

income_cat_1_2017, income_cat_2_2017, income_cat_3_2017, income_cat_4_2017 = (
    share * AMI_2017 for share in AMI_SHARES
)
income_cat_1_2013, income_cat_2_2013, income_cat_3_2013, income_cat_4_2013 = (
    share * AMI_2013 for share in AMI_SHARES
)
income_cat_1_2009, income_cat_2_2009, income_cat_3_2009, income_cat_4_2009 = (
    share * AMI_2009 for share in AMI_SHARES
)


In [None]:
# Show the four 2017 annual income limits, one per line.
for income_limit in (income_cat_1_2017, income_cat_2_2017, income_cat_3_2017, income_cat_4_2017):
    print(income_limit)

In [None]:
# Monthly housing-spending limit of each income category: 30% of the annual
# income limit, divided over 12 months.
cat1_spend_2017, cat2_spend_2017, cat3_spend_2017, cat4_spend_2017 = (
    (annual_income * 0.3) / 12
    for annual_income in (income_cat_1_2017, income_cat_2_2017, income_cat_3_2017, income_cat_4_2017)
)

cat1_spend_2013, cat2_spend_2013, cat3_spend_2013, cat4_spend_2013 = (
    (annual_income * 0.3) / 12
    for annual_income in (income_cat_1_2013, income_cat_2_2013, income_cat_3_2013, income_cat_4_2013)
)

cat1_spend_2009, cat2_spend_2009, cat3_spend_2009, cat4_spend_2009 = (
    (annual_income * 0.3) / 12
    for annual_income in (income_cat_1_2009, income_cat_2_2009, income_cat_3_2009, income_cat_4_2009)
)


In [None]:
# Show the four 2017 monthly spending limits, one per line.
for spending_limit in (cat1_spend_2017, cat2_spend_2017, cat3_spend_2017, cat4_spend_2017):
    print(spending_limit)

In [None]:
sf2013.head()

# create backets 
def backet(x):
    """Return the spending-category label for the amount ``x``.

    ``x`` is compared, in ascending order, with the notebook-level limits
    ``cat1_spend`` .. ``cat4_spend``; the first limit that ``x`` does not
    exceed decides the label. Anything above ``cat4_spend`` is category 5.
    """
    if x <= cat1_spend:
        return 'cat1_spending'
    if x <= cat2_spend:
        return 'cat2_spending'
    if x <= cat3_spend:
        return 'cat3_spending'
    if x <= cat4_spend:
        return 'cat4_spending'
    return 'Cat5_spending'
     

In [None]:
# create backets for 2009

def backet_2009(x, affordable_max=None, workforce_max=None):
    """Classify a 2009 monthly housing cost into a housing-supply bucket.

    Parameters
    ----------
    x : number
        Monthly housing spending of one property.
    affordable_max, workforce_max : number, optional
        Inclusive upper limits of the affordable and workforce buckets.
        When omitted they fall back to the notebook-level
        ``cat2_spend_2009`` and ``cat4_spend_2009``.

    Returns
    -------
    str or None
        'affordable_housing', 'workforce_housing' or 'market_rate';
        None when ``x`` is missing.
    """
    # A missing value compares False against every limit and would otherwise
    # be counted as 'market_rate'.
    if pd.isna(x):
        return None
    if affordable_max is None:
        affordable_max = cat2_spend_2009
    if workforce_max is None:
        workforce_max = cat4_spend_2009
    if x <= affordable_max:
        return 'affordable_housing'
    if x <= workforce_max:
        return 'workforce_housing'
    return 'market_rate'
     

In [None]:
# create backets for 2013

def backet_2013(x, affordable_max=None, workforce_max=None):
    """Classify a 2013 monthly housing cost into a housing-supply bucket.

    Parameters
    ----------
    x : number
        Monthly housing spending of one property.
    affordable_max, workforce_max : number, optional
        Inclusive upper limits of the affordable and workforce buckets.
        When omitted they fall back to the notebook-level
        ``cat2_spend_2013`` and ``cat4_spend_2013``.

    Returns
    -------
    str or None
        'affordable_housing', 'workforce_housing' or 'market_rate';
        None when ``x`` is missing.
    """
    # A missing value compares False against every limit and would otherwise
    # be counted as 'market_rate'.
    if pd.isna(x):
        return None
    if affordable_max is None:
        affordable_max = cat2_spend_2013
    if workforce_max is None:
        workforce_max = cat4_spend_2013
    if x <= affordable_max:
        return 'affordable_housing'
    if x <= workforce_max:
        return 'workforce_housing'
    return 'market_rate'

In [None]:
# create backets for 2017

def backet_2017(x, affordable_max=None, workforce_max=None):
    """Classify a 2017 monthly housing cost into a housing-supply bucket.

    Parameters
    ----------
    x : number
        Monthly housing spending of one property.
    affordable_max, workforce_max : number, optional
        Inclusive upper limits of the affordable and workforce buckets.
        When omitted they fall back to the notebook-level
        ``cat2_spend_2017`` and ``cat4_spend_2017``.

    Returns
    -------
    str or None
        'affordable_housing', 'workforce_housing' or 'market_rate';
        None when ``x`` is missing.
    """
    # A missing value compares False against every limit and would otherwise
    # be counted as 'market_rate'.
    if pd.isna(x):
        return None
    if affordable_max is None:
        affordable_max = cat2_spend_2017
    if workforce_max is None:
        workforce_max = cat4_spend_2017
    if x <= affordable_max:
        return 'affordable_housing'
    if x <= workforce_max:
        return 'workforce_housing'
    return 'market_rate'

In [None]:
# Label every property in each year with its housing-supply bucket, using
# that year's classifier.
sf2013['backets'] = sf2013['Monthly_Housing_Spending'].map(backet_2013)
sf2009['backets'] = sf2009['Monthly_Housing_Spending'].map(backet_2009)
sf2017['backets'] = sf2017['Monthly_Housing_Spending'].map(backet_2017)

In [None]:
sf2013.head()

In [None]:
# 2013 housing supply, split by bucket label.
labels_2013 = sf2013['backets']
supply2013_backet1 = sf2013.loc[labels_2013.eq('affordable_housing')]  # bucket 1
supply2013_backet2 = sf2013.loc[labels_2013.eq('workforce_housing')]   # bucket 2
supply2013_backet3 = sf2013.loc[labels_2013.eq('market_rate')]         # bucket 3


In [None]:
# 2009 housing supply, split by bucket label.
labels_2009 = sf2009['backets']
supply2009_backet1 = sf2009.loc[labels_2009.eq('affordable_housing')]  # bucket 1
supply2009_backet2 = sf2009.loc[labels_2009.eq('workforce_housing')]   # bucket 2
supply2009_backet3 = sf2009.loc[labels_2009.eq('market_rate')]         # bucket 3


In [None]:
# 2017 housing supply, split by bucket label.
labels_2017 = sf2017['backets']
supply2017_backet1 = sf2017.loc[labels_2017.eq('affordable_housing')]  # bucket 1
supply2017_backet2 = sf2017.loc[labels_2017.eq('workforce_housing')]   # bucket 2
supply2017_backet3 = sf2017.loc[labels_2017.eq('market_rate')]         # bucket 3



In [None]:
supply2013_backet3.head()

In [None]:
# 2009_AMI=64900, 2013_AMI=62300

# earning by zip code backets as demand
# housing costs by backets as supply (2009,2013,2017)

In [None]:
# Import the 2009 income-by-zip-code data for Tennessee.
# Rows that are NaN in every column come from blank rows in the spreadsheet.
# skiprows=3 skips the first three rows of the sheet before the header row.
income_by_zip_2009=pd.read_excel('09zp43tn.xls',skiprows=3)
income_by_zip_2009.tail()

In [None]:
income_by_zip_2009.columns

In [None]:
# Shorten the spreadsheet headers used later in the analysis.
income_columns_2009 = {
    'ZIP\ncode [1]': 'Zip',
    'Size of adjusted gross income': 'IncomeBacket',
    'Number of returns': 'NumberofPeople',
    'Adjusted gross income (AGI)': 'AGI',
}
income_by_zip_2009 = income_by_zip_2009.rename(columns=income_columns_2009)
income_by_zip_2009.head()

In [None]:
# Remove blank rows and the per-zip-code subtotal rows.
# dropna() drops every row that has a NaN in any column.
# AGI is in thousands of dollars.
income_by_zip_2009=income_by_zip_2009.dropna()
income_by_zip_2009.tail()

In [None]:
# Drop the rows filed under zip code 99999 (presumably a catch-all code, not
# a real zip — TODO confirm against the source file's notes).
# The comparison is numeric so the rows are removed whether the Zip column was
# read as text or as numbers; comparing a numeric column with the string
# '99999' removes nothing.
income_by_zip_2009=income_by_zip_2009[pd.to_numeric(income_by_zip_2009.Zip,errors='coerce')!=99999]

In [None]:
# exclude people earning $200,000 or more
# .copy() makes the filtered rows an independent frame, so the column
# assignment below does not write into a slice of the previous frame
# (SettingWithCopyWarning).
income_by_zip_2009=income_by_zip_2009[income_by_zip_2009['IncomeBacket']!='$200,000 or more'].copy()

# AGI is in thousands of dollars; convert it to dollars.
# NOTE: re-running this cell multiplies AGI by 1000 again.
income_by_zip_2009['AGI']=income_by_zip_2009['AGI']*1000

In [None]:
# Replace AGI with the monthly average adjusted gross income per return:
# (total AGI of the row / number of returns) / 12.
# NOTE: the AGI column is overwritten, so re-running this cell divides again.
income_by_zip_2009['AGI']=1/12*(income_by_zip_2009['AGI']/income_by_zip_2009['NumberofPeople'])
income_by_zip_2009.head()

In [None]:
# The 2016 file is used for 2017: there is no 2017 data on the IRS site.

# Import the income-by-zip-code data for Tennessee.
# Rows that are NaN in every column come from blank rows in the spreadsheet.
# skiprows=3 skips the first three rows of the sheet before the header row.
income_by_zip_2017=pd.read_excel('16zp43tn.xls',skiprows=3)
# print the head
income_by_zip_2017.head()

In [None]:
# see column names
income_by_zip_2017.columns

In [None]:
# Shorten the spreadsheet headers used later in the analysis.
income_columns_2017 = {
    'ZIP\ncode [1]': 'Zip',
    'Number of returns': 'NumberofPeople',
    'Size of adjusted gross income': 'IncomeBacket',
    'Adjusted gross income (AGI) [4]': 'AGI',
}
income_by_zip_2017 = income_by_zip_2017.rename(columns=income_columns_2017)
income_by_zip_2017.head()

In [None]:
income_by_zip_2017.columns

In [None]:
# Remove blank rows and the per-zip-code subtotal rows.
# dropna() drops every row that has a NaN in any column.
# AGI is in thousands of dollars.
# income_by_zip_2017[income_by_zip_2017.IncomeBacket!='NaN']
income_by_zip_2017=income_by_zip_2017.dropna()

In [None]:
# Drop the rows filed under zip code 99999 (presumably a catch-all code, not
# a real zip — TODO confirm against the source file's notes).
# The comparison is numeric so the rows are removed whether the Zip column was
# read as text or as numbers; comparing a numeric column with the string
# '99999' removes nothing.
income_by_zip_2017=income_by_zip_2017[pd.to_numeric(income_by_zip_2017.Zip,errors='coerce')!=99999]

In [None]:
income_by_zip_2017.head()

In [None]:
# exclude people earning $200,000 or more
# .copy() makes the filtered rows an independent frame, so the column
# assignments below do not write into a slice of the previous frame
# (SettingWithCopyWarning).
income_by_zip_2017=income_by_zip_2017[income_by_zip_2017['IncomeBacket']!='$200,000 or more'].copy()

# AGI is in thousands of dollars; convert it to dollars.
income_by_zip_2017['AGI']=income_by_zip_2017['AGI']*1000

# Replace AGI with the monthly average adjusted gross income per return:
# (total AGI of the row / number of returns) / 12.
# NOTE: the AGI column is overwritten, so re-running this cell rescales it again.
income_by_zip_2017['AGI']=1/12*(income_by_zip_2017['AGI']/income_by_zip_2017['NumberofPeople'])
income_by_zip_2017.head()

In [None]:
# Monthly area median income (AMI) for each year.
AMI_17 = 68000 / 12
AMI_09 = 64900 / 12
AMI_13 = 62300 / 12

# Category 1..4 upper limits as a fraction of the monthly AMI.
MONTHLY_AMI_SHARES = (0.30, 0.60, 0.80, 1.20)

income_cat_1_17, income_cat_2_17, income_cat_3_17, income_cat_4_17 = (
    share * AMI_17 for share in MONTHLY_AMI_SHARES
)
income_cat_1_13, income_cat_2_13, income_cat_3_13, income_cat_4_13 = (
    share * AMI_13 for share in MONTHLY_AMI_SHARES
)
income_cat_1_09, income_cat_2_09, income_cat_3_09, income_cat_4_09 = (
    share * AMI_09 for share in MONTHLY_AMI_SHARES
)


In [None]:

# create income backets for 2017

def backet_2017_income(x, low_max=None, medium_max=None):
    """Classify a 2017 monthly income into an income (demand) bucket.

    Parameters
    ----------
    x : number
        Monthly income to classify.
    low_max, medium_max : number, optional
        Inclusive upper limits of the low- and medium-income buckets.
        When omitted they fall back to the notebook-level
        ``income_cat_2_17`` and ``income_cat_4_17``.

    Returns
    -------
    str or None
        'low_income', 'medium_income' or 'high_income'; None when ``x``
        is missing.
    """
    # A missing value compares False against every limit and would otherwise
    # be counted as 'high_income'.
    if pd.isna(x):
        return None
    if low_max is None:
        low_max = income_cat_2_17
    if medium_max is None:
        medium_max = income_cat_4_17
    if x <= low_max:
        return 'low_income'
    if x <= medium_max:
        return 'medium_income'
    return 'high_income'

In [None]:
# create income backets for 2013

def backet_2013_income(x, low_max=None, medium_max=None):
    """Classify a 2013 monthly income into an income (demand) bucket.

    Parameters
    ----------
    x : number
        Monthly income to classify.
    low_max, medium_max : number, optional
        Inclusive upper limits of the low- and medium-income buckets.
        When omitted they fall back to the notebook-level
        ``income_cat_2_13`` and ``income_cat_4_13``.

    Returns
    -------
    str or None
        'low_income', 'medium_income' or 'high_income'; None when ``x``
        is missing.
    """
    # A missing value compares False against every limit and would otherwise
    # be counted as 'high_income'.
    if pd.isna(x):
        return None
    if low_max is None:
        low_max = income_cat_2_13
    if medium_max is None:
        medium_max = income_cat_4_13
    if x <= low_max:
        return 'low_income'
    if x <= medium_max:
        return 'medium_income'
    return 'high_income'

In [None]:
# create income backets for 2009

def backet_2009_income(x, low_max=None, medium_max=None):
    """Classify a 2009 monthly income into an income (demand) bucket.

    Parameters
    ----------
    x : number
        Monthly income to classify.
    low_max, medium_max : number, optional
        Inclusive upper limits of the low- and medium-income buckets.
        When omitted they fall back to the notebook-level
        ``income_cat_2_09`` and ``income_cat_4_09``.

    Returns
    -------
    str or None
        'low_income', 'medium_income' or 'high_income'; None when ``x``
        is missing.
    """
    # A missing value compares False against every limit and would otherwise
    # be counted as 'high_income'.
    if pd.isna(x):
        return None
    if low_max is None:
        low_max = income_cat_2_09
    if medium_max is None:
        medium_max = income_cat_4_09
    if x <= low_max:
        return 'low_income'
    if x <= medium_max:
        return 'medium_income'
    return 'high_income'

In [None]:
# sf2013['backets']=sf2013.Monthly_Housing_Spending.apply(backet_2013)

income_by_zip_2009['backets']=income_by_zip_2009.AGI.apply(backet_2009_income)

income_by_zip_2017['backets']=income_by_zip_2017.AGI.apply(backet_2017_income)
# we use the same income we imported for 2017 for 2013 becuase the IRS site is giving error to download 2013 data.
income_by_zip_2013=income_by_zip_2017
income_by_zip_2013['backets']=income_by_zip_2013.AGI.apply(backet_2013_income)

In [None]:
income_by_zip_2017.head()

In [None]:
# create backet 1 for demand
demand2017_backet1=income_by_zip_2017[income_by_zip_2017['backets']=='low_income']
# create backet 2 for demand
demand2017_backet2=income_by_zip_2017[income_by_zip_2017['backets']=='medium_income']
# create backet 3 for demand
demand2017_backet3=income_by_zip_2017[income_by_zip_2017['backets']=='high_income']

In [None]:
# create backet 1 for demand
demand2013_backet1=income_by_zip_2017[income_by_zip_2017['backets']=='low_income']
# create backet 2 for demand
demand2013_backet2=income_by_zip_2017[income_by_zip_2017['backets']=='medium_income']
# create backet 3 for demand
demand2013_backet3=income_by_zip_2013[income_by_zip_2013['backets']=='high_income']

In [None]:
demand2017_backet2.head()

In [None]:
# 2009 housing demand, split by income bucket label.
income_labels_2009 = income_by_zip_2009['backets']
demand2009_backet1 = income_by_zip_2009.loc[income_labels_2009.eq('low_income')]     # bucket 1
demand2009_backet2 = income_by_zip_2009.loc[income_labels_2009.eq('medium_income')]  # bucket 2
demand2009_backet3 = income_by_zip_2009.loc[income_labels_2009.eq('high_income')]    # bucket 3

In [None]:
demand2017_backet1.head()

In [None]:
# For each 2017 bucket print the demand (sum of NumberofPeople) followed by
# the supply (number of labelled properties).
demand_supply_2017 = (
    (demand2017_backet1, supply2017_backet1),
    (demand2017_backet2, supply2017_backet2),
    (demand2017_backet3, supply2017_backet3),
)
for demand_rows, supply_rows in demand_supply_2017:
    print(demand_rows.NumberofPeople.sum())
    print(supply_rows.backets.count())


In [None]:
# For each 2013 bucket print the demand (sum of NumberofPeople) followed by
# the supply (number of labelled properties).
demand_supply_2013 = (
    (demand2013_backet1, supply2013_backet1),
    (demand2013_backet2, supply2013_backet2),
    (demand2013_backet3, supply2013_backet3),
)
for demand_rows, supply_rows in demand_supply_2013:
    print(demand_rows.NumberofPeople.sum())
    print(supply_rows.backets.count())

In [None]:
# For each 2009 bucket print the demand (sum of NumberofPeople) followed by
# the supply (number of labelled properties).
demand_supply_2009 = (
    (demand2009_backet1, supply2009_backet1),
    (demand2009_backet2, supply2009_backet2),
    (demand2009_backet3, supply2009_backet3),
)
for demand_rows, supply_rows in demand_supply_2009:
    print(demand_rows.NumberofPeople.sum())
    print(supply_rows.backets.count())

In [None]:
demand2009_backet1.NumberofPeople.sum()

In [None]:
# Small sample of sf2013 (head() with its default row count); the map cells
# below are built from this sample rather than the full frame.
sf2013_df=sf2013.head()

In [None]:
supply2009_backet1.head()
demand2009_backet1.head()

In [None]:
#demand and suppy by zip code in 2009 for backet 1

# Demand per zip code: sum of NumberofPeople over the low-income rows.
demand_2009_b1 = (
    demand2009_backet1[['Zip', 'NumberofPeople']]
    .groupby('Zip')
    .sum()
    .sort_values('NumberofPeople', ascending=False)
)
# Supply per zip code: number of properties labelled affordable.
supply_2009_b1 = (
    supply2009_backet1[['Zip', 'backets']]
    .groupby('Zip')
    .count()
    .sort_values('backets', ascending=False)
)

In [None]:
# Join demand and supply for 2009 bucket 1 on the zip-code index (left join:
# every zip code with demand is kept).
DD_SS_2009_Backet1 = demand_2009_b1.join(supply_2009_b1).rename(
    columns={'NumberofPeople': 'demand_b1_09', 'backets': 'supply_b1_09'}
)
# Zip codes without any matching supply row get a supply of 0. The result is
# assigned back to the column: an in-place fillna on a column pulled out of
# the frame is a chained update that pandas does not guarantee to write back.
DD_SS_2009_Backet1['supply_b1_09'] = DD_SS_2009_Backet1['supply_b1_09'].fillna(0)
# Positive = more affordable homes than demand; negative = shortage.
DD_SS_2009_Backet1['surplus'] = DD_SS_2009_Backet1['supply_b1_09'] - DD_SS_2009_Backet1['demand_b1_09']
DD_SS_2009_Backet1.head()

# there was no surplus in 2009 in any zip code around Nashville

In [None]:
# Split the zip codes into those with a surplus and those with a shortage,
# and keep the ones that report any affordable supply at all.
surplus_balance = DD_SS_2009_Backet1['surplus']
surplus_2009_Backet1 = DD_SS_2009_Backet1.loc[surplus_balance > 0]
shortage_2009_Backet1 = DD_SS_2009_Backet1.loc[surplus_balance < 0]
non_zero_supply_b1_09 = DD_SS_2009_Backet1.loc[DD_SS_2009_Backet1['supply_b1_09'] > 0]
for zip_subset in (surplus_2009_Backet1, shortage_2009_Backet1):
    print(zip_subset.shape)

- Only zip codes where affordable houses were reported are included in this plot
- Zip codes around downtown have a smaller shortage
- However, this could be because of the number of residents there, as it is a commercial area

In [None]:
# Top 20 zip codes by affordable supply, among those with non-zero supply.
# head(20) is kept in the variable: previously its result was discarded, so
# the chart drew every zip code instead of the top 20.
top_20_suppy_b1_09 = (
    non_zero_supply_b1_09
    .sort_values('supply_b1_09', ascending=False)
    .head(20)
)
top_20_suppy_b1_09.plot(kind='bar')

In [None]:


# For each 2009 bucket print the demand (sum of NumberofPeople) followed by
# the supply (number of labelled properties).
for demand_rows, supply_rows in (
    (demand2009_backet1, supply2009_backet1),
    (demand2009_backet2, supply2009_backet2),
    (demand2009_backet3, supply2009_backet3),
):
    print(demand_rows.NumberofPeople.sum())
    print(supply_rows.backets.count())

In [None]:
# Rows to plot: the sf2013 sample without rows that lack a latitude.
# .copy() makes this an independent frame, so adding a column below neither
# warns nor touches sf2013.
sf2013_map = sf2013_df.dropna(subset=['LAT']).copy()
# One shapely Point per plotted row, in (longitude, latitude) order. The
# points are built from, and stored on, the filtered map frame. Previously
# they were computed from the unfiltered sample and written into the full
# sf2013, which blanked its geometry column for every row outside the sample.
sf2013_map['geometry'] = [
    Point((float(lng), float(lat)))
    for lng, lat in zip(sf2013_map['LNG'], sf2013_map['LAT'])
]
sf2013_map.info()

In [None]:
# Base map centred on [36.1627, -86.7816] at zoom level 8; the property
# markers are added to it in a later cell.
nashville_map = folium.Map(location = [36.1627, -86.7816], zoom_start = 8)

In [None]:
sf2013_map.head(2)

# Add one marker per property to the map. The popup lists the zip code, city,
# monthly housing spending, bucket and land value of the property.
for _, row_values in sf2013_map.iterrows():
    # Named marker_location so the loop does not overwrite the notebook-level
    # `location` DataFrame created earlier.
    marker_location = [row_values['LAT'], row_values['LNG']]
    popup = 'Zip Code: ' + str(row_values['Zip']) + '<br/>' +  'City: ' + str(row_values['City']) + '<br/>' + 'Housing_Spending: ' + str(row_values['Monthly_Housing_Spending'])+ '<br/>'+ 'backets: ' + str(row_values['backets'])+'<br/>'+ 'LandValue: ' + str(row_values['LandValue'])
    # A property is shown when it has a positive housing spending or a
    # positive land value. It gets exactly one marker: the two tests used to
    # create a separate, identical marker each, so rows passing both were
    # stacked twice on the map.
    if row_values['Monthly_Housing_Spending'] > 0 or row_values['LandValue'] > 0:
        folium.Marker(location=marker_location, popup=popup).add_to(nashville_map)


# Show map

nashville_map


In [None]:
# for demand and supply prediction
# use average single family house value in 2013 in Tennesse
# find coefficient by calculating the change in demand betweeen 2013 and 2017
# use that coefficient and predict for 2017