## Data Collection for Auckland House Prices Analysis

In [1]:
import json
import sys
sys.path.append('/home/nbuser/library')
import time
import pandas as pd
import requests

In [2]:
def get_population(coor):
    ''' Takes latitude and longitude coordinates as input, returns 2018 census population. '''
    
    time.sleep(0.2)
    
    url = 'https://koordinates.com/services/query/v1/vector.json'

    params = {
        'key': '9b1fa34ae901439e973320201766e864',
        'layer': '104612',
        'x': coor[1],
        'y': coor[0],
    }

    response = requests.get(url, params=params)
    
    if response.status_code != 200:
        return pd.Series({'Population': response.status_code})

    pop = response.json()['vectorQuery']['layers']['104612']['features'][0]['properties']['C18_CURPop']
    
    return pd.Series({'Population': pop})

In [3]:
df = pd.read_csv('Properties.csv')

In [4]:
df.head()

Unnamed: 0,Bedrooms,Bathrooms,Address,Land area,CV,Latitude,Longitude,SA1,0-19 years,20-29 years,30-39 years,40-49 years,50-59 years,60+ years,Suburbs
0,5,3.0,"106 Lawrence Crescent Hill Park, Auckland",714,960000,-37.01292,174.904069,7009770,48,27,24,21,24,21,Manurewa
1,5,3.0,"8 Corsica Way Karaka, Auckland",564,1250000,-37.063672,174.922912,7009991,42,18,12,21,15,30,Karaka
2,6,4.0,"243 Harbourside Drive Karaka, Auckland",626,1250000,-37.06358,174.924044,7009991,42,18,12,21,15,30,Karaka
3,2,1.0,"2/30 Hardington Street Onehunga, Auckland",65,740000,-36.912996,174.787425,7007871,42,6,21,21,12,15,Onehunga
4,3,1.0,"59 Israel Avenue Clover Park, Auckland",601,630000,-36.979037,174.892612,7008902,93,27,33,30,21,33,Clover Park


In [5]:
def combine_lat_lon(lat, lon):
    return (lat, lon)

In [6]:
df['Combined coordinates'] = df.apply(lambda x : combine_lat_lon(x['Latitude'], x['Longitude']), axis = 1)

In [7]:
df.head()

Unnamed: 0,Bedrooms,Bathrooms,Address,Land area,CV,Latitude,Longitude,SA1,0-19 years,20-29 years,30-39 years,40-49 years,50-59 years,60+ years,Suburbs,Combined coordinates
0,5,3.0,"106 Lawrence Crescent Hill Park, Auckland",714,960000,-37.01292,174.904069,7009770,48,27,24,21,24,21,Manurewa,"(-37.0129205, 174.90406940000003)"
1,5,3.0,"8 Corsica Way Karaka, Auckland",564,1250000,-37.063672,174.922912,7009991,42,18,12,21,15,30,Karaka,"(-37.0636724, 174.9229121)"
2,6,4.0,"243 Harbourside Drive Karaka, Auckland",626,1250000,-37.06358,174.924044,7009991,42,18,12,21,15,30,Karaka,"(-37.063579700000005, 174.9240444)"
3,2,1.0,"2/30 Hardington Street Onehunga, Auckland",65,740000,-36.912996,174.787425,7007871,42,6,21,21,12,15,Onehunga,"(-36.9129964, 174.78742490000002)"
4,3,1.0,"59 Israel Avenue Clover Park, Auckland",601,630000,-36.979037,174.892612,7008902,93,27,33,30,21,33,Clover Park,"(-36.979036900000004, 174.8926119)"


In [8]:
sample = df.head(5)
sample

Unnamed: 0,Bedrooms,Bathrooms,Address,Land area,CV,Latitude,Longitude,SA1,0-19 years,20-29 years,30-39 years,40-49 years,50-59 years,60+ years,Suburbs,Combined coordinates
0,5,3.0,"106 Lawrence Crescent Hill Park, Auckland",714,960000,-37.01292,174.904069,7009770,48,27,24,21,24,21,Manurewa,"(-37.0129205, 174.90406940000003)"
1,5,3.0,"8 Corsica Way Karaka, Auckland",564,1250000,-37.063672,174.922912,7009991,42,18,12,21,15,30,Karaka,"(-37.0636724, 174.9229121)"
2,6,4.0,"243 Harbourside Drive Karaka, Auckland",626,1250000,-37.06358,174.924044,7009991,42,18,12,21,15,30,Karaka,"(-37.063579700000005, 174.9240444)"
3,2,1.0,"2/30 Hardington Street Onehunga, Auckland",65,740000,-36.912996,174.787425,7007871,42,6,21,21,12,15,Onehunga,"(-36.9129964, 174.78742490000002)"
4,3,1.0,"59 Israel Avenue Clover Park, Auckland",601,630000,-36.979037,174.892612,7008902,93,27,33,30,21,33,Clover Park,"(-36.979036900000004, 174.8926119)"


In [9]:
sample['Population'] = sample['Combined coordinates'].apply(get_population)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [10]:
sample

Unnamed: 0,Bedrooms,Bathrooms,Address,Land area,CV,Latitude,Longitude,SA1,0-19 years,20-29 years,30-39 years,40-49 years,50-59 years,60+ years,Suburbs,Combined coordinates,Population
0,5,3.0,"106 Lawrence Crescent Hill Park, Auckland",714,960000,-37.01292,174.904069,7009770,48,27,24,21,24,21,Manurewa,"(-37.0129205, 174.90406940000003)",174
1,5,3.0,"8 Corsica Way Karaka, Auckland",564,1250000,-37.063672,174.922912,7009991,42,18,12,21,15,30,Karaka,"(-37.0636724, 174.9229121)",129
2,6,4.0,"243 Harbourside Drive Karaka, Auckland",626,1250000,-37.06358,174.924044,7009991,42,18,12,21,15,30,Karaka,"(-37.063579700000005, 174.9240444)",129
3,2,1.0,"2/30 Hardington Street Onehunga, Auckland",65,740000,-36.912996,174.787425,7007871,42,6,21,21,12,15,Onehunga,"(-36.9129964, 174.78742490000002)",120
4,3,1.0,"59 Israel Avenue Clover Park, Auckland",601,630000,-36.979037,174.892612,7008902,93,27,33,30,21,33,Clover Park,"(-36.979036900000004, 174.8926119)",231


#### Add population to dataframe

In [11]:
df['Population'] = df['Combined coordinates'].apply(get_population)

In [12]:
df.tail()

Unnamed: 0,Bedrooms,Bathrooms,Address,Land area,CV,Latitude,Longitude,SA1,0-19 years,20-29 years,30-39 years,40-49 years,50-59 years,60+ years,Suburbs,Combined coordinates,Population
1046,4,1.0,"19 Landscape Road, Auckland",1368 m²,670000,-36.899255,174.761165,7005464,54,18,15,24,21,27,Mount Eden,"(-36.899255, 174.7611645)",159
1047,6,1.0,"56 Galway Street, Auckland",607 m²,1200000,-36.844933,174.770001,7005497,15,27,24,15,18,30,Auckland Central,"(-36.844933000000005, 174.770001)",129
1048,5,3.0,"28A Hayr Road, Auckland",453 m²,1250000,-36.912242,174.756726,7007758,36,30,45,21,24,21,Three Kings,"(-36.9122424, 174.75672590000002)",180
1049,5,2.0,"27 Market Road, Auckland",1854 m²,5300000,-36.879665,174.787668,7005745,48,18,12,15,36,45,Remuera,"(-36.8796648, 174.7876684)",174
1050,3,1.0,"23 William Avenue, Auckland",806 m²,1665000,-36.897104,174.800171,7005917,54,33,27,27,15,30,Greenlane,"(-36.897104299999995, 174.8001712)",192


#### Save dataframe with population to csv

In [13]:
df.to_csv('add-population.csv')

#### Read Deprivation index data file

In [14]:
df2 = pd.read_excel('otago730395.xlsx')

In [15]:
df2.head()

Unnamed: 0,SA12018_code,NZDep2018,NZDep2018_Score,URPopnSA1_2018,SA22018_code,SA22018_name
0,7000000,10.0,1245.0,141,100100,North Cape
1,7000001,10.0,1245.0,114,100100,North Cape
2,7000002,,,0,100300,Inlets Far North District
3,7000003,10.0,1207.0,225,100100,North Cape
4,7000004,9.0,1093.0,138,100100,North Cape


In [16]:
sample.merge(df2[['NZDep2018', 'NZDep2018_Score', 'SA12018_code']], 'left', left_on='SA1', right_on='SA12018_code').drop('SA12018_code', axis=1)

Unnamed: 0,Bedrooms,Bathrooms,Address,Land area,CV,Latitude,Longitude,SA1,0-19 years,20-29 years,30-39 years,40-49 years,50-59 years,60+ years,Suburbs,Combined coordinates,Population,NZDep2018,NZDep2018_Score
0,5,3.0,"106 Lawrence Crescent Hill Park, Auckland",714,960000,-37.01292,174.904069,7009770,48,27,24,21,24,21,Manurewa,"(-37.0129205, 174.90406940000003)",174,6.0,997.0
1,5,3.0,"8 Corsica Way Karaka, Auckland",564,1250000,-37.063672,174.922912,7009991,42,18,12,21,15,30,Karaka,"(-37.0636724, 174.9229121)",129,1.0,881.0
2,6,4.0,"243 Harbourside Drive Karaka, Auckland",626,1250000,-37.06358,174.924044,7009991,42,18,12,21,15,30,Karaka,"(-37.063579700000005, 174.9240444)",129,1.0,881.0
3,2,1.0,"2/30 Hardington Street Onehunga, Auckland",65,740000,-36.912996,174.787425,7007871,42,6,21,21,12,15,Onehunga,"(-36.9129964, 174.78742490000002)",120,2.0,908.0
4,3,1.0,"59 Israel Avenue Clover Park, Auckland",601,630000,-36.979037,174.892612,7008902,93,27,33,30,21,33,Clover Park,"(-36.979036900000004, 174.8926119)",231,9.0,1091.0


#### Add Deprivation index to dataframe

In [20]:
df = df.merge(df2[['NZDep2018', 'NZDep2018_Score', 'SA12018_code']], 'left', left_on='SA1', right_on='SA12018_code').drop('SA12018_code', axis=1)

#### Save dataframe with Deprivation index to csv

In [21]:
df.to_csv('add-deprivation.csv')