# Coordinates and Location Scores

In this notebook, I add longitude, latitude, and location scores (Walk Score, Bike Score, and Transit Score) for each listing address to enhance predictive accuracy.

In [18]:
import os

import pandas as pd
import requests
# Load the pre-cleaned rental listings; the frame is shown as the cell output.
listings_csv = 'pre_clean_data.csv'
df = pd.read_csv(listings_csv)
df

Unnamed: 0,name,price,address,Type,nbeds,nbath,laundry,heating,parking,air_conditioner,furnished,animal_friendly,Area
0,3 beds 3 baths townhouse,3300,"kelowna, bc, v1y 5t5",Townhouse,3,3,in-unit laundry,gas heating,garage parking,central ac,unfurnished,No pets,1371
1,2 beds 1 bath house,2000,"west kelowna, bc, v4t",House,2,1,in-unit laundry,Not specified,Not specified,Not specified,Not specified,dog and cat friendly,914
2,1 bed 1 bath apartment,1250,"kelowna, bc, v1v",Apartment/Condo,1,1,laundry in building,Not specified,parking available,Not specified,unfurnished,No pets,910
3,1 bedroom available in 2 bedroom basement suite,650,"west kelowna, bc",Room,1,1,in-unit laundry,central heating,street parking,Not specified,unfurnished,No pets,150
4,3 beds 1 bath house,2000,"kelowna, bc, v1v 1r1",House,3,1,Not specified,Not specified,Not specified,Not specified,Not specified,No pets,1200
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1258,one bedroom in 3 bed 3 bath condo,1000,"kelowna, bc, v1v 0a5",Room,1,1,Not specified,Not specified,Not specified,Not specified,unfurnished,No pets,150
1259,come stay with us pet friendly kelowna cottage,1111,"central okanagan, bc, v1z 3r8",Room,1,1,Not specified,Not specified,Not specified,Not specified,unfurnished,dog friendly,150
1260,room for rent available in rutland,850,"kelowna, bc",Room,1,1,Not specified,Not specified,Not specified,Not specified,Not specified,No pets,150
1261,room for rent for girls in rutland dm for more...,900,"kelowna, bc",Room,1,1,Not specified,Not specified,Not specified,Not specified,unfurnished,No pets,150


To eliminate duplicates, I created a new dataframe named "df_address" that consists of the unique values from the "address" column.

In [2]:
# Single-column frame holding each distinct address once (first occurrence
# wins, original row labels are kept).
df_address = df[['address']].drop_duplicates()

The following code utilizes the MapQuest API to retrieve longitude and latitude data based on the information in the "address" column.

In [5]:
# Geocode every unique address with the MapQuest Geocoding API and store the
# coordinates of the first match in the 'lat' / 'lng' columns of df_address.
#
# - HTTPS is used so the API key is not sent in clear text.
# - The key is read from the MAPQUEST_API_KEY environment variable; the
#   "API_KEY" placeholder is only a fallback and must be replaced to get
#   real results.
# - A timeout keeps one stalled request from hanging the whole notebook.
MAPQUEST_GEOCODE_URL = "https://www.mapquestapi.com/geocoding/v1/address"
MAPQUEST_TIMEOUT_SECONDS = 10
mapquest_api_key = os.environ.get("MAPQUEST_API_KEY", "API_KEY")

for index, row in df_address.iterrows():
    api_address = row['address']
    parameters = {
        "key": mapquest_api_key,
        "location": api_address
    }
    response = requests.get(
        MAPQUEST_GEOCODE_URL,
        params=parameters,
        timeout=MAPQUEST_TIMEOUT_SECONDS,
    )
    # Surface HTTP errors (bad key, quota, ...) directly instead of failing
    # later while decoding a non-JSON error body.
    response.raise_for_status()
    results = response.json().get('results') or []
    locations = results[0].get('locations') if results else None
    if not locations:
        # Name the offending address rather than raising a bare IndexError.
        raise ValueError(f"MapQuest returned no location for address {api_address!r}")
    lat_lng = locations[0]['latLng']
    df_address.at[index, 'lat'] = lat_lng['lat']
    df_address.at[index, 'lng'] = lat_lng['lng']

In [19]:
# Reuse the saved location table when it is already on disk. This file is
# written by a later cell of this notebook, so on a fresh run it may not exist
# yet; in that case keep the df_address built by the preceding cells instead
# of failing with FileNotFoundError.
location_cache_csv = '../../processed/location.csv'
if os.path.exists(location_cache_csv):
    df_address = pd.read_csv(location_cache_csv)

In [4]:
# Display the current address table as the cell output for inspection.
df_address

Unnamed: 0,address,lat,lng,Walk Score,Bike Score,Transit Score
0,"kelowna, bc, v1y 5t5",49.87816,-119.47675,54,96,56
1,"west kelowna, bc, v4t",49.83173,-119.62367,43,49,-1
2,"kelowna, bc, v1v",49.94804,-119.42824,8,35,19
3,"west kelowna, bc",49.85902,-119.58413,6,23,-1
4,"kelowna, bc, v1v 1r1",49.91729,-119.44397,66,90,37
...,...,...,...,...,...,...
487,"kelowna, bc, v1y 2r7",49.86395,-119.48816,75,97,48
488,"penticton, bc, v2a 8t7",49.53912,-119.56932,0,28,-1
489,"west kelowna, bc, v4t 1v3",49.84618,-119.60449,14,24,-1
490,"kelowna, bc, v1v 2n2",49.90500,-119.46058,15,52,37


In [9]:
# function to get walk, bike and transit scores
def get_walkscore(address, lat, lon):
    """Query the Walk Score API for one location and return the decoded JSON.

    The query string is passed through ``params`` so that ``requests``
    URL-encodes every value; characters such as ``&`` or ``#`` inside an
    address can no longer corrupt the request. The API key is read from the
    ``WALKSCORE_API_KEY`` environment variable, falling back to the
    ``"API_KEY"`` placeholder when it is unset.

    Args:
        address: Free-text address of the location.
        lat: Latitude of the location.
        lon: Longitude of the location.

    Returns:
        The parsed JSON body of the response, as returned by
        ``response.json()``. No HTTP status check is done here; callers
        inspect the payload for the keys they need.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            the request exceeds the timeout.
    """
    query = {
        "format": "json",
        "address": address,
        "lat": lat,
        "lon": lon,
        "transit": 1,
        "bike": 1,
        "wsapikey": os.environ.get("WALKSCORE_API_KEY", "API_KEY"),
    }
    # A timeout keeps one stalled request from hanging the whole notebook.
    response = requests.get("https://api.walkscore.com/score", params=query, timeout=10)
    return response.json()


# Score columns added to df_address, in the order _extract_scores returns them.
SCORE_COLUMNS = ["Walk Score", "Bike Score", "Transit Score"]
# Sentinel stored when the API response carries no value for a score.
MISSING_SCORE = -1


def _extract_scores(payload):
    """Return ``(walk, bike, transit)`` scores from one Walk Score response.

    ``walkscore`` is read from the top level of the payload; the bike and
    transit scores are read from the ``"score"`` key of the nested ``"bike"``
    and ``"transit"`` objects. Anything that is absent (or not shaped as
    expected) is reported as ``MISSING_SCORE``.
    """
    if not isinstance(payload, dict):
        payload = {}

    def nested_score(key):
        section = payload.get(key)
        if isinstance(section, dict) and "score" in section:
            return section["score"]
        return MISSING_SCORE

    return (
        payload.get("walkscore", MISSING_SCORE),
        nested_score("bike"),
        nested_score("transit"),
    )


# loop through each row and get scores
score_rows = [
    _extract_scores(get_walkscore(row["address"], row["lat"], row["lng"]))
    for _, row in df_address.iterrows()
]

# Assign whole columns at once so pandas infers a numeric dtype, instead of
# seeding the columns with "" and filling an object-dtype column cell by cell.
for position, column in enumerate(SCORE_COLUMNS):
    df_address[column] = [scores[position] for scores in score_rows]

df_address

Unnamed: 0,address,lat,lng,Walk Score,Bike Score,Transit Score
0,"kelowna, bc, v1y 5t5",49.87816,-119.47675,54,96,56
1,"west kelowna, bc, v4t",49.83173,-119.62367,43,49,-1
2,"kelowna, bc, v1v",49.94804,-119.42824,8,35,19
3,"west kelowna, bc",49.85902,-119.58413,6,23,-1
4,"kelowna, bc, v1v 1r1",49.91729,-119.44397,66,90,37
...,...,...,...,...,...,...
487,"kelowna, bc, v1y 2r7",49.86395,-119.48816,75,97,48
488,"penticton, bc, v2a 8t7",49.53912,-119.56932,0,28,-1
489,"west kelowna, bc, v4t 1v3",49.84618,-119.60449,14,24,-1
490,"kelowna, bc, v1v 2n2",49.90500,-119.46058,15,52,37


Here I save all the new data to a file called 'location.csv'.

In [10]:
# Persist the address table without the DataFrame index.
location_csv = "../../processed/location.csv"
df_address.to_csv(location_csv, index=False)

In [20]:
# Attach the per-address columns of df_address to every listing by joining on
# the shared 'address' column (default inner join).
df = pd.merge(left=df, right=df_address, on='address')

In [9]:
import folium

# Base map with the start view set to the coordinates below.
map_centre = [49.9138315, -119.439960]
map1 = folium.Map(location=map_centre, tiles='cartodbpositron', zoom_start=12)

# Add one circle marker per listing row at its latitude / longitude.
for marker_lat, marker_lng in zip(df["lat"], df["lng"]):
    folium.CircleMarker(location=[marker_lat, marker_lng]).add_to(map1)

map1

Unfortunately, GitHub can't render the interactive map, so I have included a static image of it here.

![Kelowna Map!](../../../images/map.png)

In [21]:
# Export the merged listings as CSV and as Excel. Both files omit the
# DataFrame index so the two exports have the same columns and no stray
# unnamed index column appears when the Excel file is read back.
df.to_csv("../../processed/clean_kelowna_housing_data.csv", index=False)
df.to_excel('../../processed/clean_kelowna_housing_data.xlsx', index=False)