In [25]:
import pandas as pd
import ast
from geopy.distance import geodesic
import math
import numpy as np
from get_bounds import *
from pathlib import Path

### Dataset Retrieval and Preprocessing

In [19]:
def getTCDSBSchools(path='../res/datasets/schools/TCDSB_Schools.csv'):
    """Load the TCDSB schools CSV and return one row per school with its coordinates.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the schools CSV. Defaults to the dataset path this notebook
        has always used, so existing zero-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` (from ``SCH_NAME``), ``type`` (always ``'tcdsb'``),
        ``lat`` and ``long``, indexed like the source CSV.

    Raises
    ------
    KeyError
        If the CSV lacks the ``SCH_NAME`` or ``geometry`` column.
    """
    df = pd.read_csv(path, sep=',')

    # Only the geometry column is needed for coordinates, so it is selected
    # directly instead of dropping every other column by name (which broke as
    # soon as any of those unused columns was missing from the file).
    # Each cell holds a dict literal in text form; literal_eval parses it safely.
    geometry = df['geometry'].astype('str').apply(ast.literal_eval)

    # The 'coordinates' entry is indexed as [long, lat]: element 1 is the
    # latitude and element 0 is the longitude.
    lat = geometry.apply(lambda g: g['coordinates'][1])
    long = geometry.apply(lambda g: g['coordinates'][0])

    return pd.DataFrame(data={'name': df['SCH_NAME'], 'type': 'tcdsb', 'lat': lat, 'long': long})

# Load the TCDSB schools as a frame with name/type/lat/long columns.
tcdsbSchools = getTCDSBSchools()

In [35]:
def getRestaurants(path='../res/datasets/csv/restaurant.csv'):
    """Load the restaurant CSV and tag every row as a restaurant.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the restaurant CSV. Defaults to the dataset path this
        notebook has always used, so existing zero-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``lat``, ``long``, ``name`` read from the file, plus ``type``
        set to ``'restaurant'`` on every row.
    """
    # The file is read as header-less: the three columns are named here.
    df = pd.read_csv(path, sep=',', header=None, names=['lat', 'long', 'name'])
    df['type'] = 'restaurant'
    return df

# Load the restaurants as a frame with lat/long/name/type columns.
restaurants = getRestaurants()

In [36]:
# Display the loaded restaurants frame as this cell's output.
restaurants

Unnamed: 0,lat,long,name,type
0,43.660201,-79.433188,The Black Briik,restaurant
1,43.754385,-79.215960,Prague Restaurant,restaurant
2,43.681487,-79.425824,Pi Co.,restaurant
3,43.655674,-79.409891,Harry's Charbroiled,restaurant
4,43.651706,-79.409605,The Fourth Man in the Fire,restaurant
...,...,...,...,...
2769,43.693111,-79.333620,Brunch On Cosburn Ave,restaurant
2770,43.692931,-79.334458,Sophie's Sports Bar & Grill,restaurant
2771,43.783739,-79.253209,Freshii,restaurant
2772,43.783710,-79.253151,Thai Room,restaurant


### Scoring Parameters

In [37]:
# Standard deviation of the Gaussian used by score(). It is compared against
# getDistance() results, which are expressed in kilometres.
STD_DEV = 0.6
# Divisor applied to the lat/long extent in generateGridScores to obtain the
# grid step along each axis.
PT_DENSITY = 50
# Grid scores that do not exceed this value are stored as 0.0.
SCORE_MIN_THRESHOLD = 0.1

### Scoring Functions

In [14]:
def getDistance(origin, stationCor):
    """Return the geodesic distance between two coordinate pairs, in kilometres.

    Both arguments are handed to ``geodesic`` unchanged; the result is the
    absolute value of its ``meters`` reading divided by 1000.
    """
    separation = geodesic(origin, stationCor)
    kilometres = separation.meters/1000
    return abs(kilometres)

In [23]:
def score(latStation, longStation, latOrigin, longOrigin, stdDev):
    """Sum a Gaussian of the distance from one station to every origin point.

    Parameters
    ----------
    latStation, longStation :
        Coordinates of the point being scored.
    latOrigin, longOrigin :
        Parallel iterables of origin latitudes and longitudes; they are paired
        up with ``zip``.
    stdDev :
        Standard deviation of the Gaussian, in the unit returned by
        ``getDistance``.

    Returns
    -------
    The sum over all origins of ``exp(-r^2 / (2 * stdDev^2))``, multiplied once
    by ``1 / (stdDev * sqrt(2 * pi))``.
    """
    stationCor = (latStation, longStation)
    variance = stdDev**2

    # Only the exponential term differs per origin, so the shared
    # normalisation constant is applied once after the loop.
    runningTotal = 0
    for originLat, originLong in zip(latOrigin, longOrigin):
        r = getDistance((originLat, originLong), stationCor)
        runningTotal += math.exp(-0.5*((r**2) / variance))

    normalisation = 1/(stdDev * math.sqrt(2 * math.pi))
    return runningTotal * normalisation

In [17]:
def generateGridScores(poi, upperLatBound, bottomLatBound, leftLongBound, rightLongBound):
    """Score every point of a regular lat/long grid against the points of interest.

    Parameters
    ----------
    poi :
        Object exposing ``poi['lat']`` and ``poi['long']`` (e.g. a DataFrame);
        both are forwarded to ``score`` for each grid point.
    upperLatBound, bottomLatBound :
        Latitude extent of the grid. Latitudes start at ``bottomLatBound`` and
        stop before ``upperLatBound``.
    leftLongBound, rightLongBound :
        Longitude extent of the grid. Longitudes start at ``leftLongBound`` and
        stop before ``rightLongBound``.

    Returns
    -------
    pandas.DataFrame
        Columns ``lat``, ``long`` and ``score``, one row per grid point, ordered
        by latitude and then longitude. Scores that do not exceed
        ``SCORE_MIN_THRESHOLD`` are stored as 0.0.
    """
    # Step sizes are rounded to 4 decimals, so the number of points per axis is
    # only approximately PT_DENSITY.
    latStep = round((upperLatBound-bottomLatBound)/PT_DENSITY, 4)
    longStep = round((rightLongBound-leftLongBound)/PT_DENSITY, 4)

    # Loop-invariant lookups: the POI columns and the longitude axis do not
    # change between grid rows.
    poiLats = poi['lat']
    poiLongs = poi['long']
    longValues = np.arange(leftLongBound, rightLongBound, longStep)

    # Collect plain records and build the frame once at the end; concatenating
    # a one-row frame per grid point re-copies the whole result each time.
    records = []
    for lat in np.arange(bottomLatBound, upperLatBound, latStep):
        for long in longValues:
            pointScore = score(lat, long, poiLats, poiLongs, STD_DEV)
            pointScore = pointScore if pointScore > SCORE_MIN_THRESHOLD else 0.0
            records.append((lat, long, pointScore))

    return pd.DataFrame(records, columns=['lat', 'long', 'score'])

### Score

In [47]:
# Stack schools and restaurants into one frame. Original row indices are kept,
# so index values can repeat across the two sources.
pointsOfInterest = pd.concat([tcdsbSchools, restaurants])

In [48]:
# Derive the grid extent from the POI coordinates.
# NOTE(review): getBounds is not defined in this notebook (presumably it comes
# from the star import of get_bounds); the meaning of the trailing 4 should be
# confirmed against that module.
[YYZ_UpperLatBound, YYZ_BottomLatBound, YYZ_LeftLongBound, YYZ_RightLongBound] = getBounds(pointsOfInterest['lat'], pointsOfInterest['long'], 4)

In [49]:
# Score the whole grid. Every grid point is compared against every point of
# interest, so this is the expensive cell of the notebook.
YYZ_GridPoints = generateGridScores(pointsOfInterest, YYZ_UpperLatBound, YYZ_BottomLatBound, YYZ_LeftLongBound, YYZ_RightLongBound)

### Export

In [50]:
def exportScoresToCsv(gridPoints, name):
    """Write ``gridPoints`` to ``../res/grid_points/<name>.csv``.

    The target directory (and any missing parents) is created first; an
    already-existing directory is not an error.
    """
    csvName = name + '.csv'
    destination = Path('../res/grid_points/' + csvName)

    outputDir = destination.parent
    outputDir.mkdir(parents=True, exist_ok=True)

    gridPoints.to_csv(destination)

In [51]:
def exportDataFrameToCoordsTxt(dataFrame, name):
    """Write the ``lat`` and ``long`` columns to ``<name>_coords.txt``.

    The file contains two lines: all latitudes, then all longitudes. Every
    value is followed by a comma (including the last one on each line), and
    the second line has no trailing newline.

    Parameters
    ----------
    dataFrame :
        Object exposing iterable ``dataFrame['lat']`` and ``dataFrame['long']``.
    name : str
        Output path prefix; ``'_coords.txt'`` is appended to it.
    """
    # Build both rows before opening the file so a missing column does not
    # leave a truncated output file behind.
    latLine = ''.join(str(lat) + ',' for lat in dataFrame['lat'])
    longLine = ''.join(str(long) + ',' for long in dataFrame['long'])

    # All writes happen inside the with-block; previously the newline and the
    # longitude row were written after the file had already been closed.
    with open(name + '_coords.txt', 'w') as f:
        f.write(latLine)
        f.write("\n")
        f.write(longLine)

In [52]:
# Persist the scored grid as ../res/grid_points/yyz_grid_points_1.csv.
exportScoresToCsv(YYZ_GridPoints, 'yyz_grid_points_1')