In [1]:
import pandas as pd
import ast
from geopy.distance import geodesic
import math
import numpy as np
from get_bounds import *
from pathlib import Path

### Dataset Retrieval and Preprocessing

In [2]:
def getTCDSBSchools(path='../res/datasets/schools/TCDSB_Schools.csv'):
    """Load the TCDSB schools CSV and return one row per school with its coordinates.

    Only the 'SCH_NAME' and 'geometry' columns of the CSV are read; any other
    columns may be present or absent.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the schools CSV. Defaults to the dataset path used by this
        notebook.

    Returns
    -------
    pandas.DataFrame
        Columns 'name' (from 'SCH_NAME'), 'type' (constant 'tcdsb'), 'lat' and
        'long'.
    """
    df = pd.read_csv(path, sep=',')

    # Each 'geometry' cell is the text form of a dict literal holding a
    # 'coordinates' sequence; literal_eval parses it without executing code.
    geometry = df['geometry'].astype('str').apply(ast.literal_eval)
    coordinates = geometry.apply(lambda geom: geom['coordinates'])

    # The coordinate pair is stored longitude first, latitude second.
    lat = coordinates.apply(lambda pair: pair[1])
    long = coordinates.apply(lambda pair: pair[0])

    return pd.DataFrame(data={'name': df['SCH_NAME'], 'type': 'tcdsb', 'lat': lat, 'long': long})

# School locations (name, type, lat, long) used later for the bounds and the grid scoring.
tcdsbSchools = getTCDSBSchools()

In [3]:
def getRestaurants(path='../res/datasets/csv/restaurant.csv'):
    """Load the restaurant CSV as-is.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the restaurant CSV. Defaults to the dataset path used by
        this notebook.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by pandas.read_csv.
    """
    return pd.read_csv(path, sep=',')

# Raw restaurant table; NOTE(review): not referenced by any other cell visible here.
restaurants = getRestaurants()

In [4]:
# Display the parsed school table (name, type, lat, long) as a sanity check.
tcdsbSchools

Unnamed: 0,name,type,lat,long
0,Annunciation,tcdsb,43.753351,-79.321268
1,All Saints,tcdsb,43.685618,-79.527163
2,St. Martin Program,tcdsb,43.667175,-79.364426
3,Bishop Marrocco-Thomas Merton,tcdsb,43.655858,-79.451808
4,Bishop Allen Academy,tcdsb,43.634428,-79.504465
...,...,...,...,...
205,Our Lady of Perpetual Help,tcdsb,43.687298,-79.383305
206,St Josaphat,tcdsb,43.599299,-79.509060
207,North East SAL Program,tcdsb,43.809351,-79.316603
208,East Facilities,tcdsb,43.735510,-79.324065


### Scoring Parameters

In [5]:
# Standard deviation of the Gaussian used by score(); distances there are in
# kilometres (getDistance divides metres by 1000), so this is in kilometres too.
STD_DEV = 0.6
# Number of steps each side of the bounding box is divided into by generateGridScores().
PT_DENSITY = 50
# Grid scores that are not strictly above this value are stored as 0.0.
SCORE_MIN_THRESHOLD = 0.1

### Scoring Functions

In [6]:
def getDistance(origin, stationCor):
    """Return the geodesic distance between two points, in kilometres.

    Both arguments are passed straight to geopy's geodesic(); callers in this
    notebook supply (lat, long) tuples.
    """
    separation = geodesic(origin, stationCor)
    kilometres = separation.meters / 1000
    return abs(kilometres)

In [7]:
def score(latStation, longStation, latOrigin, longOrigin, stdDev):
    """Sum a Gaussian of the distance from one point to every origin point.

    Parameters
    ----------
    latStation, longStation : float
        Coordinates of the point being scored.
    latOrigin, longOrigin : iterable of float
        Latitudes and longitudes of the origin points; they are paired up
        element by element.
    stdDev : float
        Standard deviation of the Gaussian, in the same unit that
        getDistance() returns.

    Returns
    -------
    float
        Sum over all origins of exp(-r^2 / (2 * stdDev^2)), multiplied by
        1 / (stdDev * sqrt(2 * pi)).
    """
    target = (latStation, longStation)

    # Only the exponential term differs per origin, so accumulate that and
    # apply the shared normalisation factor a single time afterwards.
    accumulated = 0
    for originLat, originLong in zip(latOrigin, longOrigin):
        distance = getDistance((originLat, originLong), target)
        accumulated += math.exp(-0.5*((distance**2) / (stdDev**2)))

    normalisation = (1/(stdDev * math.sqrt(2 * math.pi)))
    return accumulated * normalisation

In [8]:
def generateGridScores(poi, upperLatBound, bottomLatBound, leftLongBound, rightLongBound):
    """Score every point of a regular lat/long grid against a set of points of interest.

    The bounding box is split into PT_DENSITY steps per axis (step sizes are
    rounded to 4 decimal places), and each grid point is scored with score()
    using STD_DEV. Scores that are not above SCORE_MIN_THRESHOLD are stored
    as 0.0.

    Parameters
    ----------
    poi : mapping or pandas.DataFrame
        Must provide 'lat' and 'long' entries holding the coordinates of the
        points of interest.
    upperLatBound, bottomLatBound : float
        Latitude limits of the grid.
    leftLongBound, rightLongBound : float
        Longitude limits of the grid.

    Returns
    -------
    pandas.DataFrame
        Columns 'lat', 'long', 'score', one row per grid point, ordered by
        latitude and then longitude.
    """
    latStep = round((upperLatBound-bottomLatBound)/PT_DENSITY, 4)
    longStep = round((rightLongBound-leftLongBound)/PT_DENSITY, 4)

    # Loop-invariant values are computed once instead of on every iteration.
    latValues = np.arange(bottomLatBound, upperLatBound + latStep, latStep)
    longValues = np.arange(leftLongBound, rightLongBound + longStep, longStep)
    poiLat = poi['lat']
    poiLong = poi['long']

    # Collect plain rows and build the frame a single time: concatenating a
    # one-row frame per grid point copies the whole result on every step, and
    # concatenating onto an empty frame is deprecated in newer pandas.
    rows = []
    for lat in latValues:
        for long in longValues:
            pointScore = score(lat, long, poiLat, poiLong, STD_DEV)
            if not pointScore > SCORE_MIN_THRESHOLD:
                pointScore = 0.0
            rows.append((lat, long, pointScore))

    return pd.DataFrame(rows, columns=['lat', 'long', 'score'])

### Score

In [9]:
# Bounding box for the grid, derived from the school coordinates.
# NOTE(review): getBounds presumably comes from the star import of get_bounds; the
# meaning of the third argument (4) is defined there — confirm before changing it.
[YYZ_UpperLatBound, YYZ_BottomLatBound, YYZ_LeftLongBound, YYZ_RightLongBound] = getBounds(tcdsbSchools['lat'], tcdsbSchools['long'], 4)

In [10]:
# Score every grid point inside the bounding box against the TCDSB school locations.
YYZ_GridPoints = generateGridScores(tcdsbSchools, YYZ_UpperLatBound, YYZ_BottomLatBound, YYZ_LeftLongBound, YYZ_RightLongBound)

### Export

In [11]:
def exportScoresToCsv(gridPoints, name):
    """Write a grid-score frame to ../res/grid_points/<name>.csv.

    The target directory is created if it does not exist yet. The frame is
    written with pandas' default to_csv settings.
    """
    target = Path('../res/grid_points/' + name + '.csv')
    outputDir = target.parent
    outputDir.mkdir(parents=True, exist_ok=True)
    gridPoints.to_csv(target)

In [12]:
def exportDataFrameToCoordsTxt(dataFrame, name):
    """Write the 'lat' and 'long' columns of a frame to <name>_coords.txt.

    The file has two lines: the latitudes, then the longitudes. Every value
    is followed by a comma (including the last one on each line), and there
    is no newline after the longitude line.

    Parameters
    ----------
    dataFrame : pandas.DataFrame or mapping
        Must provide iterable 'lat' and 'long' entries.
    name : str
        Path prefix of the output file; '_coords.txt' is appended to it.
    """
    # Build both lines before opening the file, so a missing column raises
    # without leaving a truncated file behind.
    latLine = ''.join(str(lat) + ',' for lat in dataFrame['lat'])
    longLine = ''.join(str(long) + ',' for long in dataFrame['long'])

    # All writes must happen while the file is still open inside the with block.
    with open(name + '_coords.txt', 'w') as f:
        f.write(latLine)
        f.write("\n")
        f.write(longLine)

In [13]:
# Persist the scored grid to ../res/grid_points/yyz_grid_points.csv.
exportScoresToCsv(YYZ_GridPoints, 'yyz_grid_points')