# Data Prediction

This notebook uses the saved, trained model to make predictions for given input values.

In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from math import sin, cos, sqrt, atan2, radians

## Methods

In [2]:
def loadModel(name):
    '''
    function to unpickle and return the model object stored under the given name.

    name: string inserted into the path 'store/<name>_model.pkl'.
    returns: whatever object was pickled into that file.

    note: pickle can execute code while loading, so only load trusted files.
    '''
    model_path = ''.join(('store/', name, '_model.pkl'))
    with open(model_path, 'rb') as handle:
        return pickle.load(handle)

In [3]:
def discretizeTime(time):
    '''
    function to map 24-hours format time to one of the six 4 hours intervals.

    time: 24-hour time such as '02:00', '2:00' or '14:30:00'; only the hour
          and minute fields are read, and the value is passed through str().
    returns: 't1' for 00:00-03:59, 't2' for 04:00-07:59, ... 't6' for 20:00-23:59.
    raises: ValueError when the value is not a valid 24-hour time.
    '''
    fields = str(time).strip().split(':')
    try:
        hour, minute = int(fields[0]), int(fields[1])
    except (IndexError, ValueError):
        raise ValueError("time must be in 24-hour 'HH:MM' format, got %r" % (time,))

    # Compare numbers, not strings: string comparison silently fell through
    # (returning None) for unpadded hours such as '2:00' or '9:30'.
    if not (0 <= hour <= 23 and 0 <= minute <= 59):
        raise ValueError("time must be between 00:00 and 23:59, got %r" % (time,))

    # Each interval spans 4 hours, so the hour alone selects the bucket.
    return 't%d' % (hour // 4 + 1)

In [4]:
def calculateDistance(src, dst):
    '''
    function to calculate the great-circle (haversine) distance in miles
    between two locations on earth, using src & dst tuples given in the
    format (latitude, longitude) in signed decimal degrees.

    src, dst: indexable pairs; element 0 is latitude, element 1 is longitude.
    returns: distance in miles as a float.
    '''
    # approximate radius of earth in km
    R = 6373.0

    # approximate 1 km to miles conversion
    to_miles = 0.621371

    # Keep the sign of each coordinate. Taking abs() would mirror every point
    # into one hemisphere, so e.g. (10, 0) and (-10, 0) would appear identical.
    lat1 = radians(src[0])
    lon1 = radians(src[1])
    lat2 = radians(dst[0])
    lon2 = radians(dst[1])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) fail.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c * to_miles

In [5]:
def isNear(location, data, radius):
    '''
    function to tell whether the given location (latitude, longitude) lies
    within the given radius of at least one row of the given dataframe.

    data: dataframe whose rows are read through the 'latitude' and
          'longitude' columns.
    returns: 1 as soon as a row within the radius is found, otherwise 0.
    '''
    within_radius = (
        calculateDistance(location, (record['latitude'], record['longitude'])) <= radius
        for _, record in data.iterrows()
    )
    # any() stops at the first match, like the early return of a plain loop.
    return 1 if any(within_radius) else 0

In [6]:
def loadLabelEncoder(column):
    '''
    function to unpickle and return the label encoder stored for the given column.

    column: string inserted into the path 'store/<column>_label_encoder.pkl'.
    returns: whatever object was pickled into that file.

    note: pickle can execute code while loading, so only load trusted files.
    '''
    encoder_path = ''.join(('store/', column, '_label_encoder.pkl'))
    with open(encoder_path, 'rb') as handle:
        return pickle.load(handle)

In [7]:
def createDataPoint(day, district, longitude, latitude, month, time):
    '''
    function to assemble a single feature row from raw input values.

    The row holds, in order: encoded day, encoded district, longitude,
    latitude, encoded month, encoded time interval, a constant 0
    (NOTE(review): the meaning of this slot is not visible here - confirm
    against the training features), followed by eight 0/1 flags telling
    whether the location is within radius 1 of any row of each stored
    places dataframe.

    returns: numpy array of shape (1, 15).
    '''
    # Encode the categorical inputs with their stored label encoders.
    encoded = {}
    for column, raw_value in (('day', day), ('district', district), ('month', month)):
        encoded[column] = loadLabelEncoder(column).transform(np.array([raw_value]))[0]

    # The encoder is loaded before the time is discretized.
    interval_encoder = loadLabelEncoder('time_interval')
    encoded['time_interval'] = interval_encoder.transform(
        np.array([discretizeTime(time)])
    )[0]

    # One proximity flag per stored dataframe, in feature order.
    place_files = (
        'store/facilities.pkl',
        'store/private_spaces.pkl',
        'store/colleges.pkl',
        'store/public_open_spaces.pkl',
        'store/commuter_stops.pkl',
        'store/public_park.pkl',
        'store/landmarks.pkl',
        'store/schools.pkl',
    )
    proximity_flags = []
    for place_file in place_files:
        places = pd.read_pickle(place_file, compression='gzip')
        proximity_flags.append(isNear((latitude, longitude), places, 1))

    features = [
        encoded['day'],
        encoded['district'],
        longitude,
        latitude,
        encoded['month'],
        encoded['time_interval'],
        0,
    ] + proximity_flags
    return np.array(features).reshape(1, -1)

In [8]:
def getLabel(pred):
    '''
    function to turn encoded predictions back into labels using the stored
    'label' encoder, returning the label of the first prediction only.
    '''
    label_encoder = loadLabelEncoder('label')
    decoded = label_encoder.inverse_transform(pred)
    return decoded[0]

## Model

In [9]:
# Example: build the feature row for a sample point (Sunday, Tenderloin
# district, longitude -122.414406, latitude 37.784191, February, 02:00).
createDataPoint('sunday', 'tenderloin',-122.414406, 37.784191, 'february', '02:00')

array([[   3.      ,    9.      , -122.414406,   37.784191,    3.      ,
           0.      ,    0.      ,    1.      ,    1.      ,    1.      ,
           1.      ,    1.      ,    1.      ,    1.      ,    1.      ]])

In [10]:
# Example: decode the encoded class 0 back to its label with the stored 'label' encoder.
getLabel(np.array([0]))

  if diff:


'high'