## Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import geopy.distance
import matplotlib.pyplot as plt
from shapely.geometry import Point
import shapely

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold

import matplotlib as mpl

from scipy import stats
from sklearn import metrics
import folium

import math

from descartes import PolygonPatch
import altair as alt

from pprint import pprint
from IPython.display import clear_output

# Select Altair's 'notebook' renderer.
# For the classic notebook only (not for JupyterLab); run this command once per session.
alt.renderers.enable('notebook')

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

## Data Paths

In [None]:
### Path constants
# Directory with the raw sensor files, and the node list located inside it.
datapath = '../rawdata/sensors/'
filename = ''.join([datapath, 'nodes.txt'])

## Zipped taxi-zone archive.
## NOTE: this name is rebound to an array of LocationIDs in the correlation cell.
taxi_regions = '../assets/taxi_zones.zip'

## Reading taxi regions

In [None]:
# Read the zipped taxi zones, keep the rows whose borough is Manhattan,
# and reproject the result to EPSG:3857.
taxi_regions_geodf = (
    gpd.read_file('zip://../assets/taxi_zones.zip')
    .loc[lambda zones: zones['borough'] == 'Manhattan']
    .to_crs({'init': 'epsg:3857'})
)

In [None]:
# Base map with the zones held in taxi_regions_geodf drawn on top of it.
map_osm = folium.Map(
    location=[40.742, -73.956],
    tiles="cartodbpositron",
    zoom_start=12,
)
folium.GeoJson(taxi_regions_geodf).add_to(map_osm)

# Render the map as the cell output.
display(map_osm)

## Join Sensor Position x Taxi Regions

In [None]:
# Parse the sensor list: each non-empty line holds "<sensorID> <lat> <lon>".
# Rows are collected in plain lists and the GeoDataFrame is built once; growing
# the frame with .append() per line is quadratic and no longer exists in pandas 2.x.
sensor_ids = []
sensor_geometries = []

# The context manager guarantees the file handle is closed, even on a parse error.
with open(filename) as f:
    for line in f:
        fields = line.split()
        if not fields:
            # tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue

        s, lat, lon = fields

        # shapely points are (x, y), i.e. (lon, lat)
        sensor_ids.append(s)
        sensor_geometries.append(shapely.geometry.Point(float(lon), float(lat)))

sensors_geodf = gpd.GeoDataFrame(
    {'sensorID': sensor_ids},
    geometry=sensor_geometries,
    crs={'init': 'epsg:4326'},
)

# Reproject to the CRS used for the taxi zones so the spatial join compares like with like.
sensors_geodf = sensors_geodf.to_crs({'init':'epsg:3857'})

# Keep only the sensors that fall within a taxi zone; each row carries the
# sensor columns plus the columns of the zone that contains it.
# NOTE(review): newer geopandas releases rename `op=` to `predicate=`; the
# keyword is kept here to match the API version the rest of the notebook uses.
intersection = gpd.tools.sjoin(sensors_geodf,taxi_regions_geodf, how='inner', op="within")

## Loading datasets

In [None]:
## loading taxi
taxi = pd.read_pickle('../data/taxi/taxi.pkl')
taxi = taxi['2018-01-01':'2018-05-01']

## Calculating Correlations

In [None]:
## training region
training_region = 79

# LocationIDs of every zone in taxi_regions_geodf; each one is compared
# against the training region below.
taxi_regions = taxi_regions_geodf['LocationID'].values

nOfRegions = taxi_regions.shape[0]

# Hourly row counts for the training region. This does not depend on the
# region being tested, so it is computed once instead of on every iteration.
training_region_df = (
    taxi[taxi['location'] == training_region]
    .resample('H')
    .agg({'location': 'count'})
)
training_counts = training_region_df['location']

# One record per tested region; the DataFrame is built once after the loop
# (per-row DataFrame.append is quadratic and was removed in pandas 2.0).
correlation_rows = []

for region in taxi_regions:

    # progress indicator: number of regions still to process
    print(nOfRegions)

    ## filtering + resampling to hourly row counts
    testing_region_df = (
        taxi[taxi['location'] == region]
        .resample('H')
        .agg({'location': 'count'})
    )

    # Series.corr aligns both series on their shared hourly index before
    # computing the correlation, so the training frame is never mutated.
    correlation = training_counts.corr(testing_region_df['location'])

    correlation_rows.append({
        'training_region': training_region,
        'testing_region': region,
        'correlation': correlation
    })

    nOfRegions = nOfRegions - 1

# Explicit column list so the frame has the same columns even when no region was processed.
correlations_dataframe = pd.DataFrame(
    correlation_rows,
    columns=['training_region', 'testing_region', 'correlation'],
)

In [None]:
# Show the correlation table as the cell output.
correlations_dataframe

In [None]:
# NOTE(review): the sorted frame is neither assigned nor the last expression of
# this cell, so this line has no visible effect; confirm whether it was meant
# to be displayed or stored.
correlations_dataframe.sort_values(by='correlation', ascending=True)

# Colour normalisation over a fixed value range.
# NOTE(review): the bounds look like values copied from an earlier run's output;
# confirm they still match the current 'correlation' column.
norm = mpl.colors.Normalize(vmin=-0.067979,vmax=0.963370)

In [None]:
## testing
taxi_region = 79

sensors_in_region = dataframe[dataframe['LocationID'] == taxi_region]['sensorID']

testing_map = folium.Map(location=[40.742, -73.956], zoom_start=12, tiles="cartodbpositron")

folium.GeoJson(taxi_regions_geodf[taxi_regions_geodf['LocationID'] == taxi_region]).add_to(testing_map)
sensors_points = sensors_geodf[sensors_geodf['sensorID'].isin(sensors_in_region.values)]

sensors_points = sensors_points.to_crs({'init':'epsg:4326'})

plotMapPoints(sensors_points.iloc[0]['geometry'],testing_map)
plotMapPoints(sensors_points.iloc[1]['geometry'],testing_map)
plotMapPoints(sensors_points.iloc[2]['geometry'],testing_map)

display(testing_map)

## Helper Functions

### Geospatial Functions

In [None]:
# Read the NYC boundaries GeoJSON file.
nyc_boundaries = gpd.read_file('../data/nyc_boundaries/NYC_Boundaries.geojson')

def pointWithinCircle(point, circle):
    """Return True when `point` lies inside or on the edge of `circle`.

    `circle` is indexed as (centre[0], centre[1], radius); the radius is
    compared against the geopy distance expressed in meters.
    `point` is handed to geopy.distance.distance unchanged, so it must use the
    same coordinate form as the centre (presumably (lat, lon); confirm
    against callers).
    """
    centre = (circle[0], circle[1])
    radius = circle[2]
    separation = geopy.distance.distance(centre, point).meters
    return bool(separation <= radius)

### Visualization Functions

In [None]:
def plotMapPoints(mapCoordGroups, map_osm):
    """Add a single circle marker to `map_osm`.

    Despite its name, `mapCoordGroups` is used as one point-like object: its
    `.y` and `.x` attributes become the marker location, in that order.
    The marker has a black outline and a half-transparent red fill.
    """
    marker = folium.CircleMarker(
        location=[mapCoordGroups.y, mapCoordGroups.x],
        radius=5,
        color="black",
        fill=True,
        fill_color="red",
        fill_opacity=0.5,
    )
    marker.add_to(map_osm)
    
    
def plotCircleRegions(circleRegions, map_osm):
    """Add one unfilled crimson folium.Circle to `map_osm` per entry of `circleRegions`.

    Only circle[0] and circle[1] of each entry are read: they give the marker
    location and are shown in the tooltip. The drawn radius is fixed at 5 and
    every circle gets the same popup text.
    """
    for circle in circleRegions:
        # Tooltip text of the form "(<circle[0]> , <circle[1]>)".
        tooltip_text = '({!s} , {!s})'.format(circle[0], circle[1])

        region_marker = folium.Circle(
            location=[circle[0], circle[1]],
            radius=5,
            color='crimson',
            fill=False,
            popup='The Waterfront',
            tooltip=tooltip_text,
        )
        region_marker.add_to(map_osm)

In [None]:
# areas_coords = [
#     (40.748203, -74.002728),
#     (40.744006, -73.990839),
#     (40.739258, -73.978691),
#     (40.730250, -73.981419),
#     (40.730999, -73.997317),
#     (40.736302, -74.003391),
#     (40.727475, -74.005537),
#     (40.722948, -73.995838),
#     (40.719485, -73.984144),
#     (40.718380, -74.006466),
#     (40.710166, -74.008929),
#     (40.714867, -73.993454),
#     (40.758490, -73.996376),
#     (40.754586, -73.985882),
#     (40.750930, -73.976355),
#     (40.760797, -73.967662),
#     (40.762536, -73.977212),
#     (40.765541, -73.986568)]