# Capstone proposal: using taxi pickup data to find the best bars in NYC

## Load data and libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
import folium
from folium import plugins
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster

# Taxi trip data source (full January 2016 yellow-cab records):
# http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml
# The raw files are not loaded here; they would be read with
#   pd.read_csv("yellow_tripdata_2016-01.csv")
#   pd.read_csv("taxi+_zone_lookup.csv")

# Reduced dataset prepared beforehand for the subsequent analysis.
# The cells below read its "date", "pickup_latitude" and "pickup_longitude"
# columns.
yellow_pickup_2_to_4 = pd.read_csv("data2.csv")

# Bar locations data source:
# https://www.kaggle.com/somesnm/heatmap-of-pubs-and-bars-of-new-york-city/data
bar_location = pd.read_csv("bar_locations.csv")

# Keep only the Manhattan bars. Take an explicit copy so that columns added
# to this frame later are written to an independent DataFrame instead of a
# slice of bar_location (avoids pandas' SettingWithCopyWarning).
man_bar_location = bar_location.loc[bar_location["Borough"] == "MANHATTAN"].copy()

## Make map of all bars and taxi pickups between the hours of 2AM and 4AM in NYC (Jan 2016)

In [None]:
# Build an interactive map: one clustered marker per Manhattan bar, overlaid
# with an animated heat map of taxi pickups (one animation frame per day).
NYC = (40.729861, -73.988)
f = folium.Figure()
f.html.add_child(
    folium.Element("<h1>Can we use taxi pickups (moving red dots) "
                   "to predict the popularity "
                   "of nearby bars (map markers)? "
                   "(Taxi pickups occur at 2-4AM during Jan 2016) ~JLai</h1>"))
map_of_nyc = folium.Map(location=NYC, zoom_start=12, max_zoom=18)

# [latitude, longitude] pair for every Manhattan bar.
array = [[lat, lon]
         for lat, lon in zip(man_bar_location["Latitude"],
                             man_bar_location["Longitude"])]
# "bar" is the layer's name; `overlay` is a boolean flag, so the string is
# passed as `name` rather than `overlay`.
cluster = MarkerCluster(array, name="bar", overlay=True).add_to(map_of_nyc)

# One list of [lat, lon] points per value 1..31 of the "date" column
# (presumably the day of the month -- confirm against how data2.csv was
# built). A distinct loop variable is used for the day so the inner
# coordinate loop no longer shadows it.
heat_data = []
for day in range(1, 32):
    day_pickups = yellow_pickup_2_to_4.loc[yellow_pickup_2_to_4["date"] == day]
    heat_data.append(
        [[float(lat), float(lon)]
         for lat, lon in zip(day_pickups["pickup_latitude"],
                             day_pickups["pickup_longitude"])])

hm = plugins.HeatMapWithTime(heat_data, auto_play=True, max_opacity=0.4)
hm.add_to(map_of_nyc)
f.add_child(map_of_nyc)

# Make sure the output directory exists; otherwise save() fails on a fresh
# checkout.
os.makedirs('Vis1', exist_ok=True)
f.save('Vis1/index.html')
f

## Compute the number of taxi pickups within walking distance of each bar

In [None]:
##############################################################################
# For every Manhattan bar, count the taxi pickups that lie close to it and
# store the result in a new "Taxi counts" column of man_bar_location.

puLon = yellow_pickup_2_to_4["pickup_longitude"]
puLat = yellow_pickup_2_to_4["pickup_latitude"]

# Scale factors applied to the squared coordinate differences below
# (presumably ~km per degree of latitude / longitude near NYC -- confirm).
lat2km = 110
lon2km = 84

# Proximity threshold on the scaled squared distance.
# NOTE(review): the factors above multiply *squared degrees* without being
# squared themselves, so dist2 is not in km**2 and this threshold is not
# "100 m**2". If the factors are km/degree, the test accepts pickups within
# roughly 100 m of the bar (a slightly elliptical region). The numeric
# criterion is kept unchanged so the results stay comparable with the
# hard-coded values used when plotting; revisit if an exact radius matters.
max_dist2 = 0.0001

# Plain float arrays so each bar is compared against all pickups in one
# vectorised step instead of a Python-level inner loop.
pickup_lat = np.asarray(puLat, dtype=float)
pickup_lon = np.asarray(puLon, dtype=float)

array = []
for bar_lat, bar_lon in zip(man_bar_location["Latitude"],
                            man_bar_location["Longitude"]):
    dlat = (bar_lat - pickup_lat) * (bar_lat - pickup_lat)
    dlon = (bar_lon - pickup_lon) * (bar_lon - pickup_lon)
    dist2 = dlat * lat2km + dlon * lon2km
    array.append(int(np.count_nonzero(dist2 <= max_dist2)))

# Assign the counts positionally. Wrapping them in a pd.Series would align on
# index labels (0..n-1), which do not match the labels this filtered frame
# inherited from bar_location, so counts would land on the wrong bars or
# become NaN. Working on a copy also avoids writing into a slice of
# bar_location.
man_bar_location = man_bar_location.copy()
man_bar_location["Taxi counts"] = array

##############################################################################

# Horizontal bar chart of the per-bar taxi counts in ascending order, with an
# inset that magnifies the top of the ranking and labels those bars by name.
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset

# Tick labels for the inset, listed bottom-to-top (hard-coded).
labels = [
    "Social Bar, Grill & Lounge; 795 8th Ave, 10019",
    "Mamajuana Cafe; 247 Dyckman St, 10034",
    "Coyote Ugly; 153 1st Avenue, 10009",
    "Arthur's Tavern; 57 Grove St, 10014",
    "Niagara; 112 Avenue A, 10009",
]
sns.set_style('white')
sns.set_context('talk')

# Both the main plot and the inset draw the same ranked data.
bar_positions = range(len(man_bar_location))
ranked_counts = sorted(man_bar_location['Taxi counts'])

# Main axes: every bar, rank ordered, without individual y tick labels.
fig, ax = plt.subplots()
ax.set_title("Number of 2-4AM taxi pickups within walking distance "
             "of bar during the month of Jan 2016")
ax.set_ylabel('Bars (rank ordered)')
ax.set_xlabel('Number of taxi pickups (bigger is more popular)')
ax.set_yticks([])
ax.barh(bar_positions, ranked_counts)

# Fixed-size inset in the lower-right corner, connected to the region it
# magnifies on the main axes.
axins = inset_axes(ax, width=3, height=3, loc="lower right", borderpad=3)
mark_inset(ax, axins, loc1=1, loc2=2, fc="none", ec="0.5")

# Hard-coded window of the inset: x is the pickup count, y the rank position.
x1, x2, y1, y2 = 2400, 2820, 507, 513
axins.set_xlim(x1, x2)
axins.set_ylim(y1, y2)
axins.barh(bar_positions, ranked_counts)
axins.set_aspect('auto')
axins.set_yticks(range(508, 513))
axins.set_yticklabels(labels)
axins.set_xticks(range(2400, 2820, 100))

plt.show()
# To write the figure to disk instead: plt.savefig('Vis2/vis2.png')