In [24]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import iplot
from keys import mapbox_key

import json


# Invetigating the rollout strategy
We look at how citibike expanded their stations , year by year

In [36]:
rollout_data = pd.read_csv('./../queries/get_rollout_cluster/rollout_clusters.csv')
px.scatter_mapbox(  rollout_data,
                    lat = 'latitude',
                    lon = 'longitude',
                    color = 'rollout_cluster',
                    mapbox_style = 'carto-positron',
                    color_continuous_scale=['pink','red','orange', 'yellow' 'green', 'blue','purple' ,'grey','black'],
                    zoom = 10,
                    center = dict(lat = 40.76421, lon = -73.95623)
                 )

## Rollout 1
CitiBike started with stations in downtow/midtown Manhattan, and Brooklyn Business District. 
We suspect this was because these areas have very high business density, very high population density, and/or were close to many public transit points.

From the Legally_operating_Businesses dataset ,from NYC OpenData, we will look at how many businesses there are per zipcode

In [4]:
business_location_data = pd.read_csv('./../data/nyc_data_misc/Legally_Operating_Businesses.csv')
zips = business_location_data['Address ZIP']

with open('./../data/nyc_data_misc/zip_codes.geojson', 'r') as j:
     zip_code_boundaries = json.loads(j.read())



Columns (13,19,20) have mixed types.Specify dtype option on import or set low_memory=False.



In [5]:
business_per_zip = zips.value_counts()
business_per_zip = business_per_zip.reset_index().rename(columns = {'index':'Address ZIP', 'Address ZIP' : 'count'})
str_int = [str(x) for x in range(10001,11698)]
business_per_zip = business_per_zip[business_per_zip['Address ZIP'].isin(str_int)]

In [28]:
fig = px.choropleth_mapbox( business_per_zip,
                            geojson=zip_code_boundaries,
                            locations = 'Address ZIP',
                            featureidkey="properties.postalCode",
                            color = 'count',
                            mapbox_style="carto-positron"
                          )
fig.show()

Hmm, this does not seem to be what we were looking for. It looks like most businesses are located on Staten Island.
Wht is probably the case is many faily owned busnesses with very few employees are probably registered on Staten Island. So going by pure company count does not show insightinto where to start citi-bike.

### Perhaps we look at some different data

In [14]:
try1 = go.Figure(go.Choroplethmapbox(geojson=zip_code_boundaries, 
                                     featureidkey='properties.postalCode',
                                       locations=business_per_zip['Address ZIP'], 
                                       z=business_per_zip['count'],
                                       colorscale="Viridis",
                                       marker_opacity=0.5, 
                                       marker_line_width=0))
try1.update_layout(mapbox_style="carto-positron", 
                  mapbox_zoom=9, mapbox_center = dict(lat = 40.76421, lon = -73.95623))
try1.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
try1.show()

## Next we will look at the tranist hubs downtown
From the bus stops location data, we will look at the lat long pocation of the stops

In [15]:
bus_stops = pd.read_csv('./../data/nyc_data_misc/bus_stop_locations.csv')

In [17]:
bus_stops = bus_stops[['LATITUDE','LONGITUDE']]

In [30]:
px.scatter_mapbox(  bus_stops,
                    lat = 'LATITUDE',
                    lon = 'LONGITUDE',
                    mapbox_style = 'carto-positron',
                    zoom = 10,
                    center = dict(lat = 40.76421, lon = -73.95623)
                 )

In [75]:
overlayed = go.Figure()

In [76]:
data1 = go.Densitymapbox(
            lat = bus_stops['LATITUDE'],
            lon = bus_stops['LONGITUDE'],
            radius=5
#             mode = 'markers',
#             marker= dict(
#                 size = 8,
#                 color = 'black',
#                 opacity = .8
#             )
        )

In [77]:
first_cluster = rollout_data[rollout_data['rollout_cluster']== 1]
data2 = go.Densitymapbox(
            lat = first_cluster['latitude'],
            lon = first_cluster['longitude'],
            radius=5
#             mode = 'markers',
#             marker= dict(
#                 size = 8,
#                 color = 'blue',
#                 opacity = .8
#             )
        )


In [78]:
overlayed.add_trace(data1)
overlayed.add_trace(data2)
overlayed.update_layout(mapbox_style="carto-positron", 
                  mapbox_zoom=9, mapbox_center = dict(lat = 40.76421, lon = -73.95623))
overlayed.update_layout(margin={"r":0,"t":0,"l":0,"b":0})