This notebook loads the station data for a specific time range and plots it on a map.

In [10]:
# imports

import numpy as np
import pandas as pd
import geopandas
# from geopandas import GeoDataFrame

import sys
sys.path.append("C:\\Users\\beezy\\git\\bluebikes_system_analysis\\code")
sys.path.append("C:\\Users\\beezy\\git\\bluebikes_system_analysis\\code\\data_handling")

# for loading station information
from data_handling.data_loading_methods import load_station_info

# for loading a json and saving to a csv
from data_handling.loadjson_savetocsv import load_jsons_savetocsv

In [11]:
# load data, perform housekeeping, calculate capacity

# read the station log from csv; the first csv column is used as the index
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory
filetoload = 'C:\\Users\\beezy\\git\\bluebikes_system_analysis\\data\\station data 220715 jsons\\log_220715.csv'
station_df = pd.read_csv(filetoload, index_col=0)

# drop any duplicates
station_df = station_df.drop_duplicates()

# delete rows with unreal values: treat empty strings as missing, then drop every
# row that has at least one missing value.
# dropna() removes rows by position, so only the incomplete rows go away even when
# index labels repeat (dropping by label would also remove complete rows that
# happen to share a label with an incomplete one)
station_df = station_df.replace('', np.nan).dropna()

# convert the epoch-second timestamps to datetimes
# NOTE(review): the conversion is timezone-naive, so hour/minute below are presumably
# UTC rather than local station time -- confirm before filtering by hour
station_df['last_reported'] = pd.to_datetime(station_df['last_reported'], unit='s')

# add hour, minute columns taken from the timestamp
station_df['hour'] = station_df['last_reported'].dt.hour
station_df['minute'] = station_df['last_reported'].dt.minute

# remove timestamp column, since explore() doesn't like it
station_df = station_df.drop(columns='last_reported')

# rename capacity to total_capacity
station_df = station_df.rename(columns={'capacity': 'total_capacity'})

# calculate true capacity: available bikes as a fraction of the station's total capacity.
# a total_capacity of 0 is mapped to NaN first so the division yields NaN (no data)
# instead of inf, which would otherwise be drawn as a "full" station on a 0-1 color scale
station_df['true_capacity'] = (
    station_df['num_bikes_available'] / station_df['total_capacity'].replace(0, np.nan)
)

station_df

Unnamed: 0,station_status,num_bikes_available,is_renting,num_docks_disabled,is_installed,is_returning,num_docks_available,num_bikes_disabled,station_id,lon,region_id,lat,name,total_capacity,hour,minute,true_capacity
0,active,2,1,0,1,1,13,0,3,-71.100619,10.0,42.340115,Colleges of the Fenway - Fenway at Avenue Loui...,15,3,58,0.133333
1,active,6,1,0,1,1,12,1,4,-71.069616,10.0,42.345392,Tremont St at E Berkeley St,19,3,42,0.315789
2,active,12,1,0,1,1,3,0,5,-71.090179,10.0,42.341814,Northeastern University - North Parking Lot,15,3,58,0.800000
3,active,6,1,0,1,1,8,1,6,-71.065287,10.0,42.361257,Cambridge St at Joy St,15,3,50,0.400000
4,active,0,1,0,1,1,15,0,7,-71.044571,10.0,42.353391,Fan Pier,15,1,40,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124653,active,2,1,1,1,1,15,1,514,-71.094644,9.0,42.386736,Gilman Square T at Medford St,19,3,56,0.105263
124654,active,10,1,1,1,1,8,0,515,-71.109988,8.0,42.368952,955 Mass Ave,19,3,53,0.526316
124690,active,21,1,0,1,1,1,1,553,-71.076198,8.0,42.371141,Cambridge Crossing at North First Street,23,3,54,0.913043
124695,active,2,1,0,1,1,16,0,558,-71.101246,10.0,42.333293,St. Alphonsus St at Tremont St,18,3,55,0.111111


In [35]:
# restrict the data to one hour of the day and an inclusive range of minutes
selected_hour     = 23
selected_min_low  = 0
selected_min_high = 59

# one boolean mask per condition, combined into a single row selection
in_hour = station_df['hour'].eq(selected_hour)
in_minutes = station_df['minute'].between(selected_min_low, selected_min_high)  # both ends inclusive
station_df_crop = station_df.loc[in_hour & in_minutes]

# build point geometries from the lon/lat columns, then wrap the cropped
# frame in a geodataframe using the EPSG:4326 coordinate reference system
station_points = geopandas.points_from_xy(station_df_crop['lon'], station_df_crop['lat'])
station_gdf = geopandas.GeoDataFrame(station_df_crop, geometry=station_points, crs='EPSG:4326')

In [53]:
# interactive explore() map of the selected stations, colored by the
# true_capacity column with the color scale pinned to the 0-1 range
station_gdf.explore(
    column='true_capacity',
    cmap='RdYlGn',
    vmin=0,
    vmax=1,
    marker_kwds={'radius': 10, 'fill': True},
    location=[42.351, -71.0898],
    zoom_start=13,
)

In [47]:
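# get some attributes of the map
# NOTE: `stationmap` is not defined in the cells shown in this notebook; presumably it
# was the map object returned by station_gdf.explore(...) -- assign that result to
# `stationmap` before uncommenting the line below
# stationmap.location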

[42.395136699999995, -71.0534830391407]