In [1]:
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS
import numpy as np
from matplotlib import pyplot as plt

In [2]:
df = pd.read_csv('largedata_weather_trips_2022.csv', usecols=['start_lat', 'start_lng','end_lat','end_lng','end_station_name', 'start_station_name'])

In [3]:
df.head()

Unnamed: 0,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng
0,Berkeley Pl & 6 Ave,Windsor Pl & 8 Ave,40.67653,-73.978469,40.660906,-73.983074
1,21 St & 4 Ave,Degraw St & Smith St,40.662584,-73.995554,40.682915,-73.993182
2,48 St & Barnett Ave,Queens Plaza North & Crescent St,40.750016,-73.915409,40.751102,-73.940737
3,7 Ave & Central Park South,8 Ave & W 33 St,40.766741,-73.979069,40.751551,-73.993934
4,Jerome Ave & W 195 St,Sherman Ave & E 166 St,40.868858,-73.8965,40.831305,-73.917448


In [4]:
df['value']=1
df_group=df.groupby(['start_station_name' , 'end_station_name'])['value'].count().reset_index()

In [5]:
df_group.rename(columns={'value':'trips'}, inplace=True)

In [6]:
df_group.head()

Unnamed: 0,start_station_name,end_station_name,trips
0,1 Ave & E 110 St,1 Ave & E 110 St,1473
1,1 Ave & E 110 St,1 Ave & E 18 St,2
2,1 Ave & E 110 St,1 Ave & E 30 St,4
3,1 Ave & E 110 St,1 Ave & E 39 St,1
4,1 Ave & E 110 St,1 Ave & E 44 St,12


In [7]:
print(df_group ['trips'].sum())
print(df.shape)

30618148
(30689921, 7)


In [8]:
df_group.shape

(1013414, 3)

In [9]:
df_m = pd.merge(df, df_group, on = ['start_station_name', 'end_station_name'], how = 'inner')

In [10]:
df_m=df_m.drop(columns= ['value'])

In [13]:
df_m.head(2)

Unnamed: 0,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,trips
0,Berkeley Pl & 6 Ave,Windsor Pl & 8 Ave,40.67653,-73.978469,40.660906,-73.983074,23
1,21 St & 4 Ave,Degraw St & Smith St,40.662584,-73.995554,40.682915,-73.993182,32


In [14]:
df_m.rename(columns = {'start_lat':'Latitude_x', 'start_lng':'Longitude_x', 'end_lat':'Latitude_y', 
                       'end_lng': 'Longitude_y'})

Unnamed: 0,start_station_name,end_station_name,Latitude_x,Longitude_x,Latitude_y,Longitude_y,trips
0,Berkeley Pl & 6 Ave,Windsor Pl & 8 Ave,40.676530,-73.978469,40.660906,-73.983074,23
1,21 St & 4 Ave,Degraw St & Smith St,40.662584,-73.995554,40.682915,-73.993182,32
2,48 St & Barnett Ave,Queens Plaza North & Crescent St,40.750016,-73.915409,40.751102,-73.940737,17
3,7 Ave & Central Park South,8 Ave & W 33 St,40.766741,-73.979069,40.751551,-73.993934,173
4,Jerome Ave & W 195 St,Sherman Ave & E 166 St,40.868858,-73.896500,40.831305,-73.917448,7
...,...,...,...,...,...,...,...
30618143,E 41 St & Madison Ave (SW corner),6 Ave & W 33 St,40.752165,-73.979922,40.749013,-73.988484,647
30618144,Peck Slip & South St,Vesey St & Church St,40.707689,-74.001254,40.712220,-74.010472,114
30618145,E 41 St & Madison Ave (SW corner),Lenox Ave & W 111 St,40.752399,-73.980150,40.798786,-73.952300,12
30618146,W 37 St & 10 Ave,W 70 St & Amsterdam Ave,40.756604,-73.997901,40.777480,-73.982886,116


In [15]:
df_m.shape 

(30618148, 7)

In [16]:
df_m.head()

Unnamed: 0,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,trips
0,Berkeley Pl & 6 Ave,Windsor Pl & 8 Ave,40.67653,-73.978469,40.660906,-73.983074,23
1,21 St & 4 Ave,Degraw St & Smith St,40.662584,-73.995554,40.682915,-73.993182,32
2,48 St & Barnett Ave,Queens Plaza North & Crescent St,40.750016,-73.915409,40.751102,-73.940737,17
3,7 Ave & Central Park South,8 Ave & W 33 St,40.766741,-73.979069,40.751551,-73.993934,173
4,Jerome Ave & W 195 St,Sherman Ave & E 166 St,40.868858,-73.8965,40.831305,-73.917448,7


In [17]:
sample_df = df_m.sample(n=200000, random_state=1)

In [18]:
sample_df.isna().sum()

start_station_name    0
end_station_name      0
start_lat             0
start_lng             0
end_lat               0
end_lng               0
trips                 0
dtype: int64

In [19]:
import gc  # This imports the garbage collector module.
gc.collect()  # This triggers the garbage collector to free up memory.

1746

In [37]:
m = KeplerGl(height = 600, data={"New York Bike Rides": sample_df})

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


In [38]:
m

KeplerGl(data={'New York Bike Rides':                  start_station_name         end_station_name  start_lat …

##### The colours were changed to a combination of yellow and blue to complement each other. The stroke line size was adjusted to 2.

#### Some of the most common trips occur in popular areas that also attract tourists such as central park, the Meatpacking district, the east side, and neighbourhoods in Brooklyn. 


In [39]:
config = m.config

In [40]:
import json
with open("config.json", "w") as outfile:
    json.dump(config, outfile)

In [41]:
m.save_to_html(file_name = 'NewYorkBikeTrips.html', read_only = False, config = config)

Map saved to NewYorkBikeTrips.html!
