# Importing needed Lib

In [1]:
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS
import numpy as np
from matplotlib import pyplot as plt


In [2]:
df = pd.read_csv('chicago_data.csv', index_col = 0)

In [3]:
# Group by 'from_station_name' and 'to_station_name'
grouped_df = df.groupby(['from_station_name', 'to_station_name']).size().reset_index(name='trip_count')

In [4]:
grouped_df

Unnamed: 0,from_station_name,to_station_name,trip_count
0,2112 W Peterson Ave,2112 W Peterson Ave,14
1,2112 W Peterson Ave,Ashland Ave & Belle Plaine Ave,1
2,2112 W Peterson Ave,Avondale Ave & Irving Park Rd,1
3,2112 W Peterson Ave,Benson Ave & Church St,2
4,2112 W Peterson Ave,Broadway & Argyle St,2
...,...,...,...
113674,Yates Blvd & 75th St,South Shore Dr & 74th St,2
113675,Yates Blvd & 75th St,Stony Island Ave & 71st St,2
113676,Yates Blvd & 75th St,Stony Island Ave & 75th St,3
113677,Yates Blvd & 75th St,Woodlawn Ave & 55th St,2


In [5]:
print(grouped_df['trip_count'].sum())
print(df.shape)

3603082
(3603082, 14)


In [6]:
# Rename the columns
grouped_df = grouped_df.rename(columns={
    'from_station_name': 'start_station_name',
    'to_station_name': 'end_station_name',
    'trip_count': 'trips'})

In [7]:
grouped_df

Unnamed: 0,start_station_name,end_station_name,trips
0,2112 W Peterson Ave,2112 W Peterson Ave,14
1,2112 W Peterson Ave,Ashland Ave & Belle Plaine Ave,1
2,2112 W Peterson Ave,Avondale Ave & Irving Park Rd,1
3,2112 W Peterson Ave,Benson Ave & Church St,2
4,2112 W Peterson Ave,Broadway & Argyle St,2
...,...,...,...
113674,Yates Blvd & 75th St,South Shore Dr & 74th St,2
113675,Yates Blvd & 75th St,Stony Island Ave & 71st St,2
113676,Yates Blvd & 75th St,Stony Island Ave & 75th St,3
113677,Yates Blvd & 75th St,Woodlawn Ave & 55th St,2


In [8]:
# Create a new column 'all_station_id' by concatenating 'from_station_id' and 'to_station_id'
df['all_station_id'] = df['from_station_id'].astype(str) + ', ' + df['to_station_id'].astype(str)

# Create a new column 'all_station_name' by concatenating 'from_station_name' and 'to_station_name'
df['all_station_name'] = df['from_station_name'] + ', ' + df['to_station_name']

# Combine the two station columns and drop duplicates for IDs and names
df_combined = pd.DataFrame({
    'station_id': pd.concat([df['from_station_id'], df['to_station_id']]).drop_duplicates(),
    'station_name': pd.concat([df['from_station_name'], df['to_station_name']]).drop_duplicates()
})

In [9]:
df_combined

Unnamed: 0,station_id,station_name
0,69.0,Damen Ave & Pierce Ave
1,253.0,Winthrop Ave & Lawrence Ave
2,98.0,LaSalle St & Washington St
3,125.0,Rush St & Hubbard St
4,129.0,Blue Island Ave & 18th St
...,...,...
3401823,632.0,Clark St & Newport St
3444409,651.0,Michigan Ave & 71st St
3487700,647.0,Elizabeth St & 59th St
3538937,664.0,Leavitt St & Belmont Ave (*)


In [10]:
df_combined. dtypes

station_id      float64
station_name     object
dtype: object

In [11]:
# Fill NaN values with 0 (or any other value) and convert 'station_id' to integer
df_combined['station_id'] = df_combined['station_id'].fillna(0).astype(int)

In [12]:
df_combined.head()

Unnamed: 0,station_id,station_name
0,69,Damen Ave & Pierce Ave
1,253,Winthrop Ave & Lawrence Ave
2,98,LaSalle St & Washington St
3,125,Rush St & Hubbard St
4,129,Blue Island Ave & 18th St


In [13]:
df_combined. dtypes

station_id       int32
station_name    object
dtype: object

#### Import location data - only for Chicago

In [14]:
# Load location data

df_stations_coordinates = pd.read_csv("all_bike_stations_coordinates.csv")

In [15]:
df_stations_coordinates

Unnamed: 0,station_id,lat,lon
0,2,41.8765,-87.6205
1,3,41.8672,-87.6154
2,4,41.8563,-87.6133
3,5,41.8741,-87.6277
4,6,41.8870,-87.6128
...,...,...,...
958,20253,41.9400,-87.6700
959,20254,41.9200,-87.6500
960,20256,41.9400,-87.6500
961,20257,41.9400,-87.6600


In [16]:
# Rename the columns
df_stations_coordinates = df_stations_coordinates.rename(columns={
    'lat': 'latitude',
    'lon': 'longitude'})

In [17]:
df_stations_coordinates

Unnamed: 0,station_id,latitude,longitude
0,2,41.8765,-87.6205
1,3,41.8672,-87.6154
2,4,41.8563,-87.6133
3,5,41.8741,-87.6277
4,6,41.8870,-87.6128
...,...,...,...
958,20253,41.9400,-87.6700
959,20254,41.9200,-87.6500
960,20256,41.9400,-87.6500
961,20257,41.9400,-87.6600


In [18]:
df_stations_coordinates. dtypes

station_id      int64
latitude      float64
longitude     float64
dtype: object

In [19]:
# Convert 'station_id' in df_combined to match the dtype of df_stations ('int64')
df_combined['station_id'] = df_combined['station_id'].astype('int64')

In [20]:
# Now merge the dataframes on the 'station_id' column
df_stations = pd.merge(df_combined, df_stations_coordinates, on='station_id', how='inner')  # Use 'how'

In [21]:
df_stations

Unnamed: 0,station_id,station_name,latitude,longitude
0,69,Damen Ave & Pierce Ave,41.9094,-87.6777
1,253,Winthrop Ave & Lawrence Ave,41.9688,-87.6577
2,98,LaSalle St & Washington St,41.8827,-87.6325
3,125,Rush St & Hubbard St,41.8902,-87.6262
4,129,Blue Island Ave & 18th St,41.8576,-87.6615
...,...,...,...,...
599,632,Clark St & Newport St,41.9445,-87.6547
600,651,Michigan Ave & 71st St,41.7653,-87.6217
601,647,Elizabeth St & 59th St,41.7867,-87.6559
602,664,Leavitt St & Belmont Ave (*),41.9394,-87.6833


In [22]:
# Merge on 'start_station_name' with 'station_name' to get start station details (latitude, longitude)
df_merged_start = pd.merge(grouped_df, df_stations, left_on='start_station_name', right_on='station_name', how='left')

In [23]:
# Rename the columns from df_stations after the merge to specify they are related to the start station
df_merged_start.rename(columns={'latitude': 'start_latitude', 'longitude': 'start_longitude', 'station_name': 'start_station_name'}, inplace=True)

In [24]:
# Merge again on 'end_station_name' with 'station_name' to get end station details (latitude, longitude)
df_final = pd.merge(df_merged_start, df_stations, left_on='end_station_name', right_on='station_name', how='left')

In [25]:
# Rename the columns from df_stations after the second merge to specify they are related to the end station
df_final.rename(columns={'latitude': 'end_latitude', 'longitude': 'end_longitude', 'station_name': 'end_station_name'}, inplace=True)


In [27]:
df_final.head()

Unnamed: 0,start_station_name,end_station_name,trips,station_id_x,start_station_name.1,start_latitude,start_longitude,station_id_y,end_station_name.1,end_latitude,end_longitude
0,2112 W Peterson Ave,2112 W Peterson Ave,14,456.0,2112 W Peterson Ave,41.9912,-87.6836,456.0,2112 W Peterson Ave,41.9912,-87.6836
1,2112 W Peterson Ave,Ashland Ave & Belle Plaine Ave,1,456.0,2112 W Peterson Ave,41.9912,-87.6836,246.0,Ashland Ave & Belle Plaine Ave,41.9561,-87.6688
2,2112 W Peterson Ave,Avondale Ave & Irving Park Rd,1,456.0,2112 W Peterson Ave,41.9912,-87.6836,483.0,Avondale Ave & Irving Park Rd,41.9534,-87.732
3,2112 W Peterson Ave,Benson Ave & Church St,2,456.0,2112 W Peterson Ave,41.9912,-87.6836,596.0,Benson Ave & Church St,42.0482,-87.6835
4,2112 W Peterson Ave,Broadway & Argyle St,2,456.0,2112 W Peterson Ave,41.9912,-87.6836,295.0,Broadway & Argyle St,41.9738,-87.6597


In [28]:
df_final.to_csv('df_final_locations_for_map.csv')

In [29]:
# Create KeplerGl instance
m = KeplerGl(height = 700, data={"data_1": df_final})

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


  return df.to_dict('split')
Out of range float values are not JSON compliant: nan
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant
  content = self.pack(content)


In [30]:
config = m.config

In [31]:
config

{}

In [32]:
# Initialize a Kepler.gl map
map_1 = KeplerGl(height=600)

# Add the DataFrame to the map
map_1.add_data(data=df_final, name="Trips Data")

# Display the map
map_1.save_to_html(file_name="kepler_map.html")
map_1

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


  return df.to_dict('split')
Out of range float values are not JSON compliant: nan
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant
  content = self.pack(content)


Map saved to kepler_map.html!


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



KeplerGl(data={'Trips Data': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, …

### settings you changed and why
- Arc Layer
The Arc Layer is Perfect for representing connections between two locations (e.g., shipping routes, travel patterns). 
The arcs can visually display flow or connectivity.

- Categorical Color
The categorical color schemes. For representing quantities/number of trips, a sequential color scale (light to dark) helps show magnitude clearly.

### particularly busy zone 

The city following have a very higher trips turnover compare to city parks 

- wicker park
- West Town
- Ukrainian Village

