# Columbia Citibike Data Analysis

This notebook loads the filtered Citibike data for Columbia University area stations.

In [50]:
import pandas as pd
import os

## Load Filtered Data

Load the pre-filtered dataset containing only trips involving Columbia University area stations.

In [51]:
# Load the filtered data
data_path = os.path.join('..', 'data', 'columbia_filtered_citibike.csv')
df = pd.read_csv(data_path, parse_dates=['started_at', 'ended_at'])

print(f"Loaded DataFrame shape: {df.shape}")
print(f"Total rows: {df.shape[0]:,}")
print(f"Total columns: {df.shape[1]}")

Loaded DataFrame shape: (529908, 13)
Total rows: 529,908
Total columns: 13



Columns (5,7) have mixed types. Specify dtype option on import or set low_memory=False.



## Data Overview

In [52]:
# Display basic information
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 529908 entries, 0 to 529907
Data columns (total 13 columns):
 #   Column              Non-Null Count   Dtype         
---  ------              --------------   -----         
 0   ride_id             529908 non-null  object        
 1   rideable_type       529908 non-null  object        
 2   started_at          529908 non-null  datetime64[ns]
 3   ended_at            529908 non-null  datetime64[ns]
 4   start_station_name  529792 non-null  object        
 5   start_station_id    529792 non-null  object        
 6   end_station_name    529146 non-null  object        
 7   end_station_id      529095 non-null  object        
 8   start_lat           529792 non-null  float64       
 9   start_lng           529792 non-null  float64       
 10  end_lat             529096 non-null  float64       
 11  end_lng             529096 non-null  float64       
 12  member_casual       529908 non-null  object        
dtypes: datetime64[ns](2), float64

In [53]:
# Display first few rows
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
0,DF041079291BCB57,classic_bike,2024-01-01 00:05:39.030,2024-01-01 00:29:03.147,W 116 St & Amsterdam Ave,7692.11,W 116 St & Amsterdam Ave,7692.11,40.806758,-73.960708,40.806758,-73.960708,casual
1,ECC36795CBE519C0,electric_bike,2024-01-01 00:12:53.593,2024-01-01 00:44:46.877,W 45 St & 8 Ave,6676.02,W 113 St & Broadway,7713.01,40.759291,-73.988597,40.805973,-73.964928,casual
2,09AEBF4510BFBE52,electric_bike,2024-01-01 00:13:21.695,2024-01-01 00:45:09.962,W 45 St & 8 Ave,6676.02,W 113 St & Broadway,7713.01,40.759291,-73.988597,40.805973,-73.964928,casual
3,79C06624CD5FDD91,electric_bike,2024-01-01 00:13:27.263,2024-01-01 00:44:31.619,W 45 St & 8 Ave,6676.02,W 113 St & Broadway,7713.01,40.759291,-73.988597,40.805973,-73.964928,casual
4,E4C6AAB102A0EFD9,electric_bike,2024-01-01 00:13:30.398,2024-01-01 00:44:39.129,W 45 St & 8 Ave,6676.02,W 113 St & Broadway,7713.01,40.759291,-73.988597,40.805973,-73.964928,casual


In [54]:
# Display last few rows
df.tail()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
529903,5A0849C2B6727F27,electric_bike,2025-10-31 23:34:54.881,2025-10-31 23:48:01.874,W 92 St & Broadway,7502.01,W 116 St & Broadway,7713.11,40.7921,-73.9739,40.8082,-73.9641,casual
529904,AF2930038B1F3AC2,classic_bike,2025-10-31 23:41:38.825,2025-10-31 23:44:58.094,W 113 St & Broadway,7713.01,W 110 St & Amsterdam Ave,7646.04,40.805973,-73.964928,40.802692,-73.96295,member
529905,71A90135DC79B8A4,electric_bike,2025-10-31 23:41:56.765,2025-10-31 23:47:49.764,W 92 St & Broadway,7502.01,W 113 St & Broadway,7713.01,40.7921,-73.9739,40.805973,-73.964928,casual
529906,108438BF3F88CBF8,electric_bike,2025-10-31 23:47:54.310,2025-10-31 23:51:04.795,Amsterdam Ave & W 119 St,7727.07,Cathedral Pkwy & Broadway,7680.03,40.808625,-73.959621,40.804213,-73.966991,member
529907,BA199318AB4AD1BA,electric_bike,2025-10-31 23:51:14.035,2025-10-31 23:57:06.333,Broadway & W 142 St,7981.16,W 116 St & Broadway,7713.11,40.824686,-73.951947,40.8082,-73.9641,member


In [55]:
# Basic statistics
df.describe()

Unnamed: 0,started_at,ended_at,start_lat,start_lng,end_lat,end_lng
count,529908,529908,529792.0,529792.0,529096.0,529096.0
mean,2024-12-30 00:04:54.543382016,2024-12-30 00:18:58.433293312,40.8016,-73.963025,40.799992,-73.963692
min,2024-01-01 00:05:39.030000,2024-01-01 00:26:03.535000,40.635679,-74.021506,40.638246,-74.043845
25%,2024-07-26 09:19:21.963500032,2024-07-26 09:33:55.308499968,40.799757,-73.964928,40.796879,-73.964928
50%,2024-11-30 07:46:02.148499968,2024-11-30 07:59:35.353499904,40.806758,-73.9634,40.806758,-73.9634
75%,2025-06-25 15:11:36.047749888,2025-06-25 15:26:54.370749952,40.810285,-73.959621,40.810285,-73.959621
max,2025-10-31 23:51:14.035000,2025-10-31 23:57:06.333000,40.8863,-73.84672,40.8863,-73.84887
std,,,0.018286,0.011517,0.020131,0.012613


---

## Example Visualization

Map of Columbia University area stations included in the filtered dataset.

In [56]:
import plotly.express as px

In [57]:
# Extract unique stations from start and end locations
start_stations = df[['start_station_id', 'start_station_name', 'start_lat', 'start_lng']].dropna()
start_stations.columns = ['station_id', 'station_name', 'lat', 'lng']

end_stations = df[['end_station_id', 'end_station_name', 'end_lat', 'end_lng']].dropna()
end_stations.columns = ['station_id', 'station_name', 'lat', 'lng']

# Combine and get unique stations
all_stations = pd.concat([start_stations, end_stations]).drop_duplicates(subset=['station_id'])

# Filter for Columbia stations only
columbia_station_ids = ["7783.18", "7741.04", "7745.07", "7727.07", "7713.11", "7692.11", "7713.01"]
columbia_stations = all_stations[all_stations['station_id'].isin(columbia_station_ids)]

print(f"Found {len(columbia_stations)} Columbia University area stations:")
print(columbia_stations[['station_id', 'station_name']].to_string(index=False))

Found 7 Columbia University area stations:
station_id                   station_name
   7692.11       W 116 St & Amsterdam Ave
   7713.01            W 113 St & Broadway
   7713.11            W 116 St & Broadway
   7727.07       Amsterdam Ave & W 119 St
   7783.18            Broadway & W 122 St
   7741.04 Morningside Dr & Amsterdam Ave
   7745.07       W 120 St & Claremont Ave


In [58]:
# Create interactive map with Plotly
fig = px.scatter_map(
	columbia_stations,
	lat='lat',
	lon='lng',
	hover_name='station_name',
	hover_data={'station_id': True, 'lat': ':.6f', 'lng': ':.6f'},
	zoom=14,
	height=600,
	title='Columbia University Area Citibike Stations'
)

# Use OpenStreetMap tiles and center on Columbia
fig.update_layout(
	map_style='open-street-map',
	map_center={'lat': 40.807, 'lon': -73.962}
)

# Update marker appearance
fig.update_traces(marker=dict(size=12, color='blue'))

fig.show()

---

## Data Analysis

Add your analysis below.

In [59]:
# Your analysis code here