# Step 3: make a map

## Import dependencies

Code dependencies.

In [35]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown


Import the trip data file.

In [36]:
# Read the per-hub-pair trip statistics into a DataFrame
trips_csv = Path("data/hub-pair-stats.csv").resolve()
df = pd.read_csv(trips_csv)

# Peek at five randomly chosen rows
df.sample(5)

Unnamed: 0,Hub Pair,Total Count,Hub 1 Geolocation,Hub 2 Geolocation,Hub 1 Start Count,Hub 2 Start Count,Single Hub
60,"('Market Square', 'Spring St, Cofield')",2,"(41.1609922, -73.8627212)","(41.1570899, -73.8640002)",1,1,False
59,"('Spring & Waller', 'Spring St, Cofield')",2,"(41.1593892, -73.8638232)","(41.1570899, -73.8640002)",1,1,False
27,"('Metro North - Plaza', 'Spring St, Cofield')",12,"(41.157844, -73.868112)","(41.1570899, -73.8640002)",6,6,False
40,"('Cronton Ave & Pleasantville Road', 'Municipa...",6,"(41.1669377, -73.8497735)","(41.1618223, -73.8629757)",0,6,False
53,"('C Town Markets', 'Spring & Waller')",4,"(41.1653387, -73.856297)","(41.1593892, -73.8638232)",1,3,False


In [37]:
# Rank hub pairs from most to least travelled.
# Reassigning the sorted result (instead of sorting with inplace=True) avoids
# silently mutating the frame that the previous cell displayed.
df = df.sort_values(by="Total Count", ascending=False)
df.head(10)

Unnamed: 0,Hub Pair,Total Count,Hub 1 Geolocation,Hub 2 Geolocation,Hub 1 Start Count,Hub 2 Start Count,Single Hub
0,"('Nelson park', 'Nelson park')",159,"(41.1521048, -73.8623897)","(41.1521048, -73.8623897)",159,159,True
1,"('Arcadian Shopping Center', 'Municipal Parkin...",132,"(41.1394616, -73.861803)","(41.1618223, -73.8629757)",46,86,False
2,"('Metro North - Plaza', 'Nelson park')",92,"(41.157844, -73.868112)","(41.1521048, -73.8623897)",34,58,False
3,"('Wishnie Park', 'Wishnie Park')",86,"(41.1607067, -73.8477673)","(41.1607067, -73.8477673)",86,86,True
4,"('Metro North - Plaza', 'Metro North - Plaza')",78,"(41.157844, -73.868112)","(41.157844, -73.868112)",78,78,True
5,"('Spring St, Cofield', 'Spring St, Cofield')",59,"(41.1570899, -73.8640002)","(41.1570899, -73.8640002)",59,59,True
6,"('Cronton Ave & Pleasantville Road', 'Metro No...",51,"(41.1669377, -73.8497735)","(41.157844, -73.868112)",15,36,False
7,"('C Town Markets', 'C Town Markets')",48,"(41.1653387, -73.856297)","(41.1653387, -73.856297)",48,48,True
8,"('Municipal Parking Lot Lot# 7', 'Municipal Pa...",47,"(41.1618223, -73.8629757)","(41.1618223, -73.8629757)",47,47,True
9,"('Cronton Ave & Pleasantville Road', 'Cronton ...",46,"(41.1669377, -73.8497735)","(41.1669377, -73.8497735)",46,46,True


Import the hub addresses

In [38]:
# Read the hub address table. The "OpenStreetMap Data" column is not needed
# in this notebook, so it is discarded as part of the load.
hub_locations_csv = Path("data/hub-locations.csv").resolve()
addresses_df = pd.read_csv(hub_locations_csv).drop(columns=["OpenStreetMap Data"])

addresses_df.sample(5)

Unnamed: 0,Hub,Address,Geolocation
7,Ossining Public Library,"53 Croton Ave, Ossining, NY 10562","(41.1642872, -73.8604165)"
3,Market Square,"160 Main St, Ossining, NY 10562","(41.1609922, -73.8627212)"
5,Municipal Parking Lot Lot# 7,"1-15 Leonard St, Ossining, NY 10562","(41.1618223, -73.8629757)"
10,Wishnie Park,"145-149 Orchard Rd, Briarcliff Manor, NY 10510","(41.1607067, -73.8477673)"
1,C Town Markets,"100 Croton Ave, Ossining, NY 10562","(41.1653387, -73.856297)"


In [39]:
# Add a 'Start Count' field to addresses_df.
#
# For each hub, sum 'Hub 1 Start Count' over the rows of df whose 'Hub Pair'
# text begins with "('<hub>'", i.e. rows where the hub is the first member of
# the pair.
#
# The prefix is matched literally with str.startswith. An additional
# str.contains() filter is unnecessary (a matching prefix already implies the
# name is contained) and would interpret the hub name as a regular expression,
# which mis-matches or raises for names containing characters such as
# "(", "+" or "?". na=False makes rows with a missing 'Hub Pair' count as
# non-matching.
#
# NOTE(review): rows where the hub is the *second* member of the pair do not
# contribute here -- confirm that this is the intended definition of
# 'Start Count'.
addresses_df['Start Count'] = addresses_df['Hub'].apply(
    lambda hub: df.loc[
        df['Hub Pair'].str.startswith(f"('{hub}'", na=False),
        'Hub 1 Start Count',
    ].sum()
)

addresses_df

Unnamed: 0,Hub,Address,Geolocation,Start Count
0,Arcadian Shopping Center,"225-207 Albany Post Rd, Briarcliff Manor, NY 1...","(41.1394616, -73.861803)",166
1,C Town Markets,"100 Croton Ave, Ossining, NY 10562","(41.1653387, -73.856297)",95
2,Cronton Ave & Pleasantville Road,"Croton Ave & Pleasantville Rd, Ossining, NY 10562","(41.1669377, -73.8497735)",93
3,Market Square,"160 Main St, Ossining, NY 10562","(41.1609922, -73.8627212)",50
4,Metro North - Plaza,"1 Secor Rd, Ossining, NY 10562","(41.157844, -73.868112)",150
5,Municipal Parking Lot Lot# 7,"1-15 Leonard St, Ossining, NY 10562","(41.1618223, -73.8629757)",66
6,Nelson park,"20 Madison Ave, Ossining, NY 10562","(41.1521048, -73.8623897)",166
7,Ossining Public Library,"53 Croton Ave, Ossining, NY 10562","(41.1642872, -73.8604165)",51
8,Spring & Waller,"Spring St & Waller Ave, Ossining, NY 10562","(41.1593892, -73.8638232)",32
9,"Spring St, Cofield","Spring St & Broad Ave, Ossining, NY 10562","(41.1570899, -73.8640002)",59


In [40]:
# Add an 'End Count' field to addresses_df.
#
# For each hub, sum 'Hub 2 Start Count' over the rows of df whose 'Hub Pair'
# text ends with ", '<hub>')", i.e. rows where the hub is the second member of
# the pair.
#
# The suffix is matched literally with str.endswith. A str.contains() filter
# would interpret the hub name as a regular expression, which mis-matches or
# raises for names containing characters such as "(", "+" or "?".
# na=False makes rows with a missing 'Hub Pair' count as non-matching.
#
# NOTE(review): the map popup in this notebook presents 'Hub 2 Start Count' as
# trips going from hub 2 to hub 1, which would make this a count of trips
# *starting* at the hub -- confirm that 'End Count' is the right label.
addresses_df['End Count'] = addresses_df['Hub'].apply(
    lambda hub: df.loc[
        df['Hub Pair'].str.endswith(f", '{hub}')", na=False),
        'Hub 2 Start Count',
    ].sum()
)

addresses_df

Unnamed: 0,Hub,Address,Geolocation,Start Count,End Count
0,Arcadian Shopping Center,"225-207 Albany Post Rd, Briarcliff Manor, NY 1...","(41.1394616, -73.861803)",166,40
1,C Town Markets,"100 Croton Ave, Ossining, NY 10562","(41.1653387, -73.856297)",95,54
2,Cronton Ave & Pleasantville Road,"Croton Ave & Pleasantville Rd, Ossining, NY 10562","(41.1669377, -73.8497735)",93,50
3,Market Square,"160 Main St, Ossining, NY 10562","(41.1609922, -73.8627212)",50,72
4,Metro North - Plaza,"1 Secor Rd, Ossining, NY 10562","(41.157844, -73.868112)",150,129
5,Municipal Parking Lot Lot# 7,"1-15 Leonard St, Ossining, NY 10562","(41.1618223, -73.8629757)",66,150
6,Nelson park,"20 Madison Ave, Ossining, NY 10562","(41.1521048, -73.8623897)",166,257
7,Ossining Public Library,"53 Croton Ave, Ossining, NY 10562","(41.1642872, -73.8604165)",51,84
8,Spring & Waller,"Spring St & Waller Ave, Ossining, NY 10562","(41.1593892, -73.8638232)",32,65
9,"Spring St, Cofield","Spring St & Broad Ave, Ossining, NY 10562","(41.1570899, -73.8640002)",59,91


In [41]:
# Combine the two per-hub tallies into 'Total Count' and rank hubs by it.
# Built as a single chain that returns a new frame (no inplace=True), so the
# cell gives the same result however many times it is re-run.
addresses_df = (
    addresses_df
    .assign(**{'Total Count': lambda hubs: hubs['Start Count'] + hubs['End Count']})
    .sort_values(by='Total Count', ascending=False)
)
addresses_df

Unnamed: 0,Hub,Address,Geolocation,Start Count,End Count,Total Count
6,Nelson park,"20 Madison Ave, Ossining, NY 10562","(41.1521048, -73.8623897)",166,257,423
4,Metro North - Plaza,"1 Secor Rd, Ossining, NY 10562","(41.157844, -73.868112)",150,129,279
5,Municipal Parking Lot Lot# 7,"1-15 Leonard St, Ossining, NY 10562","(41.1618223, -73.8629757)",66,150,216
0,Arcadian Shopping Center,"225-207 Albany Post Rd, Briarcliff Manor, NY 1...","(41.1394616, -73.861803)",166,40,206
10,Wishnie Park,"145-149 Orchard Rd, Briarcliff Manor, NY 10510","(41.1607067, -73.8477673)",86,106,192
9,"Spring St, Cofield","Spring St & Broad Ave, Ossining, NY 10562","(41.1570899, -73.8640002)",59,91,150
1,C Town Markets,"100 Croton Ave, Ossining, NY 10562","(41.1653387, -73.856297)",95,54,149
2,Cronton Ave & Pleasantville Road,"Croton Ave & Pleasantville Rd, Ossining, NY 10562","(41.1669377, -73.8497735)",93,50,143
7,Ossining Public Library,"53 Croton Ave, Ossining, NY 10562","(41.1642872, -73.8604165)",51,84,135
3,Market Square,"160 Main St, Ossining, NY 10562","(41.1609922, -73.8627212)",50,72,122


In [42]:
def _same_hub_total(hub):
    """Sum 'Total Count' over the df rows whose 'Hub Pair' pairs `hub` with itself."""
    same_hub_pair = f"('{hub}', '{hub}')"
    return df.loc[df['Hub Pair'] == same_hub_pair, 'Total Count'].sum()


# Add a 'Single Hub Count' field to addresses_df
addresses_df['Single Hub Count'] = addresses_df['Hub'].apply(_same_hub_total)

addresses_df

Unnamed: 0,Hub,Address,Geolocation,Start Count,End Count,Total Count,Single Hub Count
6,Nelson park,"20 Madison Ave, Ossining, NY 10562","(41.1521048, -73.8623897)",166,257,423,159
4,Metro North - Plaza,"1 Secor Rd, Ossining, NY 10562","(41.157844, -73.868112)",150,129,279,78
5,Municipal Parking Lot Lot# 7,"1-15 Leonard St, Ossining, NY 10562","(41.1618223, -73.8629757)",66,150,216,47
0,Arcadian Shopping Center,"225-207 Albany Post Rd, Briarcliff Manor, NY 1...","(41.1394616, -73.861803)",166,40,206,40
10,Wishnie Park,"145-149 Orchard Rd, Briarcliff Manor, NY 10510","(41.1607067, -73.8477673)",86,106,192,86
9,"Spring St, Cofield","Spring St & Broad Ave, Ossining, NY 10562","(41.1570899, -73.8640002)",59,91,150,59
1,C Town Markets,"100 Croton Ave, Ossining, NY 10562","(41.1653387, -73.856297)",95,54,149,48
2,Cronton Ave & Pleasantville Road,"Croton Ave & Pleasantville Rd, Ossining, NY 10562","(41.1669377, -73.8497735)",93,50,143,46
7,Ossining Public Library,"53 Croton Ave, Ossining, NY 10562","(41.1642872, -73.8604165)",51,84,135,45
3,Market Square,"160 Main St, Ossining, NY 10562","(41.1609922, -73.8627212)",50,72,122,38


## Map
Create a simple map visualizing each hub pair.

Calculate the minimum and maximum number of trips across all hub pairs.

In [43]:
# Smallest and largest 'Total Count' found among the hub pairs
trip_totals = df['Total Count']
min_count, max_count = trip_totals.min(), trip_totals.max()

min_count, max_count

(np.int64(1), np.int64(159))

Do some mapping. First, build the list of colors that the hub-pair lines are drawn from.

In [55]:
import seaborn as sns

# Build the list of hex colors that the hub-pair lines are drawn from.
# "tab20" is a qualitative palette with 20 distinct colors; requesting more
# than that only makes seaborn cycle through the same 20 again, so the list is
# kept at the palette's real size instead of being padded with repeats.
num_colors = 20
colors = sns.color_palette("tab20", num_colors).as_hex()

colors

In [None]:
import ast

import folium

# Create a map centered on Ossining, NY.
# NOTE: the name `map` shadows Python's built-in map(). It is kept because the
# cell that saves the rendered map calls `map.save(...)`.
map = folium.Map(location=[41.162, -73.861], zoom_start=14, tiles='CartoDB dark_matter')

# Add circles for each unique hub
for _, row in addresses_df.iterrows():
    # 'Geolocation' holds the text "(lat, lon)". ast.literal_eval parses that
    # literal without executing arbitrary code, which eval() would do.
    hub_coords = ast.literal_eval(row['Geolocation'])
    hub_name = row['Hub']
    # Per-hub tallies computed in the earlier cells
    start_hub_count = row['Start Count']
    end_hub_count = row['End Count']
    same_hub_count = row['Single Hub Count']
    total_hub_count = row['Total Count']
    # A hub without any recorded trips would otherwise divide by zero
    percent_same_hub = round((same_hub_count / total_hub_count) * 100) if total_hub_count else 0
    hub_popup = f"<h4>{hub_name}</h4><p><strong>{start_hub_count}</strong> trips started here</p><p><strong>{end_hub_count}</strong> trips ended here</p><p><strong>{percent_same_hub}%</strong> ({same_hub_count}) of these both started and ended here</p>"
    folium.CircleMarker(
        location=hub_coords,
        radius=5 + start_hub_count / 10,  # Base radius is 5, scaled by the count
        color='gray',
        fill=True,
        fill_color='black',
        fill_opacity=0.7,
        popup=folium.Popup(hub_popup, max_width=300, parse_html=False)
    ).add_to(map)


# Seeded generator so every run of the notebook picks the same line colors
color_rng = np.random.default_rng(42)

# Add lines for each pair of hubs
for _, row in df.iterrows():
    if pd.notnull(row['Hub Pair']):
        start_coords = ast.literal_eval(row['Hub 1 Geolocation'])
        end_coords = ast.literal_eval(row['Hub 2 Geolocation'])
        random_color = color_rng.choice(colors) # pick a color from the list
        line_weight = row['Total Count'] / 5  # Adjust the weight based on the count
        line = folium.PolyLine([start_coords, end_coords], color=random_color, weight=line_weight)

        # 'Hub Pair' holds the text of a two-element tuple of hub names.
        # Parsing it as a literal keeps names that contain a comma (such as
        # 'Spring St, Cofield') intact; splitting the text on "," would cut
        # them in two and label the popup with the wrong names.
        hub1_name, hub2_name = ast.literal_eval(row['Hub Pair'])

        # a popup that shows the number of trips between the two hubs in either direction
        popup_text = f'''
        <h4>{hub1_name} <-> {hub2_name}</h4>
        <p><strong>{row['Total Count']}</strong> total trips between the two, including:</p>
        <ul>
            <li><strong>{row['Hub 1 Start Count']}</strong>: {hub1_name} <strong>--></strong> {hub2_name}</li>
            <li><strong>{row['Hub 2 Start Count']}</strong>: {hub2_name} <strong>--></strong> {hub1_name}</li>
        </ul>
        '''
        folium.Popup(popup_text, max_width=300, parse_html=False).add_to(line) # add to line

        # add line to map
        line.add_to(map)


# Display the map
map

In [45]:
# Where the rendered map goes: an HTML file inside ./renders
renders_dir = Path('./renders').resolve()
map_file_path = renders_dir / 'all-trips-map.html'

# Create the renders directory (and any missing parents) if needed, then
# write the map out as HTML
renders_dir.mkdir(parents=True, exist_ok=True)
map.save(str(map_file_path))