### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import time

from geopy.geocoders import Nominatim
import random
import requests
from bs4 import BeautifulSoup

import plotly.express as px
import plotly.graph_objects as go
import polyline

# Data Ingest & Initial Processing

## Data Sourcing
For the purpose of optimizing routes for ski resorts and areas in New York state. The names of these areas are to be parsed from the following website.

[Ski Central]('https://www.skicentral.com/') - A popular site to review popular ski destinations worldwide.

For the purposes of this optimization, we have restricted the area selection to New York state. The state with the most ski resorts and areas in the US. Obtaining these names and their location data is a straight forward procedure.

### Web Scraping (Resort Names)

In [2]:
base_url = "https://www.skicentral.com"
response = requests.get(f"{base_url}/newyork.html")
soup = BeautifulSoup(response.text, 'html.parser')

resort_titles = soup.find_all('div', class_='resorttitle')
resort_names = [title.text.strip() for title in resort_titles]
print(resort_titles)
print(resort_names)

[<div class="resorttitle"><a href="whiteface.html">Whiteface Mountain</a></div>, <div class="resorttitle"><a href="goremountain.html">Gore Mountain</a></div>, <div class="resorttitle"><a href="huntermountain.html">Hunter Mountain</a></div>, <div class="resorttitle"><a href="belleayre.html">Belleayre Mountain</a></div>, <div class="resorttitle"><a href="holidayvalley.html">Holiday Valley</a></div>, <div class="resorttitle"><a href="skiwindham.html">Windham Mountain</a></div>, <div class="resorttitle"><a href="greekpeak.html">Greek Peak</a></div>, <div class="resorttitle"><a href="catamount.html">Catamount Ski Area</a></div>, <div class="resorttitle"><a href="peeknpeak.html">Peek'n Peak Resort</a></div>, <div class="resorttitle"><a href="holimont.html">HoliMont</a></div>, <div class="resorttitle"><a href="beartownskiarea.html">Beartown Ski Area</a></div>, <div class="resorttitle"><a href="brantlingskiandsnowboardcenter.html">Brantling Ski and Snowboard Center</a></div>, <div class="resor

### Parse and collect resort data

In [3]:
resorts = []
for title in resort_titles:
    link = title.find('a')
    if link:
        resort_url = base_url + "/" + link.get('href')
        resorts.append({
        'name': title.text.strip(),
        'url': resort_url
    })
print(resorts)

[{'name': 'Whiteface Mountain', 'url': 'https://www.skicentral.com/whiteface.html'}, {'name': 'Gore Mountain', 'url': 'https://www.skicentral.com/goremountain.html'}, {'name': 'Hunter Mountain', 'url': 'https://www.skicentral.com/huntermountain.html'}, {'name': 'Belleayre Mountain', 'url': 'https://www.skicentral.com/belleayre.html'}, {'name': 'Holiday Valley', 'url': 'https://www.skicentral.com/holidayvalley.html'}, {'name': 'Windham Mountain', 'url': 'https://www.skicentral.com/skiwindham.html'}, {'name': 'Greek Peak', 'url': 'https://www.skicentral.com/greekpeak.html'}, {'name': 'Catamount Ski Area', 'url': 'https://www.skicentral.com/catamount.html'}, {'name': "Peek'n Peak Resort", 'url': 'https://www.skicentral.com/peeknpeak.html'}, {'name': 'HoliMont', 'url': 'https://www.skicentral.com/holimont.html'}, {'name': 'Beartown Ski Area', 'url': 'https://www.skicentral.com/beartownskiarea.html'}, {'name': 'Brantling Ski and Snowboard Center', 'url': 'https://www.skicentral.com/brantlin

In [4]:
geolocator = Nominatim(user_agent="DCJT3_NY_SKI")

for resort in resorts:
    location = geolocator.geocode(f"{resort['name']}, NY")
    if location:
        resort['latitude'] = location.latitude
        resort['longitude'] = location.longitude
        resort['address'] = location.address

    # Mountain stats scrape
    response = requests.get(resort['url'])
    resort_soup = BeautifulSoup(response.text, 'html.parser')
    stats_table = resort_soup.find('table', id='mountainstatistics')

    if stats_table:
        for row in stats_table.find_all('tr'):
            cols = row.find_all(['td', 'th'])
            if len(cols) == 2:
                stat_name = cols[0].text.strip().lower().replace(' ', '_')
                stat_value = cols[1].text.strip()
                resort[stat_name] = stat_value
    time.sleep(1)

df = pd.DataFrame(resorts)
df.head()

Unnamed: 0,name,url,latitude,longitude,address,vertical_rise,base_elevation,summit_elevation,annual_snowfall,number_of_trails,skiable_acres,longest_run,snowmaking
0,Whiteface Mountain,https://www.skicentral.com/whiteface.html,44.365784,-73.902984,"Whiteface Mountain, Essex County, New York, Un...",3430 ft,1220 ft,4650 ft,190 inches,90,288,2.1 miles / 3.4 km,98%
1,Gore Mountain,https://www.skicentral.com/goremountain.html,43.672954,-74.048853,"Gore Mountain, Town of Johnsburg, Warren Count...",2537 ft,998 ft,3600 ft,150 inches,107,439,4.4 miles / 7.1 km,97%
2,Hunter Mountain,https://www.skicentral.com/huntermountain.html,42.177866,-74.230422,"Hunter Mountain, Town of Hunter, Greene County...",1600 ft,1600 ft,3200 ft,125 inches,58,240,,100%
3,Belleayre Mountain,https://www.skicentral.com/belleayre.html,42.126893,-74.474075,"Belleayre Mountain Day Use Area, Pine Hill, To...",1404 ft,2025 ft,3429 ft,141 inches,50,171,,96%
4,Holiday Valley,https://www.skicentral.com/holidayvalley.html,42.263145,-78.663611,"Holiday Valley, Town of Ellicottville, Cattara...",750 ft,1500 ft,2250 ft,180 inches,58,290,0.8 miles / 1.3 km,95%


# Data Description

- **name**: name of the resort as it appears on the web source.
- **url**: URL of the resort or ski area as presented on the web source.
- **vertical_rise**: a measurement of the total elevation change a skier descends from the highest point to the lowest point on a ski run.
- **base_elevation**: a measurment of the altitude at the bottom of the ski resort or ski area, essentially the lowest point of on the mountain accessable to skiers.
- **summit_elevation**: a measurement of the height above sea level of the highest point on a mountain.
- **annual_snowfall**: the total amount of snow that falls over the course of a year.
- **number_of_trails**: the total number of trails on the mountain
- **skiable_acres**: the amount of terrain that is accessible by ski.
- **longest_run**: a measurement refering to a continuous downhill ski slope that covers the greatest distance from top to bottom on a mountain.
- **snowmaking**: a measurement or indication that the ski resort or area creates artifical snow for the purpose producing snow when natural snowfall is limited.
- **latitude**: decimal latitude of resort location.
- **longitude**: decimal longitude of resort location.
- **address**: geolocated address based on query "`name`, NY".

# Data Processing

In [5]:
df_clean = df.dropna(subset=['latitude', 'longitude'])
df_clean['vertical_rise_num'] = df_clean['vertical_rise'].str.replace(' ft', '').astype(float)
df_filtered = df_clean[df_clean['name'] != 'HoliMont'] # removed, too close to Holiday Valley (higher elevation)
top_9 = df_filtered.head(9)
top_9

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['vertical_rise_num'] = df_clean['vertical_rise'].str.replace(' ft', '').astype(float)


Unnamed: 0,name,url,latitude,longitude,address,vertical_rise,base_elevation,summit_elevation,annual_snowfall,number_of_trails,skiable_acres,longest_run,snowmaking,vertical_rise_num
0,Whiteface Mountain,https://www.skicentral.com/whiteface.html,44.365784,-73.902984,"Whiteface Mountain, Essex County, New York, Un...",3430 ft,1220 ft,4650 ft,190 inches,90,288.0,2.1 miles / 3.4 km,98%,3430.0
1,Gore Mountain,https://www.skicentral.com/goremountain.html,43.672954,-74.048853,"Gore Mountain, Town of Johnsburg, Warren Count...",2537 ft,998 ft,3600 ft,150 inches,107,439.0,4.4 miles / 7.1 km,97%,2537.0
2,Hunter Mountain,https://www.skicentral.com/huntermountain.html,42.177866,-74.230422,"Hunter Mountain, Town of Hunter, Greene County...",1600 ft,1600 ft,3200 ft,125 inches,58,240.0,,100%,1600.0
3,Belleayre Mountain,https://www.skicentral.com/belleayre.html,42.126893,-74.474075,"Belleayre Mountain Day Use Area, Pine Hill, To...",1404 ft,2025 ft,3429 ft,141 inches,50,171.0,,96%,1404.0
4,Holiday Valley,https://www.skicentral.com/holidayvalley.html,42.263145,-78.663611,"Holiday Valley, Town of Ellicottville, Cattara...",750 ft,1500 ft,2250 ft,180 inches,58,290.0,0.8 miles / 1.3 km,95%,750.0
5,Windham Mountain,https://www.skicentral.com/skiwindham.html,42.293926,-74.261312,"Windham Mountain Club, Town of Windham, Greene...",1600 ft,1500 ft,3100 ft,105 inches,54,285.0,,97%,1600.0
6,Greek Peak,https://www.skicentral.com/greekpeak.html,42.50232,-76.148046,"Greek Peak Mountain Resort, 2000, South Hill R...",952 ft,1148 ft,2100 ft,122 inches,38,220.0,1.5 miles / 2.4 km,83%,952.0
8,Peek'n Peak Resort,https://www.skicentral.com/peeknpeak.html,42.060463,-79.744066,"Peek'n Peak Resort, Abbey Lane, Town of French...",400 ft,1400 ft,1800 ft,200 inches,27,105.0,0 miles / 1 km,100%,400.0
10,Beartown Ski Area,https://www.skicentral.com/beartownskiarea.html,44.764111,-73.584055,"Beartown Ski Area, Beartown Road, Beartown, To...",150 ft,,,,9,,,yes,150.0


In [6]:
fig = px.scatter_mapbox(df_filtered[:9], 
                       lat='latitude', 
                       lon='longitude',
                       hover_name='name',
                       zoom=5,
                       mapbox_style='carto-positron')

# Update the layout to center on NY
fig.update_layout(
    title='Selected New York Ski Resorts',
    mapbox=dict(
        center=dict(lat=43.2994, lon=-74.2179)  # Center of NY State
    )
)

fig.show()

# Exploratory Data Analysis

# Random Route Generator

In [7]:
def osrm_route(start_point, end_point):
    osrm_url = f"http://router.project-osrm.org/route/v1/driving/{start_point[1]},{start_point[0]};{end_point[1]},{end_point[0]}"
    response = requests.get(osrm_url)
    
    data = response.json()
    if data["code"] == "Ok":
        return {
            'distance': data["routes"][0]["distance"] / 1000 * 0.621371,  # meters to miles
            'geometry': data["routes"][0]["geometry"]
        }
    return None

In [8]:
def random_route_gen(df):
    locations = list(range(len(df))) # create list of indexes 
    route = random.sample(locations, len(locations)) # randomly sample from list
    route.append(route[0])  # Return to start

    total_distance = 0
    all_route_points = []
    route_details = []

    for i in range(len(route)-1):
        start = df.iloc[route[i]]
        end = df.iloc[route[i+1]]

        route_data = osrm_route(
            (start['latitude'], start['longitude']),
            (end['latitude'], end['longitude'])
        )

        if route_data:
            total_distance += route_data['distance']
            decoded_points = polyline.decode(route_data['geometry'])
            all_route_points.append(decoded_points)
            route_details.append({
                'from': start['name'],
                'to': end['name'],
                'distance': route_data['distance']
            })

    fig_random_route = go.Figure()

    # Add route lines
    for points in all_route_points:
        lat, lon = zip(*points)
        fig.add_trace(go.Scattermapbox(
            lon=lon,
            lat=lat,
            mode='lines',
            line=dict(width=2, color='blue'),
            hoverinfo='skip'
        ))

    # Add resort markers
    resort_lat = []
    resort_lon = []
    resort_names = []
    for i, idx in enumerate(route[:-1]):
        location = df.iloc[idx]
        resort_lat.append(location['latitude'])
        resort_lon.append(location['longitude'])
        resort_names.append(f"Stop {i+1}: {location['name']}")

    fig_random_route.add_trace(go.Scattermapbox(
        lon=resort_lon,
        lat=resort_lat,
        mode='markers',
        marker=dict(size=12, color='red'),
        text=resort_names,
        name="Ski Resort"
    ))

    # Update layout
    fig_random_route.update_layout(
        title='New York Ski Resort - Random Route',
        mapbox=dict(
            style="carto-positron",
            center=dict(lat=np.mean(resort_lat), lon=np.mean(resort_lon)),
            zoom=5
        ),
        showlegend=False,
        margin=dict(l=0, r=0, t=30, b=0)
    )
    
    for i, idx in enumerate(route[:-1]):
        print(f"{i+1}. {df.iloc[idx]['name']}")

    return fig, total_distance, route_details

In [9]:
def save_route_for_web(fig_random_route, filename='random_route.html'):
    try:
        fig_random_route.write_html(filename, 
                      full_html=True,
                      include_plotlyjs='cdn')
        return filename
    except Exception as e:
        print(f"Error saving route: {e}")
        return None

In [10]:
fig, total_dist, route_details = random_route_gen(top_9)

1. Belleayre Mountain
2. Hunter Mountain
3. Beartown Ski Area
4. Peek'n Peak Resort
5. Whiteface Mountain
6. Greek Peak
7. Gore Mountain
8. Holiday Valley
9. Windham Mountain


# Optimized Route (Dijkstra's Algorithm)