### Import Libraries

In [1]:
import pandas as pd
import time
from geopy.geocoders import Nominatim
import requests
from bs4 import BeautifulSoup

import plotly.express as px

# Data Ingest & Initial Processing

## Data Sourcing
To optimize routes between ski resorts and areas in New York State, the names of these areas are parsed from the following website.

[Ski Central](https://www.skicentral.com/) - A popular site for reviewing ski destinations worldwide.

For the purposes of this optimization, we have restricted the area selection to New York State, the state with the most ski resorts and areas in the US. Obtaining these names and their location data is a straightforward procedure.

### Web Scraping (Resort Names)

In [2]:
# Site root; the New York listing page and every resort detail page hang off it.
base_url = "https://www.skicentral.com"

# A timeout keeps the cell from hanging indefinitely if the site is unresponsive.
response = requests.get(f"{base_url}/newyork.html", timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page,
# which would otherwise produce an empty resort list for the cells below.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Each resort on the listing page is wrapped in <div class="resorttitle">.
resort_titles = soup.find_all('div', class_='resorttitle')
resort_names = [title.text.strip() for title in resort_titles]
print(resort_titles)
print(resort_names)

[<div class="resorttitle"><a href="whiteface.html">Whiteface Mountain</a></div>, <div class="resorttitle"><a href="goremountain.html">Gore Mountain</a></div>, <div class="resorttitle"><a href="huntermountain.html">Hunter Mountain</a></div>, <div class="resorttitle"><a href="belleayre.html">Belleayre Mountain</a></div>, <div class="resorttitle"><a href="holidayvalley.html">Holiday Valley</a></div>, <div class="resorttitle"><a href="skiwindham.html">Windham Mountain</a></div>, <div class="resorttitle"><a href="greekpeak.html">Greek Peak</a></div>, <div class="resorttitle"><a href="catamount.html">Catamount Ski Area</a></div>, <div class="resorttitle"><a href="peeknpeak.html">Peek'n Peak Resort</a></div>, <div class="resorttitle"><a href="holimont.html">HoliMont</a></div>, <div class="resorttitle"><a href="beartownskiarea.html">Beartown Ski Area</a></div>, <div class="resorttitle"><a href="brantlingskiandsnowboardcenter.html">Brantling Ski and Snowboard Center</a></div>, <div class="resor

### Parse and collect resort data

In [3]:
# Build one record per resort title: its display name and the absolute URL
# of its detail page (site root + relative href taken from the title's link).
resorts = []
for title_div in resort_titles:
    anchor = title_div.find('a')
    if not anchor:
        # No link inside this title, so there is no detail page to record.
        continue
    detail_url = "/".join((base_url, anchor.get('href')))
    record = {
        'name': title_div.text.strip(),
        'url': detail_url,
    }
    resorts.append(record)
print(resorts)

[{'name': 'Whiteface Mountain', 'url': 'https://www.skicentral.com/whiteface.html'}, {'name': 'Gore Mountain', 'url': 'https://www.skicentral.com/goremountain.html'}, {'name': 'Hunter Mountain', 'url': 'https://www.skicentral.com/huntermountain.html'}, {'name': 'Belleayre Mountain', 'url': 'https://www.skicentral.com/belleayre.html'}, {'name': 'Holiday Valley', 'url': 'https://www.skicentral.com/holidayvalley.html'}, {'name': 'Windham Mountain', 'url': 'https://www.skicentral.com/skiwindham.html'}, {'name': 'Greek Peak', 'url': 'https://www.skicentral.com/greekpeak.html'}, {'name': 'Catamount Ski Area', 'url': 'https://www.skicentral.com/catamount.html'}, {'name': "Peek'n Peak Resort", 'url': 'https://www.skicentral.com/peeknpeak.html'}, {'name': 'HoliMont', 'url': 'https://www.skicentral.com/holimont.html'}, {'name': 'Beartown Ski Area', 'url': 'https://www.skicentral.com/beartownskiarea.html'}, {'name': 'Brantling Ski and Snowboard Center', 'url': 'https://www.skicentral.com/brantlin

In [7]:
# geopy's default request timeout is short (1 second) and lookups against the
# public Nominatim server can exceed it; allow more headroom per lookup.
geolocator = Nominatim(user_agent="DCJT3_NY_SKI", timeout=10)

for resort in resorts:
    # Geocode by resort name, with ", NY" appended to the query.
    location = geolocator.geocode(f"{resort['name']}, NY")
    if location:
        resort['latitude'] = location.latitude
        resort['longitude'] = location.longitude
        resort['address'] = location.address
    # When nothing is found the keys are simply left off this record; the
    # DataFrame built below fills the gaps with NaN.

    # Mountain stats scrape
    try:
        # Bounded wait so one stalled page cannot hang the whole loop.
        response = requests.get(resort['url'], timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: keep the resort (name/url/coordinates) and move on
        # without its statistics rather than aborting every remaining resort.
        print(f"Skipping mountain stats for {resort['name']}: {exc}")
        stats_table = None
    else:
        resort_soup = BeautifulSoup(response.text, 'html.parser')
        stats_table = resort_soup.find('table', id='mountainstatistics')

    if stats_table:
        for row in stats_table.find_all('tr'):
            cols = row.find_all(['td', 'th'])
            # Only two-cell rows are treated as "label, value" pairs.
            if len(cols) == 2:
                # Normalise the label into a snake_case key for the record.
                stat_name = cols[0].text.strip().lower().replace(' ', '_')
                stat_value = cols[1].text.strip()
                resort[stat_name] = stat_value
    # Pause between resorts to throttle requests to both remote services.
    time.sleep(1)

# One row per resort; columns are the union of keys across all records.
df = pd.DataFrame(resorts)
df

Unnamed: 0,name,url,vertical_rise,base_elevation,summit_elevation,annual_snowfall,number_of_trails,skiable_acres,longest_run,snowmaking,latitude,longitude,address
0,Whiteface Mountain,https://www.skicentral.com/whiteface.html,3430 ft,1220 ft,4650 ft,190 inches,90,288.0,2.1 miles / 3.4 km,98%,44.365784,-73.902984,"Whiteface Mountain, Essex County, New York, Un..."
1,Gore Mountain,https://www.skicentral.com/goremountain.html,2537 ft,998 ft,3600 ft,150 inches,107,439.0,4.4 miles / 7.1 km,97%,43.672954,-74.048853,"Gore Mountain, Town of Johnsburg, Warren Count..."
2,Hunter Mountain,https://www.skicentral.com/huntermountain.html,1600 ft,1600 ft,3200 ft,125 inches,58,240.0,,100%,42.177866,-74.230422,"Hunter Mountain, Town of Hunter, Greene County..."
3,Belleayre Mountain,https://www.skicentral.com/belleayre.html,1404 ft,2025 ft,3429 ft,141 inches,50,171.0,,96%,42.126893,-74.474075,"Belleayre Mountain Day Use Area, Pine Hill, To..."
4,Holiday Valley,https://www.skicentral.com/holidayvalley.html,750 ft,1500 ft,2250 ft,180 inches,58,290.0,0.8 miles / 1.3 km,95%,42.263145,-78.663611,"Holiday Valley, Town of Ellicottville, Cattara..."
5,Windham Mountain,https://www.skicentral.com/skiwindham.html,1600 ft,1500 ft,3100 ft,105 inches,54,285.0,,97%,42.293926,-74.261312,"Windham Mountain Club, Town of Windham, Greene..."
6,Greek Peak,https://www.skicentral.com/greekpeak.html,952 ft,1148 ft,2100 ft,122 inches,38,220.0,1.5 miles / 2.4 km,83%,42.50232,-76.148046,"Greek Peak Mountain Resort, 2000, South Hill R..."
7,Catamount Ski Area,https://www.skicentral.com/catamount.html,1000 ft,1000 ft,2000 ft,75 inches,36,130.0,1.8 miles / 2.8 km,98%,,,
8,Peek'n Peak Resort,https://www.skicentral.com/peeknpeak.html,400 ft,1400 ft,1800 ft,200 inches,27,105.0,0 miles / 1 km,100%,42.060463,-79.744066,"Peek'n Peak Resort, Abbey Lane, Town of French..."
9,HoliMont,https://www.skicentral.com/holimont.html,700 ft,1560 ft,2260 ft,180 inches,52,135.0,,100%,42.270015,-78.683382,"Holimont, Village of Ellicottville, Town of El..."


# Data Processing

In [8]:
# Subset of resorts to keep. Matching against the 'name' column is exact,
# so each entry must be spelled as it appears in the scraped data.
selected_resorts = [
    'Whiteface Mountain', 'Gore Mountain', 'Hunter Mountain',
    'Belleayre Mountain', 'Windham Mountain', 'Greek Peak',
    "Peek'n Peak Resort", 'HoliMont', 'Holiday Mountain',
]

# Boolean mask: True for rows whose name is one of the selected resorts.
is_selected = df['name'].isin(selected_resorts)
df_selected = df[is_selected]

# Show only the identifying and coordinate columns of the selection.
coordinate_columns = ['name', 'latitude', 'longitude']
print("Selected Resorts Data:")
print(df_selected[coordinate_columns])

Selected Resorts Data:
                  name   latitude  longitude
0   Whiteface Mountain  44.365784 -73.902984
1        Gore Mountain  43.672954 -74.048853
2      Hunter Mountain  42.177866 -74.230422
3   Belleayre Mountain  42.126893 -74.474075
5     Windham Mountain  42.293926 -74.261312
6           Greek Peak  42.502320 -76.148046
8   Peek'n Peak Resort  42.060463 -79.744066
9             HoliMont  42.270015 -78.683382
19    Holiday Mountain  41.628043 -74.606688


In [9]:
# Interactive map with one marker per selected resort; hovering a marker
# shows the resort name.
fig = px.scatter_mapbox(
    df_selected,
    lat='latitude',
    lon='longitude',
    hover_name='name',
    zoom=5,
    mapbox_style='carto-positron',
)

# Point the initial view at New York State (approximate centre) and add a title.
map_center = {'lat': 43.2994, 'lon': -74.2179}
fig.update_layout(
    title='Selected New York Ski Resorts',
    mapbox={'center': map_center},
)

fig.show()