In [1]:
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Define Functions #

In [8]:
#define function that takes a list of html and returns list of text elements
def to_text_list(list):
    """Return the ``.text`` attribute of every element of ``list``, in order.

    The parameter keeps its original name (it shadows the builtin only inside
    this function) so that existing keyword callers keep working.
    """
    return [element.text for element in list]

# TODO: define a function that starts a request and restarts it if it lasts longer than 10 seconds
# def refresh_request(URL):


#function that takes table row elements from html and returns them in a dataframe
def html_table_to_df(sheet_elements):
    """Convert a sequence of HTML table rows into a DataFrame.

    The ``td`` cells of the first row supply the column names; the ``td``
    cells of every following row become one data row.

    Parameters
    ----------
    sheet_elements : sequence
        Row elements. Each must provide ``find_all("td")`` returning objects
        that expose a ``.text`` attribute.

    Returns
    -------
    pandas.DataFrame
        One row per data row, with columns named after the header cells.
        An empty DataFrame is returned when ``sheet_elements`` is empty.

    Raises
    ------
    ValueError
        If a data row does not have the same number of cells as the header.
    """
    def _cell_texts(row):
        # Text of each <td> cell of one row, in document order.
        return [cell.text for cell in row.find_all("td")]

    # Nothing to parse: previously this raised IndexError on sheet_elements[0].
    if len(sheet_elements) == 0:
        return pd.DataFrame()

    sheet_headers = _cell_texts(sheet_elements[0])

    # Collect all rows first and build the frame once; appending through
    # .loc[len(df)] re-allocates the frame for every row.
    data_rows = []
    for row_number in range(1, len(sheet_elements)):
        sheet_data = _cell_texts(sheet_elements[row_number])
        # Keep the strictness of row-wise assignment: a ragged row is an error,
        # not something to be silently padded.
        if len(sheet_data) != len(sheet_headers):
            raise ValueError(
                "row {} has {} cells but the header has {}".format(
                    row_number, len(sheet_data), len(sheet_headers)
                )
            )
        data_rows.append(sheet_data)

    return pd.DataFrame(data_rows, columns=sheet_headers)

#define function that takes input of link to ski resort, state, and country and outputs a dataframe
def df_from_resort_link(URL, resort, state, country):
    """Scrape the lift table for one resort and label it with its location.

    The resort page is fetched first and the first link inside its
    ``entry-content`` div is taken as the sheet URL. If that lookup fails for
    any reason, ``URL`` itself is treated as the sheet URL. The sheet page's
    first ``tbody`` is then parsed row by row with ``html_table_to_df``.

    Parameters
    ----------
    URL : str
        Link to the resort page (or directly to the sheet).
    resort, state, country
        Values written into the "Resort", "State" and "Country" columns that
        are inserted in front of the scraped columns.

    Returns
    -------
    pandas.DataFrame
        The labelled lift table, or an empty DataFrame when the sheet could
        not be fetched or parsed (the failure is printed, not raised).
    """
    # Some resort links go straight to the google sheet, but most go to a page
    # that previews the sheet, so try the preview layout first.
    try:
        page = requests.get(URL, timeout=(3.05, 27))

        # Parse the page and pull the sheet link out of the post body.
        soup = BeautifulSoup(page.content, "html.parser")
        results = soup.find("div", class_="entry-content")
        google_sheet_URL = results.find("a")['href']
    except Exception:
        # `except Exception` rather than a bare `except` so that Ctrl-C /
        # kernel interrupt still stops a long scrape.
        print('Page links directly to the google sheet')
        google_sheet_URL = URL

    # Some sheet links are broken; keep going with an empty frame in that case.
    try:
        google_page = requests.get(google_sheet_URL, timeout=(3.05, 27))
        google_soup = BeautifulSoup(google_page.content, "html.parser")

        google_table = google_soup.find("tbody")
        sheet_elements = google_table.find_all("tr")

        df_lifts = html_table_to_df(sheet_elements)

        # Prepend the location labels, one value per scraped row. Sizing from
        # the frame itself keeps the labels aligned with whatever was parsed.
        row_count = len(df_lifts)
        df_lifts.insert(0, "Country", [country] * row_count, True)
        df_lifts.insert(1, "State", [state] * row_count, True)
        df_lifts.insert(2, "Resort", [resort] * row_count, True)
    except Exception as error:
        # Report the failure instead of hiding it, then fall back as before.
        print('Could not read lift data for {} from {}: {!r}'.format(
            resort, google_sheet_URL, error))
        df_lifts = pd.DataFrame()

    return df_lifts

#define function that takes a state url, state name and country and returns a dataframe of the lift data for all resorts in that state
def df_from_state(URL, state_name, country):
    """Collect the lift tables of every resort listed on a state page.

    The links inside the first ``ul`` of the page's ``entry-content`` div are
    treated as resorts. When the page has no such list (the state link went
    straight to a single lift page), the ``src`` of the page's ``iframe`` is
    used as the only resort link, named ``"<state_name> Resort"``.

    Parameters
    ----------
    URL : str
        Link to the state page.
    state_name, country
        Passed through to ``df_from_resort_link`` as labels.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-resort frames, or an empty DataFrame when no
        resort link could be found.
    """
    page = requests.get(URL, timeout=(3.05, 27))

    # Parse the page and grab the container that holds the list of resorts.
    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find("div", class_="entry-content")

    resort_lists = results.find_all("ul") if results is not None else []

    # Work with plain (name, link) pairs so both layouts feed the same loop.
    # The fallback used to build an HTML *string*, which then failed on
    # `.text` / `['href']` in the loop.
    if resort_lists:
        resort_targets = [
            (anchor.text.strip(), anchor['href'])
            for anchor in resort_lists[0].find_all("a")
        ]
    else:
        # e.g. the MD link goes straight to the Wisp resort page
        print("State link went to a ski lift page...")
        iframe = results.find("iframe") if results is not None else None
        if iframe is None or not iframe.get("src"):
            print("No resort list or embedded sheet found for {}".format(state_name))
            return pd.DataFrame()
        resort_targets = [(state_name + " Resort", iframe["src"])]

    # Gather every resort's frame and concatenate once at the end.
    resort_frames = []
    for resort_name, resort_link in resort_targets:
        print('{} in {}'.format(resort_name, state_name))
        resort_frames.append(
            df_from_resort_link(resort_link, resort_name, state_name, country)
        )

    # pd.concat raises on an empty list, so handle "no resorts" explicitly.
    if not resort_frames:
        return pd.DataFrame()
    return pd.concat(resort_frames)

# Gather Data #

In [9]:
URL = "https://liftblog.com/united-states/"
page = requests.get(URL, timeout=(3.05, 27))

#create a beautifulsoup object with the page content and choose the appropriate parser
soup = BeautifulSoup(page.content, "html.parser")

# grabs the post container that holds the list of states
results = soup.find(id="post-1491")

#there are two lists on this page.  first is the states and second is sharing links
states = results.find_all("ul")[0].find_all("a")

#per-state csv files go under data/; create it so a fresh checkout can run this cell
state_csv_dir = Path("data")
state_csv_dir.mkdir(parents=True, exist_ok=True)

#collect one dataframe per state and combine them once after the loop
state_frames = []

for state in states:
    state_name = state.text.strip()
    state_link = state['href']
    print(state_name)
    df_state_lifts = df_from_state(state_link, state_name, 'United States')
    #the extension used to be misspelled ".cvs"
    df_state_lifts.to_csv(state_csv_dir / '{}_liftdata.csv'.format(state_name))
    state_frames.append(df_state_lifts)

#dataframe for the country lift database (pd.concat raises on an empty list)
df_us_lifts = pd.concat(state_frames) if state_frames else pd.DataFrame()

print("Done gathering data from states...")

Alabama
Montgomery Zoo in Alabama
Alaska
Alyeska in Alaska
Arctic Valley in Alaska
Birch Hill in Alaska
Eaglecrest in Alaska
Goldbelt Tram in Alaska
Hillberg in Alaska
Hilltop in Alaska
Icy Strait Point in Alaska
Mt. Eyak in Alaska
Northern Warfare Training Center in Alaska
Skeetawk in Alaska
Skiland in Alaska
Arizona
Arizona Snowbowl in Arizona
Mt. Lemmon in Arizona
Sunrise Park in Arizona
Wildlife World in Arizona
California
Alameda County Fairgrounds in California
Alpine Slide at Magic Mountain in California
Alta Sierra in California
Badger Pass in California
Bear Mountain in California
Bear Valley in California
Boreal in California
Cal Expo in California
California’s Great America in California
Cedar Pass in California
China Peak in California
Coppervale in California
Dodge Ridge in California
Donner Ski Ranch in California
Granlibakken in California
Heavenly in California
Homewood in California
June Mountain in California
Kirkwood in California
Los Angeles Fairplex in California
M

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Spout Springs in Oregon
Timberline Lodge in Oregon
Wallowa Lake Tramway in Oregon
Warner Canyon in Oregon
Willamette Pass in Oregon
Pennsylvania
Bear Creek in Pennsylvania
Big Boulder in Pennsylvania
Blue Knob in Pennsylvania
Blue Mountain in Pennsylvania
Boyce Park in Pennsylvania
Camelback in Pennsylvania
Dutch Wonderland in Pennsylvania
Eagle Rock in Pennsylvania
Elk Mountain in Pennsylvania
Hersheypark in Pennsylvania
Hidden Valley in Pennsylvania
Jack Frost in Pennsylvania
Kennywood in Pennsylvania
Knoebels in Pennsylvania
Laurel Mountain in Pennsylvania
Liberty Mountain in Pennsylvania
Locust Lake Village in Pennsylvania
Montage Mountain in Pennsylvania
Mt. Pleasant of Edinboro in Pennsylvania
Mystic Mountain at Nemacolin Woodlands in Pennsylvania
Pocono Ranch Lands in Pennsylvania
Roundtop in Pennsylvania
Saw Creek in Pennsylvania
Seven Springs in Pennsylvania
Shawnee Mountain in Pennsylvania
Ski Big Bear in Pennsylvania
Ski Sawmill in Pennsylvania
Skytop Lodge in Pennsylvania
S

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Nordic Valley in Utah
Park City in Utah
Powder Mountain in Utah
Snowbasin in Utah
Snowbird in Utah
Solitude in Utah
Sundance in Utah
Utah Olympic Park in Utah
Wasatch Peaks Ranch in Utah
Woodward Park City in Utah
US Virgin Islands
Paradise Point in US Virgin Islands
Vermont
Ascutney in Vermont
Bolton Valley in Vermont
Brattleboro in Vermont
Bromley in Vermont
Burke Mountain in Vermont
Camp Ethan Allen in Vermont
Cochran’s in Vermont
Cosmic Hill in Vermont
Grill Hill in Vermont
Hermitage Club in Vermont
High Pond in Vermont
Jay Peak in Vermont
Killington in Vermont
Lyndon Outing Club in Vermont
Mad River Glen in Vermont
Magic Mountain in Vermont
Middlebury College Snow Bowl in Vermont
Mt. Snow in Vermont
Northeast Slopes in Vermont
Okemo in Vermont
Pico in Vermont
Quechee Lakes in Vermont
Saskadena Six in Vermont
Seaver Hill in Vermont
Smugglers’ Notch in Vermont
Stowe in Vermont
Stratton in Vermont
Sugarbush in Vermont
Virginia
Bryce Resort in Virginia
Busch Gardens Williamsburg in Vi

In [11]:
#write the combined United States lift table to a csv file in the working directory
df_us_lifts.to_csv(path_or_buf='UnitedStates_liftdata.csv')

# Clean Data #

Unnamed: 0,Country,State,Resort,Status,Lift Name,Type,Manufacturer,Years of Operation,Capacity,Vertical Rise,Length,Horsepower,Line Speed,Chairs,Towers,Drive,Tension,Ride Time,Notes
0,United States,Minesota,Afton Alps,Operating,Chair 1,Double,Hall,1968-,1000,240,1150,,400,,7,Bottom,Bottom,2.9,
1,United States,Minesota,Afton Alps,Operating,Chair 2,Quad,Hall,1979-,2150,200,1140,,375,,9,Bottom,Bottom,3.0,
2,United States,Minesota,Afton Alps,Operating,Chair 3,Triple,Hall,1975-,1800,235,983,,400,,6,Bottom,Bottom,2.5,
3,United States,Minesota,Afton Alps,Operating,Chair 4,Double,Hall,1971-,1000,65,554,,340,,4,Bottom,Bottom,1.6,
4,United States,Minesota,Afton Alps,Operating,Chair 5,Double,Hall,1975-,1200,230,1232,,400,,7,Top,Bottom,3.1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,United States,Minesota,Wild Mountain,Operating,Chair 1,Quad,Borvig,1983-,2400,192,968,,400,,5,Bottom,Bottom,2.4,
1,United States,Minesota,Wild Mountain,Operating,Chair 2,Quad,Borvig,1978-,2400,268,1156,,400,,7,Bottom,Bottom,2.9,
2,United States,Minesota,Wild Mountain,Operating,Chair 3,Quad,Borvig,1973-,1400,270,1225,,400,,8,Bottom,Bottom,3.1,
3,United States,Minesota,Wild Mountain,Operating,Chair 4,Quad,Borvig,1977-,2400,271,1490,,400,,9,Bottom,Bottom,3.7,
