In [5]:
import numpy as np
import pandas as pd
import json
import urllib.request

In [6]:
# Allow up to 500 characters per cell so the long link strings are shown in full.
pd.options.display.max_colwidth = 500

## Pull data

In [7]:
# Maximum number of rows requested from the resort-list endpoint.
limit = 500

# Query parameters, listed in the exact order they appear in the final URL.
# NOTE(review): offset=30 presumably skips the first 30 rows of the listing —
# confirm this is intended.
query_params = (
    ('region', 'us'),
    ('regionids', 429),
    ('language', 'en'),
    ('pagetype', 'skireport'),
    ('direction', -1),
    ('order', 'stop'),
    ('limit', limit),
    ('offset', 30),
    ('countrycode', 'USA'),
    ('minvalue', -1),
    ('open', 'anystatus'),
)
source_url = ('https://skiapp.onthesnow.com/app/widgets/resortlist?'
              + '&'.join(f'{key}={value}' for key, value in query_params))

In [8]:
# Download the response body; the connection is closed when the block exits.
with urllib.request.urlopen(source_url) as url_file:
    raw_bytes = url_file.read()

# Decode the body as UTF-8 text (kept in station_data) and parse it as JSON.
station_data = raw_bytes.decode('utf-8')
json_data = json.loads(station_data)

## json -> dataframe

In [39]:
# One row per entry of the payload's 'rows' list.
station_df = pd.DataFrame.from_dict(json_data['rows'])

# Expand the 'links' column into one column per key, keeping station_df's
# index as the row labels so the join below lines up row-for-row.
# (presumably each 'links' cell is a dict of relative URLs — verify against the payload)
links_df = pd.DataFrame.from_dict(station_df['links'].to_dict()).T

# Keep only the resort name and its relative weather-page link.
station_data_df = (station_df
                   .join(links_df)
                   .filter(items=['resort_name_short', 'weather'])
                   )

# If we get as many stations as requested, there could be more that we didn't get.
n_stations = station_data_df.shape[0]
assert limit > n_stations, (
    f'received {n_stations} stations with limit={limit}; '
    'the result may be truncated, so raise the limit and re-run')

In [40]:
# Suffix appended to the historical-snowfall URL for each page type.
pages = {'snowfall': "", 'base': "&q=top"}

def link_fixer(ser, year, page):
    """Create absolute historical-snowfall links from relative weather links.

    Parameters
    ----------
    ser : pandas.Series of str
        Relative links; every literal ``weather.html`` in them is replaced.
    year : int or str
        Value placed in the ``y=`` query parameter.
    page : str
        Key of the module-level ``pages`` dict; selects the suffix appended
        to every link.

    Returns
    -------
    pandas.Series
        ``'https://www.onthesnow.com' + rewritten link + pages[page]``.

    Raises
    ------
    KeyError
        If ``page`` is not a key of ``pages``.
    """
    base = 'https://www.onthesnow.com'
    tail = f'historical-snowfall.html?y={year}'
    # regex=False makes this a literal substring replacement on every pandas
    # version: before pandas 2.0 the default was regex=True, where the '.' in
    # the pattern would match any character.
    ser2 = ser.str.replace(pat='weather.html', repl=tail, regex=False)
    return base + ser2 + pages[page]

In [41]:
def link_maker(df, yr, page):
    """Return a copy of ``df`` with link, year and page columns added.

    ``snow_link`` is computed by ``link_fixer`` from ``df.weather`` for the
    given year and page; ``year`` and ``page`` hold the same value on every row.
    """
    snow_links = link_fixer(df.weather, yr, page)
    return df.assign(snow_link=snow_links, year=yr, page=page)
# Years to build links for: 2010 through 2018 (the range end is exclusive).
years = range(2010, 2019)

# One frame per (year, page) pair, with the year as the outer loop.
link_dfs = [link_maker(station_data_df, link_year, page_name)
            for link_year in years
            for page_name in pages]

In [42]:
# Stack the per-(year, page) frames, then drop the relative 'weather' link
# that snow_link was derived from.
stacked_links = pd.concat(link_dfs)
station_links_df = stacked_links.drop(columns='weather')
station_links_df.tail()

Unnamed: 0,resort_name_short,snow_link,year,page
297,Whaleback,https://www.onthesnow.com/new-hampshire/whaleback-mountain/historical-snowfall.html?y=2018&q=top,2018,base
298,Oak Mountain,https://www.onthesnow.com/new-york/oak-mountain/historical-snowfall.html?y=2018&q=top,2018,base
299,Mount Pleasant of Edinboro,https://www.onthesnow.com/pennsylvania/mount-pleasant-of-edinboro/historical-snowfall.html?y=2018&q=top,2018,base
300,Hunt Hollow,https://www.onthesnow.com/new-york/hunt-hollow-ski-club/historical-snowfall.html?y=2018&q=top,2018,base
301,Powder Ridge Park,https://www.onthesnow.com/connecticut/powder-ridge-park/historical-snowfall.html?y=2018&q=top,2018,base


In [44]:
# Save to disk. The index is rebuilt as 0..n-1 first and the old labels are
# discarded (drop=True), since the concatenated frames repeat their labels.
links_to_save = station_links_df.reset_index(drop=True)
links_to_save.to_feather('../data/ots_station_links.feather')