In [1]:
import numpy as np
import pandas as pd
import json
import urllib.request

In [2]:
# Widen text columns so long link strings are not truncated when a frame is displayed.
pd.options.display.max_colwidth = 500

## Pull data

In [3]:
# Maximum number of resorts requested in a single call; checked again after
# the response is loaded to detect a possibly truncated result.
limit = 500

# Query-string pieces, kept in the order the endpoint URL lists them.
# NOTE(review): offset=30 presumably skips the first 30 results -- confirm
# that this is intended.
query_parts = [
    'region=us',
    'regionids=429',
    'language=en',
    'pagetype=skireport',
    'direction=-1',
    'order=stop',
    f'limit={limit}',
    'offset=30',
    'countrycode=USA',
    'minvalue=-1',
    'open=anystatus',
]
source_url = ('https://skiapp.onthesnow.com/app/widgets/resortlist?'
              + '&'.join(query_parts))

In [4]:
# Download the raw response body; the connection is closed as soon as the
# bytes have been read.
with urllib.request.urlopen(source_url) as url_file:
    raw_payload = url_file.read()

# Decode with the default codec (UTF-8) and parse the JSON document.
station_data = raw_payload.decode()
json_data = json.loads(station_data)

## Convert JSON to a DataFrame

In [12]:
# Build a frame from the 'rows' entry of the downloaded payload.
station_df = pd.DataFrame.from_dict(json_data['rows'])

# Expand each row's 'links' value into its own columns, aligned on the
# original index so the join below matches row for row.
links_df = pd.DataFrame(station_df['links'].tolist(), index=station_df.index)

# Keep only the resort name and its (relative) weather link.
station_data_df = (station_df
                   .join(links_df)
                   .filter(items=['resort_name_short', 'weather'])
                   )

# If we get as many stations as requested, there could be more we didn't get.
assert limit > station_data_df.shape[0], (
    f'received {station_data_df.shape[0]} stations with limit={limit}; '
    'the result may be truncated -- raise `limit` and re-run'
)

In [6]:
def link_fixer(ser, year=2018):
    """Build absolute historical-snowfall URLs from relative weather links.

    Parameters
    ----------
    ser : pandas.Series of str
        Relative links; every literal occurrence of ``weather.html`` is
        replaced. Entries without it only get the base URL prepended.
    year : int, default 2018
        Value written into the ``y`` query parameter of the new link.

    Returns
    -------
    pandas.Series
        ``'https://www.onthesnow.com'`` followed by each link with
        ``weather.html`` swapped for ``historical-snowfall.html?y=<year>``.
    """
    base = 'https://www.onthesnow.com'
    tail = f'historical-snowfall.html?y={year}'
    # regex=False forces a literal match. Without it the result depends on the
    # pandas version: before 2.0 the pattern is a regex, so the '.' would match
    # any character (e.g. 'weatherXhtml').
    ser2 = ser.str.replace('weather.html', tail, regex=False)
    return base + ser2

In [13]:
# Turn the relative weather links into absolute historical-snowfall links,
# then swap the 'weather' column for the new 'snow_link' column.
snow_links = link_fixer(station_data_df['weather'])
station_data_df = (station_data_df
                   .drop(columns=['weather'])
                   .assign(snow_link=snow_links)
                   )

# Persist the resort-name / link table to disk.
station_data_df.to_feather('../data/ots_station_links.feather')