In [1]:
import os
import time
from datetime import datetime

import numpy as np
import requests
from bs4 import BeautifulSoup

In [2]:
# Years to request; range() excludes its stop value, so this is 2006 through 2015.
years = range(2006, 2016)

# URL path fragment for each resort. The list position (0 = Mt. Baker,
# 1 = Whistler Blackcomb) is the key under which its rows are stored in `data`.
resort = ['washington/mt-baker', 'british-columbia/whistler-blackcomb']
url = 'http://www.onthesnow.com/{0}/historical-snowfall.html?&q=snow&v=list&y={1}'

# data[i] holds every scraped row for resort[i]; a row is the list made of the
# first child of each <td> in one <tr> of the page's "snowfall" table.
data = {}
for loc, slug in enumerate(resort):
    snowdata = []

    for y in years:
        page_url = url.format(slug, y)
        # A timeout keeps one stalled connection from hanging the whole cell.
        r = requests.get(page_url, timeout=30)
        # Stop on an HTTP error status instead of trying to parse an error page.
        r.raise_for_status()

        soup = BeautifulSoup(r.content, 'lxml')

        snowfall = soup.find('table', {'class': 'snowfall'})
        if snowfall is None:
            # find() returns None when nothing matches; report which page was
            # affected rather than failing with "NoneType is not callable".
            raise ValueError('no snowfall table found at ' + page_url)

        # The first <tr> is skipped (presumably the table's header row).
        for row in snowfall('tr')[1:]:
            cells = [x.contents[0] for x in row('td')]
            # Rows without any <td> would be stored as [] and break later
            # indexing such as day[0], so they are left out.
            if cells:
                snowdata.append(cells)

    data[loc] = snowdata

In [3]:
def convert_date(string):
    """Parse a scraped date such as 'Jan  6, 2006' into a datetime.

    The string is parsed directly with datetime.strptime, so the result does
    not depend on the local time zone of the machine running the notebook
    (the earlier strptime -> mktime -> fromtimestamp round trip did).

    Args:
        string: date text in "%b  %d, %Y" form, e.g. 'Nov 13, 2006'.

    Returns:
        A naive datetime for that day with the time set to 00:00.

    Raises:
        ValueError: if the text does not match the expected format.
    """
    return datetime.strptime(string, "%b  %d, %Y")

def in_to_int(string):
    """Turn a measurement such as '5 in.' into the integer 5."""
    # Cut out every " in." occurrence, then let int() parse what remains.
    without_unit = "".join(string.split(" in."))
    return int(without_unit)

In [4]:
# Look at the first row stored under key 0 to see the raw, unconverted cell values.
data[0][0]

[u'Jan  6, 2006', u'5 in.', u'5 in.', u'75 in.']

In [5]:
# Print each row stored under data[0] with its date parsed and the three
# following columns converted from 'N in.' text to integers.
for row in data[0]:
    parsed_date = convert_date(row[0])
    measurements = [in_to_int(cell) for cell in row[1:4]]
    print([parsed_date] + measurements)

[datetime.datetime(2006, 1, 6, 0, 0), 5, 5, 75]
[datetime.datetime(2006, 1, 17, 0, 0), 2, 7, 0]
[datetime.datetime(2006, 1, 18, 0, 0), 2, 9, 135]
[datetime.datetime(2006, 1, 26, 0, 0), 8, 17, 135]
[datetime.datetime(2006, 2, 3, 0, 0), 7, 24, 186]
[datetime.datetime(2006, 3, 1, 0, 0), 6, 30, 188]
[datetime.datetime(2006, 11, 13, 0, 0), 14, 44, 59]
[datetime.datetime(2006, 11, 14, 0, 0), 10, 54, 68]
[datetime.datetime(2006, 11, 15, 0, 0), 6, 60, 68]
[datetime.datetime(2006, 11, 17, 0, 0), 3, 63, 68]
[datetime.datetime(2006, 11, 20, 0, 0), 4, 67, 68]
[datetime.datetime(2006, 11, 21, 0, 0), 15, 82, 68]
[datetime.datetime(2006, 11, 22, 0, 0), 16, 98, 68]
[datetime.datetime(2006, 11, 23, 0, 0), 15, 113, 72]
[datetime.datetime(2006, 11, 24, 0, 0), 16, 129, 76]
[datetime.datetime(2006, 11, 26, 0, 0), 20, 149, 116]
[datetime.datetime(2006, 11, 27, 0, 0), 26, 175, 116]
[datetime.datetime(2006, 11, 30, 0, 0), 1, 176, 125]
[datetime.datetime(2006, 12, 1, 0, 0), 2, 178, 117]
[datetime.datetime(2006

## Time to store data

Schema:
* date
* resort (whistler/baker)
* snow24 (24-hour snowfall, in inches)
* basedepth

Ignore the total seasonal snowfall: it is the third of the four scraped columns (index 2 in each row), so only indices 0, 1 and 3 are stored.

In [6]:
# import psycopg2

In [7]:
# conn = psycopg2.connect("dbname=border")

In [8]:
# cur = conn.cursor()

In [9]:
# for i, resort in enumerate(['baker', 'whistler']):
#     for day in data[i]:
#         cur.execute("INSERT INTO skiconditions (date, resort, snow24, basedepth) VALUES (%s, %s, %s, %s)",\
#             (convert_date(day[0]), resort, in_to_int(day[1]), in_to_int(day[3])))
        
#         conn.commit()

In [10]:
# cur.close()
# conn.close()

## Switch to MySQL
MySQL has much better admin tools than PostgreSQL, so the data is stored there instead.

In [11]:
import mysql.connector

In [14]:
# Connection settings are read from the environment so real credentials never
# have to be written into the notebook; each fallback is the local dev value.
conn = mysql.connector.connect(
    user=os.environ.get('BORDER_DB_USER', 'dev'),
    password=os.environ.get('BORDER_DB_PASSWORD', 'dev'),
    host=os.environ.get('BORDER_DB_HOST', 'localhost'),
    database=os.environ.get('BORDER_DB_NAME', 'border'),
)

In [15]:
# Store one skiconditions row per scraped day: the date (day[0]), the resort
# label, the 24-hour snowfall (day[1]) and the base depth (day[3]).
# day[2] is not stored.
insert_sql = ("INSERT INTO skiconditions (date, resort, snow24, basedepth) "
              "VALUES (%s, %s, %s, %s)")

# Convert every value before touching the database, so a malformed scraped
# cell fails here rather than partway through the inserts.
# The loop variable is deliberately not named `resort`: that would overwrite
# the list of URL slugs defined in the scraping cell.
rows = []
for i, resort_name in enumerate(['baker', 'whistler']):
    for day in data[i]:
        rows.append((convert_date(day[0]), resort_name,
                     in_to_int(day[1]), in_to_int(day[3])))

cur = conn.cursor()
try:
    # One batched call instead of one execute() per row.
    cur.executemany(insert_sql, rows)
    conn.commit()
finally:
    # Always release the cursor and connection; if the insert raised, the
    # commit above is skipped.
    cur.close()
    conn.close()