In [1]:
import numpy as np
import pandas as pd
import requests

import matplotlib.pyplot as plt

from bs4 import BeautifulSoup

%matplotlib inline

In [182]:
class Table:
    """Wrap the parent tag of one report table and expose its rows as a dict.

    ``parent`` only needs to behave like a BeautifulSoup tag: ``find`` and
    ``find_all`` are the only methods called on it.
    """

    def __init__(self, parent):
        self.parent = parent

    def __repr__(self):
        # Close the angle bracket so the repr is a balanced <...> string.
        return f'<Table Dict is {self.table_dict}>'

    @property
    def table_dict(self):
        """Return the table contents as ``{'date': ..., <row label>: <value>}``.

        The second whitespace-separated token of the ``table-top`` header row
        is used as the date when it is numeric. In that case every
        ``column-light`` / ``column-dark`` row contributes one entry: the text
        of its first cell is the key and the first token of the following cell
        is the value (so a trailing unit in a separate token is dropped).

        When the header row is missing, has fewer than two tokens, or its
        second token is not numeric, only ``{'date': 0}`` is returned.
        """
        d = {}
        # Look the header up once; it may be absent on unexpected markup.
        header = self.parent.find('tr', {'class': 'table-top'})
        header_tokens = header.text.split() if header is not None else []

        if len(header_tokens) > 1 and header_tokens[1].isnumeric():
            d['date'] = header_tokens[1]
            for tr in self.parent.find_all('tr', {'class': ['column-light', 'column-dark']}):
                label_cell = tr.td
                if label_cell is None:
                    continue  # row without any cell: nothing to record
                value_cell = label_cell.find_next('td')
                value_tokens = value_cell.text.split() if value_cell is not None else []
                if value_tokens:  # skip rows whose value cell is missing or blank
                    d[label_cell.text] = value_tokens[0]
        else:
            d['date'] = 0
        return d
    

In [162]:
class WeatherPage:
    """Parse an HTML weather page and expose the tables of its main content."""

    def __init__(self,page):
        # Keep only the <div id="main-copy"> container; `find` returns None
        # when the page has no such element.
        document = BeautifulSoup(page, 'html.parser')
        self.soup = document.find('div', attrs={'id':'main-copy'})

    @property
    def AllTables(self):
        """Return one ``Table`` per ``<table>`` tag inside the main container.

        An empty list is returned when the container was not found, so callers
        can treat such a page as having no tables instead of hitting an
        ``AttributeError`` on ``None``.
        """
        if self.soup is None:
            return []
        return [Table(e) for e in self.soup.find_all('table')]

In [223]:
# Scrape one archive page per month and collect every daily table into `df`.
BASE_URL = 'http://www.estesparkweather.net/archive_reports.php?date='

# One 'YYYYMM' key per month from 2009-01 through 2018-10. A period range is
# used because the month alias 'M' is deprecated for date_range in recent
# pandas, and it avoids the day-first ambiguous '28/11/2018' end string.
month_keys = pd.period_range(start='2009-01', end='2018-10', freq='M').strftime('%Y%m').tolist()

monthly_frames = []
for month_key in month_keys:
    response = requests.get(BASE_URL + month_key, timeout=30)
    response.raise_for_status()  # fail loudly instead of parsing an error page

    # Table.table_dict marks tables without a numeric day with the integer 0;
    # keep only records that carry a day string, otherwise the string
    # concatenation below would raise TypeError.
    daily_records = [
        record
        for record in (table.table_dict for table in WeatherPage(response.content).AllTables)
        if isinstance(record.get('date'), str)
    ]
    if not daily_records:
        continue

    month_df = pd.DataFrame(daily_records).dropna(axis=0, how='all')
    # Prefix the day with its 'YYYYMM' key (the day is not zero-padded).
    month_df['date'] = month_key + month_df['date']
    monthly_frames.append(month_df)

# Concatenate once: DataFrame.append was removed in pandas 2.0 and growing a
# frame inside the loop is quadratic.
df = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

In [224]:
# Display the scraped frame to inspect the raw string values before cleaning.
df

Unnamed: 0,date,Average temperature,Average humidity,Average dewpoint,Average barometer,Average windspeed,Average gustspeed,Average direction,Rainfall for month,Rainfall for year,Maximum rain per minute,Maximum temperature,Minimum temperature,Maximum humidity,Minimum humidity,Maximum pressure,Minimum pressure,Maximum windspeed,Maximum gust speed,Maximum heat index
0,2009011,37.8,35,12.7,29.7,26.4,36.8,274,0.00,0.00,0.00,40.1,34.5,44,27,29.762,29.596,41.4,59,40.1
1,2009012,43.2,32,14.7,29.5,12.8,18.0,240,0.00,0.00,0.00,52.8,37.5,43,16,29.669,29.268,35.7,51,52.8
2,2009013,25.7,60,12.7,29.7,8.3,12.2,290,0.00,0.00,0.00,41.2,6.7,89,35,30.232,29.260,25.3,38,41.2
3,2009014,9.3,67,0.1,30.4,2.9,4.5,47,0.00,0.00,0.00,19.4,-0.0,79,35,30.566,30.227,12.7,20,32.0
4,2009015,23.5,30,-5.3,29.9,16.7,23.1,265,0.00,0.00,0.00,30.3,15.1,56,13,30.233,29.568,38.0,53,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26,20181027,53.3°F,37%,26.6°F,29.8,10.3,14.8,282°,0.85,6.86,0.00,64.5°F,38.3°F,78%,24%,29.935,29.650,28.8,38.0,64.5°F
27,20181028,49.4°F,48%,25.2°F,29.9,5.3,7.6,282°,0.85,6.86,0.00,67.3°F,30.9°F,90%,19%,30.040,29.663,18.4,25.3,67.3°F
28,20181029,56.1°F,21%,16.0°F,29.6,8.5,12.4,282°,0.86,6.87,0.01,66.8°F,40.3°F,59%,13%,29.731,29.402,26.5,39.1,66.8°F
29,20181030,31.5°F,87%,28.2°F,29.9,1.9,3.1,275°,0.99,7.00,0.01,40.3°F,27.8°F,92%,60%,30.121,29.623,9.2,16.1,40.2°F


In [225]:
# Append the measurement unit to the column labels, then strip the unit
# characters that some scraped values still carry (e.g. '53.3°F', '37%', '282°').
columns_rename = {'Average barometer': 'Average barometer (in)',
  'Average dewpoint': 'Average dewpoint (°F)',
  'Average direction': 'Average direction (°deg)',
  'Average gustspeed': 'Average gustspeed (mph)',
  'Average humidity': 'Average humidity (%)',
  'Average temperature': 'Average temperature (°F)',
  'Average windspeed': 'Average windspeed (mph)',
  'Maximum gust speed': 'Maximum gust speed (mph)',
  'Maximum heat index': 'Maximum heat index (°F)',
  'Maximum humidity': 'Maximum humidity (%)',
  'Maximum pressure': 'Maximum pressure',
  'Maximum rain per minute': 'Maximum rain per minute',
  'Maximum temperature': 'Maximum temperature (°F)',
  'Maximum windspeed': 'Maximum windspeed (mph)',
  'Minimum humidity': 'Minimum humidity (%)',
  'Minimum pressure': 'Minimum pressure',
  'Minimum temperature': 'Minimum temperature (°F)',
  'Rainfall for month': 'Rainfall for month (in)',
  'Rainfall for year': 'Rainfall for year (in)'}
df = df.rename(columns=columns_rename)

# Characters to strip from both ends of the values -> columns they apply to.
unit_chars_by_column = {
    '°F': ['Average dewpoint (°F)',
           'Average temperature (°F)',
           'Maximum temperature (°F)',
           'Minimum temperature (°F)',
           'Maximum heat index (°F)'],
    '°': ['Average direction (°deg)'],
    '%': ['Average humidity (%)',
          'Maximum humidity (%)',
          'Minimum humidity (%)'],
}
for unit_chars, unit_columns in unit_chars_by_column.items():
    for column in unit_columns:
        # Skip columns that are already numeric so the cell can be re-run
        # after the values have been converted.
        if pd.api.types.is_numeric_dtype(df[column]):
            continue
        # `.str.strip` leaves missing values as NaN, whereas calling
        # `x.strip(...)` through `apply` raises on a NaN entry.
        df[column] = df[column].str.strip(unit_chars)

In [232]:
# Cast every column to float. Converting the whole frame (instead of only the
# object-dtype columns) keeps this cell safe to re-run: once the columns are
# float, `select_dtypes('object')` would select nothing and wipe `df`.
# NOTE(review): this also turns the string `date` key into a float
# (e.g. 2009011.0) - confirm that is intended.
df = df.astype('float')

In [235]:
# Spot-check the dtype of one converted column.
df['Average barometer (in)'].dtype

dtype('float64')

In [236]:
# Display the frame again after the float conversion.
df

Unnamed: 0,date,Average temperature (°F),Average humidity (%),Average dewpoint (°F),Average barometer (in),Average windspeed (mph),Average gustspeed (mph),Average direction (°deg),Rainfall for month (in),Rainfall for year (in),Maximum rain per minute,Maximum temperature (°F),Minimum temperature (°F),Maximum humidity (%),Minimum humidity (%),Maximum pressure,Minimum pressure,Maximum windspeed (mph),Maximum gust speed (mph),Maximum heat index (°F)
0,2009011.0,37.8,35.0,12.7,29.7,26.4,36.8,274.0,0.00,0.00,0.00,40.1,34.5,44.0,27.0,29.762,29.596,41.4,59.0,40.1
1,2009012.0,43.2,32.0,14.7,29.5,12.8,18.0,240.0,0.00,0.00,0.00,52.8,37.5,43.0,16.0,29.669,29.268,35.7,51.0,52.8
2,2009013.0,25.7,60.0,12.7,29.7,8.3,12.2,290.0,0.00,0.00,0.00,41.2,6.7,89.0,35.0,30.232,29.260,25.3,38.0,41.2
3,2009014.0,9.3,67.0,0.1,30.4,2.9,4.5,47.0,0.00,0.00,0.00,19.4,-0.0,79.0,35.0,30.566,30.227,12.7,20.0,32.0
4,2009015.0,23.5,30.0,-5.3,29.9,16.7,23.1,265.0,0.00,0.00,0.00,30.3,15.1,56.0,13.0,30.233,29.568,38.0,53.0,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26,20181027.0,53.3,37.0,26.6,29.8,10.3,14.8,282.0,0.85,6.86,0.00,64.5,38.3,78.0,24.0,29.935,29.650,28.8,38.0,64.5
27,20181028.0,49.4,48.0,25.2,29.9,5.3,7.6,282.0,0.85,6.86,0.00,67.3,30.9,90.0,19.0,30.040,29.663,18.4,25.3,67.3
28,20181029.0,56.1,21.0,16.0,29.6,8.5,12.4,282.0,0.86,6.87,0.01,66.8,40.3,59.0,13.0,29.731,29.402,26.5,39.1,66.8
29,20181030.0,31.5,87.0,28.2,29.9,1.9,3.1,275.0,0.99,7.00,0.01,40.3,27.8,92.0,60.0,30.121,29.623,9.2,16.1,40.2
