In [1]:
import aiohttp
import asyncio

import time
from IPython.display import HTML
import pandas
from bs4 import BeautifulSoup
import datetime

In [2]:
async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()

async def main(url):
    async with aiohttp.ClientSession() as session:
        html = await fetch(session, url)
        return html

loop = asyncio.get_event_loop()

In [3]:
html = loop.create_task(main('https://xappprod.aqmd.gov/aqdetail/AirQuality?AreaNumber=8'))

In [4]:
html.done()

True

In [5]:
tree = BeautifulSoup(html.result(), 'html.parser')

In [6]:
type(tree)

bs4.BeautifulSoup

In [7]:
def parse_station_name(tree):
    div = tree.find('div', attrs={'class': 'p20'})
    station_label = div.find('label')
    if station_label.text == 'Station Name:':
        station_name = station_label.next_sibling
        return station_name.strip()

In [8]:
parse_station_name(tree)

'West San Gabriel Valley'

In [21]:
def parse_report_time(tree):
    print(type(tree))
    div = tree.find('div', attrs={'class': 'p20'})
    time_label = div.find_all('label')[3].text.replace('\xa0', '')
    time_label = time_label.split(': ')[1].lstrip()
    time_label = time_label[:time_label.index('m')+1]
    return datetime.datetime.strptime(time_label,'%m/%d/%Y %I:%M%p')

In [22]:
parse_report_time(tree)

<class 'bs4.BeautifulSoup'>


datetime.datetime(2019, 7, 16, 21, 0)

In [11]:
def parse_aqi(tree):
    header = None
    values = []
    table = tree.find_all('table')[1]
    for row in table.find_all('tr'):
        parsed_row = []
        if header is None:
            for element in row.find_all('th'):
                parsed_row.append(element.text.strip())
            header = parsed_row
        else:
            for element in row.find_all('td'):
                text = element.text.strip()
                if len(text) > 0:
                    parsed_row.append(text)        
            values.append(parsed_row)

    return pandas.DataFrame(values, columns=header)    

parse_aqi(tree)

Unnamed: 0,Parameter,AQI Value,AQI Description,Averaging Period,Parameter Description,Meaning
0,PM2.5,64,Moderate,24-hr,Fine Particulate Matter,"Air quality is acceptable; however, for some p..."
1,PM10,37,Good,24-hr,Particulate Matter,"Air quality is considered satisfactory, and ai..."
2,O3,36,Good,8-hr,Ozone,"Air quality is considered satisfactory, and ai..."
3,NO2,5,Good,24-hr,Nitrogen Dioxide,"Air quality is considered satisfactory, and ai..."
4,CO,2,Good,8-hr,Carbon Monoxide,"Air quality is considered satisfactory, and ai..."


In [12]:
def parse_standards(tree):
    header = None
    values = []
    table = tree.find_all('table')[0]
    for row in table.find_all('tr'):
        parsed_row = []
        for i, element in enumerate(row.find_all('td')):
            parsed_row.append(element.text.strip())

        if header is None:
            header = parsed_row
        else:
            values.append(parsed_row)


    return pandas.DataFrame(values, columns=header)    

parse_standards(tree)

Unnamed: 0,Pollutants,Description,Averaging Period,State Standard,Federal Standard
0,CO,Carbon Monoxide,1 Hr,20 ppm,35 ppm
1,CO,Carbon Monoxide,8 Hr,9 ppm,9 ppm
2,O3,Ozone,1 Hr,0.09 ppm,
3,O3,Ozone,8 Hr,0.07 ppm,0.075 ppm
4,NO2,Nitrogen Dioxide,1 Hr,0.18 ppm,
5,SO2,Sulphur Dioxide,1 Hr,0.25 ppm,
6,SO2,Sulphur Dioxide,24 Hr,0.04 ppm,0.14 ppm
7,PM10,Particulate Matter 10,1 Hr,,
8,PM10,Particulate Matter 10,24 Hr,50 µg/m3,150 µg/m3
9,PM2.5,Fine Particulate Matter,1 Hr,,


In [16]:
https://xappprod.aqmd.gov/aqdetail/AirQuality/MonitoredData

<table border="0" cellpadding="0" cellspacing="10" class="table table-striped table-bordered table-hover border" width="100%">
<tbody><tr><td>
<p><b>Pollutants</b></p></td>
<td>
<p><b>Description</b></p></td>
<td><p><b>Averaging Period</b></p></td>
<td><p><b>State Standard</b></p></td>
<td><p><b>Federal Standard</b></p></td></tr>
<tr><td><p>CO</p></td>
<td><p>Carbon Monoxide</p></td>
<td><p>1 Hr</p></td><td><p>20 ppm</p></td>
<td><p>35 ppm</p></td></tr><tr>
<td><p>CO</p></td>
<td><p>Carbon Monoxide</p></td>
<td><p>8 Hr</p></td>
<td><p>9 ppm</p></td><td><p>9 ppm</p></td></tr>
<tr><td><p>O3</p></td><td><p>Ozone</p></td>
<td><p>1 Hr</p></td><td><p>0.09 ppm</p></td>
<td><p>N/A</p></td></tr><tr><td><p>O3</p></td><td><p>Ozone</p></td>
<td><p>8 Hr</p></td><td><p>0.07 ppm</p></td><td><p>0.075 ppm</p></td></tr>
<tr><td><p>NO2</p></td><td><p>Nitrogen Dioxide</p></td><td><p>1 Hr</p></td>
<td><p>0.18 ppm</p></td><td><p>N/A</p></td></tr><tr><td><p>SO2</p></td>
<td><p>Sulphur Dioxide</p></td><td><p>

In [17]:
forecast = loop.create_task(main('http://www.aqmd.gov/assets/forecast_today.txt'))

In [18]:
text = forecast.result()

In [19]:
def parse_forecast(text):
    columns = ['id', 'name', 'O3-1hr', 'O3-8hr', 'CO', 'PM10', 'PM2.5', 'NO2', 'MAX AQI']
    values = []
    for line in [x.strip() for x in text.split('\r\n')]:
        if len(line) > 0 and line[0].isdigit():
            row = [
                line[0:4].strip(),
                line[4:28].strip(),
            ]
            row.extend(line[28:].split())
            values.append(row)
        elif line.startswith('AIR QUALITY FORECAST'):
            break

    return pandas.DataFrame(values, columns=columns)

In [20]:
parse_forecast(text)

Unnamed: 0,id,name,O3-1hr,O3-8hr,CO,PM10,PM2.5,NO2,MAX AQI
0,1,Central LA Co,64,46,0.3,36,19,19,65
1,2,NW Coastal LA,72,48,0.3,25,25,19,78
2,3,SW Coastal LA,56,45,0.4,14,28,23,85
3,4,S Coastal LA,49,35,0.3,19,13,23,52
4,5,Southeast LA Co,82,57,0.3,29,19,23,65
5,6,W San Fernando Vly,100,73,0.3,29,13,23,108
6,7,E San Fernando Vly,85,62,0.4,31,16,30,73
7,8,W San Gabriel Vly,104,76,0.5,41,11,23,118
8,9-1,E San Gabriel Vly-1,109,78,0.4,45,15,23,124
9,9-2,E San Gabriel Vly-2,109,78,0.4,41,15,23,124


In [None]:
print(text)

In [22]:
max(3, 5, None)

TypeError: '>' not supported between instances of 'NoneType' and 'int'

In [31]:
html = loop.create_task(main('https://xappprod.aqmd.gov/aqdetail/AirQuality/MonitoredData?AreaNumber=8'))

In [32]:
html.done()

True

In [34]:
tree = BeautifulSoup(html.result())

In [37]:
HTML(str(tree.find_all('table')[0]))

0,1,2,3,4
Pollutants,Description,Averaging Period,State Standard,Federal Standard
CO,Carbon Monoxide,1 Hr,20 ppm,35 ppm
CO,Carbon Monoxide,8 Hr,9 ppm,9 ppm
O3,Ozone,1 Hr,0.09 ppm,
O3,Ozone,8 Hr,0.07 ppm,0.075 ppm
NO2,Nitrogen Dioxide,1 Hr,0.18 ppm,
SO2,Sulphur Dioxide,1 Hr,0.25 ppm,
SO2,Sulphur Dioxide,24 Hr,0.04 ppm,0.14 ppm
PM10,Particulate Matter 10,1 Hr,,
PM10,Particulate Matter 10,24 Hr,50 µg/m3,150 µg/m3
