In [None]:
# 👇 colab only
# !pip install -q pytest-playwright
# !playwright install

In [8]:
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager

import pandas as pd
from playwright.async_api import async_playwright, Page

In [2]:
@asynccontextmanager
async def page(url: str, headless: bool = True) -> AsyncIterator[Page]:
    """Open ``url`` in a fresh Chromium page and yield it to the ``async with`` body.

    Args:
        url: Address navigated to before the page is handed to the caller.
        headless: Forwarded to ``chromium.launch``. Defaults to ``True``
            (Playwright's own default), so calls that omit it run without a
            visible browser window; pass ``False`` to watch the browser.

    Yields:
        The Playwright ``Page`` once ``goto(url)`` has completed.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        try:
            context = await browser.new_context()
            # Named `pg` so the local does not shadow this function's own name.
            pg = await context.new_page()
            await pg.goto(url)
            yield pg
        finally:
            # Close the browser even when navigation or the caller's block raises.
            await browser.close()

## Taiwan Reservoir Storage

In [5]:
# has to use playwright async api in jupyter
async with page('https://www.wra.gov.tw') as pg:
    tb = pg.locator('table').filter(has_text='仁義潭水庫')
    headers = await tb.locator('th').all_text_contents()
    rows = [
        await row.locator('td').all_inner_texts() 
        for row in await tb.locator('tbody tr').all()
    ]

In [6]:
# Display the scraped column headers (as published, in Chinese) for inspection.
headers

['水庫', '有效蓄水量(萬立方公尺)', '水位(公尺)', '蓄水率', '記錄時間']

In [9]:
# English column names, listed in the same order as the scraped headers.
eng_headers = ["name", "vol", "water-lv", "percent", "time"]

# Storage volume and water level are parsed as floats; the other columns stay text.
dtypes = {
    "name": str,
    "vol": float,
    "water-lv": float,
    "percent": str,
    "time": str,
}

df = pd.DataFrame(rows, columns=eng_headers)
df = df.astype(dtypes)
# Largest effective storage first, with a fresh 0..n-1 index.
df = df.sort_values("vol", ascending=False, ignore_index=True)
df

Unnamed: 0,name,vol,water-lv,percent,time
0,翡翠水庫,17957.88,150.8,53.52%,112-05-19 20:00
1,德基水庫,9119.13,1378.36,48.34%,112-05-19 07:00
2,日月潭水庫,8646.55,742.94,66.70%,112-05-19 07:00
3,石門水庫,6876.78,224.56,33.50%,112-05-19 20:00
4,鯉魚潭水庫,4835.86,280.36,41.85%,112-05-19 20:00
5,烏山頭水庫,3593.0,52.88,45.36%,112-05-19 07:00
6,曾文水庫,3411.0,190.73,6.73%,112-05-19 20:00
7,寶山第二水庫,2177.47,142.98,69.19%,112-05-19 20:00
8,永和山水庫,1832.7,77.43,61.22%,112-05-19 07:00
9,南化水庫,1578.49,160.45,17.64%,112-05-19 07:00
