# Web Scraper Wasserstände KT Zuerich

## Libraries and settings

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Web Scraper Hydrological Data

In [2]:
# Fetch the "aktuelle Werte" page, parse its HTML table, and store the
# readings as a DataFrame (`df`) and as a CSV file.

# Send an HTTP request to the URL
url = 'https://hydroproweb.zh.ch/Listen/AktuelleWerte/aktuelle_werte.html'
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx responses instead of parsing an error page as data.
response.raise_for_status()
html_content = response.content

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Locate the table and extract values
table = soup.find('table')
if table is None:
    # soup.find() returns None when nothing matches; without this guard the
    # next line would fail with an unhelpful AttributeError.
    raise ValueError(f'No <table> element found at {url}; the page layout may have changed.')

# Extract table headers (non-breaking spaces removed).
# Not used further in this cell; kept so the raw header texts can be inspected.
headers = [header.get_text().replace('\xa0', '') for header in table.find_all('th')]

# Define the final columns
columns = ['Gewaesser', 'Einheit', 'Zeit', 'Datum', 'Wert_Aktuell', '24h_vorher', 'Differenz', 'Mittel_24h', 'Maximum_24h', 'Minimum_24h']

# A scraped row has one cell fewer than the final layout, because time and
# date arrive together in a single cell and are split into two columns below.
expected_cells = len(columns) - 1

# Extract table rows (the first <tr> is skipped)
rows = []
for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    row_data = [cell.get_text().replace('\xa0', '').strip() for cell in cells]
    rows.append(row_data)

# Split the combined "ZeitDatum" column into separate "Zeit" and "Datum" columns
clean_rows = []
for row in rows:
    if len(row) == expected_cells:  # Rows with any other cell count are dropped
        zeit_datum = row[2]
        # The first five characters are taken as the time, the remainder as the date
        # (presumably "HH:MM" followed by "DD.MM.YYYY" — verify against the live page).
        zeit, datum = zeit_datum[:5].strip(), zeit_datum[5:].strip()
        clean_row = row[:2] + [zeit, datum] + row[3:]
        clean_rows.append(clean_row)

if not clean_rows:
    # Refuse to overwrite the CSV with a header-only file when nothing was parsed.
    raise ValueError(
        f'No table rows with {expected_cells} cells were found at {url}; '
        'the page layout may have changed.'
    )

# Create DataFrame
df = pd.DataFrame(clean_rows, columns=columns)

# Save the DataFrame to a CSV file
# NOTE(review): the file name reads 'hydodata' — possibly a typo for 'hydrodata';
# left unchanged so that anything reading this file keeps working.
df.to_csv('hydodata_table.csv', index=False)

# Display the DataFrame as the cell output
df

Unnamed: 0,Gewaesser,Einheit,Zeit,Datum,Wert_Aktuell,24h_vorher,Differenz,Mittel_24h,Maximum_24h,Minimum_24h
0,Aa-Stegen-Wetzikon,l/s,15:40,10.11.2024,85,95,-9.6,90,95,83
1,Aabach-Käpfnach,l/s,15:40,10.11.2024,47,48,-1.3,47,48,47
2,Aabach-Mönchaltorf,m3/s,15:40,10.11.2024,0.30,0.30,0.005,0.27,0.31,0.22
3,Aabach-Niederuster,m3/s,15:40,10.11.2024,0.61,0.55,0.058,0.59,0.85,0.50
4,Abistbach-Marthalen HW-RB,müM,15:45,10.11.2024,398.88,398.88,0.000,398.87,398.88,398.87
...,...,...,...,...,...,...,...,...,...,...
61,Türlersee,müM,12:15,10.11.2024,644.26,644.27,-0.003,644.26,644.27,644.26
62,Türlersee Abfluss,l/s,12:00,10.11.2024,29,31,-1.7,30,33,28
63,"Wildbach-Grosswies, Wetzikon",müM,15:40,10.11.2024,532.99,533.00,-0.001,533.00,533.00,532.99
64,Wildbach-Wetzikon,m3/s,15:40,10.11.2024,0.13,0.14,-0.007,0.12,0.16,0.09


### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [3]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Notebook footer: report the OS family, platform name and release,
# the current local timestamp, and the Python version, framed by two rules.
rule = '-----------------------------------'
now_text = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

footer_lines = [
    rule,
    os.name.upper(),
    ' '.join([platform.system(), '|', platform.release()]),
    ' '.join(['Datetime:', now_text]),
    ' '.join(['Python Version:', python_version()]),
    rule,
]

# Emit the whole footer in one call; each entry ends up on its own line.
print('\n'.join(footer_lines))

-----------------------------------
POSIX
Linux | 6.5.0-1025-azure
Datetime: 2024-11-10 14:52:30
Python Version: 3.11.10
-----------------------------------
