In [158]:
import requests
import json
import pandas as pd
import pyproj
from geopy.distance import geodesic
import csv
from datetime import datetime
from io import StringIO

In [10]:
# Load the jupyter_black extension for this notebook session
# (presumably enables Black auto-formatting of code cells — confirm against its docs)
import jupyter_black
jupyter_black.load()

## Match SMHI weather stations to lakes based on coordinates

In [135]:
# Read the semicolon-separated SMHI weather station register
stations_all = pd.read_csv("data/SMHI_weather_stations.csv", sep=";")

# Keep only the stations flagged as active ("Aktiv" == "Ja")
is_active = stations_all["Aktiv"] == "Ja"
df_stations = stations_all[is_active]

df_stations.head()

Unnamed: 0,Id,Namn,Latitud,Longitud,Höjd (m),Aktiv
5,188790,Abisko Aut,68.3538,18.8164,392.303,Ja
7,158820,Adak,65.3531,18.5839,408.0,Ja
10,97280,Adelsö A,59.3579,17.5213,5.612,Ja
14,72560,Alingsås D,57.8939,12.5472,72.054,Ja
17,134590,Almdalen,63.9967,14.6701,615.0,Ja


In [131]:
# Inspect shape: (rows, columns) of the station table after the active-only filter
df_stations.shape

(539, 6)

In [136]:
# Read the lake chemistry samples from the named worksheet of the Excel workbook
lake_chem_path = "data/LakeChem 2001-2022 Surface Season cleand.xlsx"
lake_chem_sheet = "LakeChem 2001-2022 Surface Seas"
df_lake_chem = pd.read_excel(lake_chem_path, sheet_name=lake_chem_sheet)

df_lake_chem.head()

Unnamed: 0,MD-MVM Id,Nationellt övervakningsstations-ID,Övervakningsstation,Stationskoordinat N/X,Stationskoordinat E/Y,Län,Kommun,MS_CD C3,ProvId,Provdatum,...,Tot-P (µg/l P),Si (mg/l),Fe (µg/l),Al (µg/l),Al_s (µg/l),Syrgashalt (mg/l O2),Siktdjup (m),Siktdjup med kikare (m),Siktdjup utan kikare (m),Vattentemperatur (°C)
0,54,262403.0,Spjutsjön,6722638,524356,Dalarnas län,Falun,WA42559716,22480,2001-03-28,...,8.0,2.27,40.0,,85.0,,5.5,,,0.6
1,54,262403.0,Spjutsjön,6722638,524356,Dalarnas län,Falun,WA42559716,22481,2001-05-21,...,5.0,1.62,43.0,,80.0,,7.6,,,10.2
2,54,262403.0,Spjutsjön,6722638,524356,Dalarnas län,Falun,WA42559716,22482,2001-08-22,...,5.0,1.73,19.0,,45.0,,6.0,,,18.6
3,54,262403.0,Spjutsjön,6722638,524356,Dalarnas län,Falun,WA42559716,22483,2001-10-15,...,8.0,1.72,41.0,,50.0,,7.0,,,10.3
4,54,262403.0,Spjutsjön,6722638,524356,Dalarnas län,Falun,WA42559716,22484,2002-02-26,...,8.0,1.89,36.0,,55.0,,,,,1.5


In [137]:
# Keep only the identifier, name and coordinate columns of the lakes data;
# everything else is not needed for matching lakes to weather stations
lake_coord_columns = [
    "MD-MVM Id",
    "Övervakningsstation",
    "Stationskoordinat N/X",
    "Stationskoordinat E/Y",
]
df_lake_coord = df_lake_chem[lake_coord_columns]

df_lake_coord.head()

Unnamed: 0,MD-MVM Id,Övervakningsstation,Stationskoordinat N/X,Stationskoordinat E/Y
0,54,Spjutsjön,6722638,524356
1,54,Spjutsjön,6722638,524356
2,54,Spjutsjön,6722638,524356
3,54,Spjutsjön,6722638,524356
4,54,Spjutsjön,6722638,524356


In [138]:
# Convert projected SWEREF 99 TM coordinates (EPSG:3006) to WGS 84 decimal
# degrees (EPSG:4326)
def convert_coordinates_to_dd(
    df,
    north_col="Stationskoordinat N/X",
    east_col="Stationskoordinat E/Y",
):
    """Return a copy of ``df`` with ``lat_dd`` and ``long_dd`` columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the projected coordinates. It is not modified.
    north_col : str
        Name of the column with the northing (N/X) values.
    east_col : str
        Name of the column with the easting (E/Y) values.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the transformed latitude in ``lat_dd`` and the
        transformed longitude in ``long_dd``.
    """
    df_copy = df.copy()
    transformer = pyproj.Transformer.from_crs(crs_from="EPSG:3006", crs_to="EPSG:4326")

    # The transformer is created without always_xy, so the argument order is
    # northing first, then easting, and the result comes back as
    # (latitude, longitude) -- not (long, lat).
    df_copy["lat_dd"], df_copy["long_dd"] = transformer.transform(
        df_copy[north_col], df_copy[east_col]
    )
    return df_copy

In [139]:
# Add decimal-degree latitude/longitude columns to the lake coordinates
df_lake_coord = df_lake_coord.pipe(convert_coordinates_to_dd)

df_lake_coord.head()

Unnamed: 0,MD-MVM Id,Övervakningsstation,Stationskoordinat N/X,Stationskoordinat E/Y,lat_dd,long_dd
0,54,Spjutsjön,6722638,524356,60.638793,15.445276
1,54,Spjutsjön,6722638,524356,60.638793,15.445276
2,54,Spjutsjön,6722638,524356,60.638793,15.445276
3,54,Spjutsjön,6722638,524356,60.638793,15.445276
4,54,Spjutsjön,6722638,524356,60.638793,15.445276


In [141]:
# Distance in kilometres between two coordinates
def haversine_distance(coord_1, coord_2):
    """Return the distance between two points in kilometres, rounded to 0.1 km.

    Despite the function name, the distance is computed with geopy's
    ``geodesic`` rather than with a haversine formula.

    Both arguments are passed straight to ``geodesic``; the caller in this
    notebook supplies (latitude, longitude) tuples in decimal degrees.
    """
    kilometers = geodesic(coord_1, coord_2).kilometers
    return round(kilometers, 1)

In [142]:
# Find the closest station in df_stations for every row of df_lake_coord.
#
# df_lake_coord has one row per sample, so the same lake coordinates repeat
# many times. The distances to all stations are therefore computed once per
# unique coordinate pair and the result is mapped back onto every row.
unique_coords = df_lake_coord[["lat_dd", "long_dd"]].drop_duplicates()

# (lat_dd, long_dd) -> (closest station id, distance in km)
nearest_by_coord = {}
for lake_lat, lake_long in unique_coords.itertuples(index=False, name=None):
    distances = df_stations.apply(
        lambda station: haversine_distance(
            (lake_lat, lake_long), (station["Latitud"], station["Longitud"])
        ),
        axis=1,
    )
    min_distance_idx = distances.idxmin()
    nearest_by_coord[(lake_lat, lake_long)] = (
        int(df_stations.loc[min_distance_idx, "Id"]),
        distances[min_distance_idx],
    )

# Look every row up by its coordinate pair; the row index is left untouched
coord_keys = list(zip(df_lake_coord["lat_dd"], df_lake_coord["long_dd"]))
df_lake_coord["closest_station_id"] = [nearest_by_coord[key][0] for key in coord_keys]
df_lake_coord["closest_station_dist"] = [nearest_by_coord[key][1] for key in coord_keys]

In [143]:
# Cast weather station ids to int
df_lake_coord["closest_station_id"] = df_lake_coord["closest_station_id"].astype(int)

# Station names indexed by station id, already carrying the target column name
station_names = df_stations.set_index("Id")["Namn"].rename("closest_station_name")

# Join name of weather stations. Any name column left over from an earlier run
# of this cell is dropped first, so re-running the cell does not append a
# second "closest_station_name" column.
df_lake_coord = df_lake_coord.drop(
    columns="closest_station_name", errors="ignore"
).join(station_names, on="closest_station_id")

df_lake_coord.head()

Unnamed: 0,MD-MVM Id,Övervakningsstation,Stationskoordinat N/X,Stationskoordinat E/Y,lat_dd,long_dd,closest_station_id,closest_station_dist,closest_station_name
0,54,Spjutsjön,6722638,524356,60.638793,15.445276,105470,11.7,Bjursås
1,54,Spjutsjön,6722638,524356,60.638793,15.445276,105470,11.7,Bjursås
2,54,Spjutsjön,6722638,524356,60.638793,15.445276,105470,11.7,Bjursås
3,54,Spjutsjön,6722638,524356,60.638793,15.445276,105470,11.7,Bjursås
4,54,Spjutsjön,6722638,524356,60.638793,15.445276,105470,11.7,Bjursås


In [146]:
# Summarise the distance (km) between each lake and its matched station
closest_dist = df_lake_coord["closest_station_dist"]
mean_dist, min_dist, max_dist = closest_dist.mean(), closest_dist.min(), closest_dist.max()
print(f"Mean: {mean_dist}")
print(f"Min: {min_dist}")
print(f"Max: {max_dist}")

Mean: 13.567673278359706
Min: 1.0
Max: 45.4


In [170]:
# Spot-check a single lake: show its rows together with the matched station
is_hallvattnet = df_lake_coord["Övervakningsstation"] == "Hällvattnet"
df_lake_coord[is_hallvattnet]

Unnamed: 0,MD-MVM Id,Övervakningsstation,Stationskoordinat N/X,Stationskoordinat E/Y,lat_dd,long_dd,closest_station_id,closest_station_dist,closest_station_name
609,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö
610,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö
611,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö
612,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö
613,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö
...,...,...,...,...,...,...,...,...,...
691,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö
692,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö
693,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö
694,61,Hällvattnet,7048754,630861,63.542734,17.633053,137350,4.4,Myckelgensjö


## Get SMHI data for the matching weather stations

In [152]:
# Distinct ids of the weather stations that were matched to at least one lake
matched_station_ids = df_lake_coord["closest_station_id"].unique()
list_of_stations = list(matched_station_ids)
print(len(list_of_stations))

93


In [167]:
# Function to fetch data for a given station and parameter
def fetch_data(station_id, parameter, timeout=30):
    """Download the corrected-archive CSV for one station and parameter.

    Parameters
    ----------
    station_id : int or str
        Station id inserted into the request URL.
    parameter : int or str
        Parameter id inserted into the request URL.
    timeout : float
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    list of dict or None
        The rows of the semicolon-separated response as produced by
        ``csv.DictReader``, or ``None`` when the request fails or the server
        answers with a status other than 200 (a message is printed in both
        failure cases).
    """
    # Base URL for SMHI Open Data API
    BASE_URL = "https://opendata-download-metobs.smhi.se/api/version/latest"

    endpoint = f"{BASE_URL}/parameter/{parameter}/station/{station_id}/period/corrected-archive/data.csv"

    try:
        response = requests.get(endpoint, timeout=timeout)
    except requests.RequestException as error:
        # Network-level failures (DNS, connection reset, timeout) are reported
        # the same way as HTTP errors: print and return None.
        print(
            f"Failed to fetch data for station {station_id} and parameter {parameter}: {error}, {endpoint}"
        )
        return None

    if response.status_code != 200:
        print(
            f"Failed to fetch data for station {station_id} and parameter {parameter}: {response.status_code}, {endpoint}"
        )
        return None

    # Parse the CSV file.
    # NOTE(review): DictReader takes the first line of the payload as the
    # header row -- confirm the file does not begin with metadata lines.
    reader = csv.DictReader(StringIO(response.text), delimiter=";")
    return list(reader)

In [169]:
# Parameters (types of data) to fetch
# 2 is average daily air temperature, measured at 00.00
# 5 is accumulated daily rainfall, measured at 06.00
PARAMETERS = [2, 5]

# List of station IDs
# NOTE(review): list_of_stations, computed earlier from the matched lakes, is
# not used here; only these two hard-coded stations are fetched.
STATIONS = [105470, 97480]

# Fetched observations, one DataFrame per (station, parameter) pair.
# Previously the fetched rows were discarded on the next iteration even though
# the message below reports them as stored.
weather_frames = {}

# Iterate through stations and parameters and fetch data
for station in STATIONS:
    for parameter in PARAMETERS:
        data = fetch_data(station, parameter)
        if data:
            weather_frames[(station, parameter)] = pd.DataFrame(data)
            print(
                f"Data for station {station} and parameter {parameter} stored in dataframe"
            )
        else:
            # Handle the case where data could not be fetched (or was empty)
            print(f"No data for station {station} and parameter {parameter}")

Data for station 105370 and parameter 2 stored in dataframe
Data for station 105370 and parameter 5 stored in dataframe
Failed to fetch data for station 97480 and parameter 2: 404, https://opendata-download-metobs.smhi.se/api/version/latest/parameter/2/station/97480/period/corrected-archive/data.csv
No data for station 97480 and parameter 2
Data for station 97480 and parameter 5 stored in dataframe


In [None]:
# Scratch cell (never executed): data URL for parameter 2 at station 105370,
# presumably kept for opening manually in a browser
"https://opendata-download-metobs.smhi.se/api/version/latest/parameter/2/station/105370/period/corrected-archive/data.csv"

In [None]:
# Scratch cell (never executed): data URL for parameter 2 at station 105470,
# presumably kept for opening manually in a browser
"https://opendata-download-metobs.smhi.se/api/version/latest/parameter/2/station/105470/period/corrected-archive/data.csv"