In [1]:
import pandas as pd
import datetime as dt
from fmiopendata.wfs import download_stored_query
from tqdm import tqdm

#### Download daily data in monthly batches

In [2]:
# Date range to download (both ends inclusive).
start_date = dt.date(2009, 1, 1)  # Example start date
end_date = dt.date(2023, 12, 31)

# One dict per (timestamp, station) water-temperature reading.
data_list = []

# Walk the range one calendar month at a time so each request stays small.
current_date = start_date
while current_date <= end_date:
    # First day of the following month; December rolls over into next year.
    if current_date.month == 12:
        next_month_first_day = dt.date(current_date.year + 1, 1, 1)
    else:
        next_month_first_day = dt.date(current_date.year, current_date.month + 1, 1)
    last_day_of_month = next_month_first_day - dt.timedelta(days=1)

    # Never request past end_date: without this clamp a mid-month end_date
    # would still download the remainder of that month.
    batch_end = min(last_day_of_month, end_date)

    start_time = current_date.isoformat() + "T00:00:00Z"
    end_time = batch_end.isoformat() + "T23:59:59Z"

    print(f"Downloading data from {start_time} to {end_time}")  # Optional: print statement to track progress

    # Download this batch with the mareograph daily multipointcoverage
    # stored query, restricted to the bounding box given in the request.
    obs = download_stored_query("fmi::observations::mareograph::daily::multipointcoverage",
                                args=["bbox=18,55,35,75",
                                      "starttime=" + start_time,
                                      "endtime=" + end_time])

    # obs.data is nested as timestamp -> station -> parameter -> details.
    # Keep only the water temperature reading of each station, if present.
    for timestamp, stations in obs.data.items():
        for station, params in stations.items():
            if 'Water temperature' in params:
                details = params['Water temperature']
                data_list.append({
                    'Timestamp': timestamp,
                    'Station': station,
                    'Water temperature': details['value'],
                    'Unit': details['units']
                })

    # Move to the first day of the next month for the next iteration
    current_date = next_month_first_day
    

Downloading data from 2009-01-01T00:00:00Z to 2009-01-31T23:59:59Z
Downloading data from 2009-02-01T00:00:00Z to 2009-02-28T23:59:59Z
Downloading data from 2009-03-01T00:00:00Z to 2009-03-31T23:59:59Z
Downloading data from 2009-04-01T00:00:00Z to 2009-04-30T23:59:59Z
Downloading data from 2009-05-01T00:00:00Z to 2009-05-31T23:59:59Z
Downloading data from 2009-06-01T00:00:00Z to 2009-06-30T23:59:59Z
Downloading data from 2009-07-01T00:00:00Z to 2009-07-31T23:59:59Z
Downloading data from 2009-08-01T00:00:00Z to 2009-08-31T23:59:59Z
Downloading data from 2009-09-01T00:00:00Z to 2009-09-30T23:59:59Z
Downloading data from 2009-10-01T00:00:00Z to 2009-10-31T23:59:59Z
Downloading data from 2009-11-01T00:00:00Z to 2009-11-30T23:59:59Z
Downloading data from 2009-12-01T00:00:00Z to 2009-12-31T23:59:59Z
Downloading data from 2010-01-01T00:00:00Z to 2010-01-31T23:59:59Z
Downloading data from 2010-02-01T00:00:00Z to 2010-02-28T23:59:59Z
Downloading data from 2010-03-01T00:00:00Z to 2010-03-31T23:59

Downloading data from 2019-04-01T00:00:00Z to 2019-04-30T23:59:59Z
Downloading data from 2019-05-01T00:00:00Z to 2019-05-31T23:59:59Z
Downloading data from 2019-06-01T00:00:00Z to 2019-06-30T23:59:59Z
Downloading data from 2019-07-01T00:00:00Z to 2019-07-31T23:59:59Z
Downloading data from 2019-08-01T00:00:00Z to 2019-08-31T23:59:59Z
Downloading data from 2019-09-01T00:00:00Z to 2019-09-30T23:59:59Z
Downloading data from 2019-10-01T00:00:00Z to 2019-10-31T23:59:59Z
Downloading data from 2019-11-01T00:00:00Z to 2019-11-30T23:59:59Z
Downloading data from 2019-12-01T00:00:00Z to 2019-12-31T23:59:59Z
Downloading data from 2020-01-01T00:00:00Z to 2020-01-31T23:59:59Z
Downloading data from 2020-02-01T00:00:00Z to 2020-02-29T23:59:59Z
Downloading data from 2020-03-01T00:00:00Z to 2020-03-31T23:59:59Z
Downloading data from 2020-04-01T00:00:00Z to 2020-04-30T23:59:59Z
Downloading data from 2020-05-01T00:00:00Z to 2020-05-31T23:59:59Z
Downloading data from 2020-06-01T00:00:00Z to 2020-06-30T23:59

#### Convert data to a DataFrame

In [3]:
import pandas as pd

# Build a long-format frame from the collected records, parse the
# 'Timestamp' values into datetimes and use them as the index.
df = (
    pd.DataFrame(data_list)
    .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
    .set_index('Timestamp')
)

# Reshape to wide format: one row per timestamp, one column per station,
# cells holding the 'Water temperature' value.
pivoted_df = df.pivot(
    columns='Station',
    values='Water temperature',
)

# Show the wide table as the cell output
pivoted_df


Station,Föglö Degerby,Hamina Pitäjänsaari,Hanko Pikku Kolalahti,Helsinki Kaivopuisto,Kaskinen Ådskär,Kemi Ajos,Oulu Toppila,Pietarsaari Leppäluoto,Pori Mäntyluoto Kallo,Porvoo Emäsalo Vaarlahti,Raahe Lapaluoto,Rauma Petäjäs,Turku Ruissalo Saaronniemi,Vaasa Vaskiluoto
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2009-01-01,1.7,2.2,6.3,,1.1,0.5,0.6,,0.1,,-0.2,0.6,2.4,0.1
2009-01-02,1.5,2.2,6.1,,1.5,0.3,0.5,,0.1,,-0.3,0.5,2.4,0.0
2009-01-03,1.1,2.2,5.8,,1.6,0.4,0.4,,0.2,,-0.2,0.1,3.7,-0.2
2009-01-04,0.8,2.1,5.8,,1.5,0.5,0.5,,0.1,,-0.2,-0.2,4.0,-0.3
2009-01-05,0.4,2.2,5.9,,1.1,0.3,0.6,,0.4,,-0.2,0.0,3.7,-0.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,2.0,0.3,1.8,1.5,,-0.1,0.5,0.8,0.7,1.6,1.3,-0.2,0.2,-0.1
2023-12-28,1.2,0.4,2.5,1.6,,0.0,0.6,0.7,-0.1,2.0,1.4,-0.2,0.3,-0.2
2023-12-29,1.4,0.6,1.8,1.2,,0.0,0.6,0.9,0.1,1.6,1.4,-0.2,0.2,-0.2
2023-12-30,2.0,0.7,1.6,0.8,,0.0,0.6,0.7,-0.2,1.4,1.0,-0.2,0.2,-0.2


#### save to file

In [4]:
import os


# Output file name; a bare name, so the CSV is written relative to the
# current working directory.
filename = "water_temperatures_data.csv"

# Write the pivoted (timestamp x station) table to disk as CSV.
pivoted_df.to_csv(filename)

# Confirm where the file was written.
print(f"Saved water_temperatures_data data to {filename}")


Saved water_temperatures_data data to water_temperatures_data.csv
