In [1]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import datetime, timedelta, date
import yaml
import requests
import os
import time

In [2]:
# Relative path to the root-level file
config_path = os.path.join("..", "..", "..", "config.yml")
with open(config_path, "r") as file:
    config = yaml.safe_load(file)

In [3]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [4]:
# Latitude and longitude
lat = 39.3999
long = -8.2245

In [5]:
# Start and end date
start_date = "2016-01-01"
end_date = datetime.today().strftime('%Y-%m-%d')

In [6]:
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
params = {
	"latitude": lat,
	"longitude": long,
	"start_date": start_date,
	"end_date": end_date,
	"hourly": ["temperature_2m", "relative_humidity_2m", "precipitation"]
}
responses = openmeteo.weather_api(url, params=params)

In [7]:
response = responses[0]

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["precipitation"] = hourly_precipitation

hourly_dataframe = pd.DataFrame(data = hourly_data)

In [8]:
# Write to .csv
# Write output to data folder
processed_data = config['top_level']+"data/raw/temp_hum_precip_data.csv"
hourly_dataframe.to_csv(processed_data, index=True)

In [29]:
def classify_day_night(row):
    sunrise = row['sunrise']
    sunset = row['sunset']
    timestamp = row['timestamp']
    
    # Classify as 'day' or 'night'
    if sunrise <= timestamp < sunset:
        return 'day'
    else:
        return 'night'

In [32]:
# Read in sunrise/sunset data
raw_data_filepath = config['raw_data']
sunset_sunrise_filepath = raw_data_filepath+"sunset_sunrise.csv"
sunset_sunrise = pd.read_csv(sunset_sunrise_filepath, index_col=None)

In [40]:
# Convert 'sunrise' and 'sunset' to datetime with timezone-awareness (e.g., UTC)
sunset_sunrise['sunrise'] = pd.to_datetime(sunset_sunrise['sunrise'], utc=True)
sunset_sunrise['sunset'] = pd.to_datetime(sunset_sunrise['sunset'], utc=True)
sunset_sunrise['date'] = pd.to_datetime(sunset_sunrise['date']).dt.date

# Create column "timestamp"
hourly_dataframe = hourly_dataframe.copy()
hourly_dataframe.loc[:, 'timestamp'] = pd.to_datetime(hourly_dataframe['date'])

# Convert date column to match sunrise data
hourly_dataframe.loc[:, 'date'] = pd.to_datetime(hourly_dataframe['date']).dt.strftime('%Y-%m-%d')
# clean_weather['date'] = pd.to_datetime(clean_weather['date'])

In [48]:
hourly_dataframe['date'] = hourly_dataframe["date"].dt.tz_convert(None)
sunset_sunrise['date'] = pd.to_datetime(sunset_sunrise['date'])

In [50]:
merged_data = pd.merge(hourly_dataframe, sunset_sunrise, on='date', how='inner')

In [58]:
merged_data['day_night'] = merged_data.apply(classify_day_night, axis=1)
merged_data.columns

Index(['date', 'temperature_2m', 'relative_humidity_2m', 'precipitation',
       'timestamp', 'Unnamed: 0', 'sunrise', 'sunset', 'day_night'],
      dtype='object')

In [69]:
# Get average day and night time temperatures
average_temps_by_date = merged_data.groupby(['date', 'day_night'])['temperature_2m'].mean().unstack()
average_temps_by_date = average_temps_by_date.dropna()

In [68]:
# Get average day and night time temperatures
average_hum_by_date = merged_data.groupby(['date', 'day_night'])['relative_humidity_2m'].mean().unstack()
average_hum_by_date = average_hum_by_date.dropna()

In [65]:
average_precip_by_date = merged_data.groupby(['date', 'day_night'])['precipitation'].mean().unstack()
average_precip_by_date = average_precip_by_date.dropna()

day_night,day,night
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-03-23,0.0,0.000000
2021-03-24,0.0,0.000000
2021-03-25,0.0,0.000000
2021-03-26,0.0,0.000000
2021-03-27,0.0,0.000000
...,...,...
2024-12-10,0.0,0.000000
2024-12-11,0.0,0.000000
2024-12-12,0.0,0.000000
2024-12-13,0.0,0.014286


In [175]:
# # Read in municipality data
# raw_data_filepath = config['raw_data']
# municipality_filepath = raw_data_filepath+"Weather/"+"Municipal Boundaries of Portugal.csv"
# municipalities = pd.read_csv(municipality_filepath, index_col=None)
# municipalities = municipalities.round(2)
# municipalities.tail()

In [79]:
# Create list of coordinates based on the two columns in the municipalities df
# coordinates = [(row['Latitude'], row['Longitude']) for _, row in municipalities.iterrows()]

In [174]:
# # Loop through each of the municipalities and get weather data for each location

# # Setup the Open-Meteo API client with cache and retry on error
# cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
# retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
# openmeteo = openmeteo_requests.Client(session = retry_session)

# # Start and end date
# start_date = "2016-06-25"
# end_date = datetime.today().strftime('%Y-%m-%d')

# # for i in range(len(municipalities)):
# for index, row in municipalities.iterrows():
#     lat = municipalities.loc[i, 'Latitude']
#     long = municipalities.loc[i, 'Longitude']
#     municipality = municipalities.loc[i, 'Name']

#     # Make sure all required weather variables are listed here
#     # The order of variables in hourly or daily is important to assign them correctly below
#     url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
#     params = {
#     	"latitude": lat,
#     	"longitude": long,
#     	"start_date": start_date,
#     	"end_date": end_date,
#     	"hourly": ["temperature_2m", "relative_humidity_2m", "precipitation"]
#     }
#     responses = openmeteo.weather_api(url, params=params)
    
    
#     response = responses[0]
    
#     # Process hourly data. The order of variables needs to be the same as requested.
#     hourly = response.Hourly()
#     hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
#     hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
#     hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
    
#     hourly_data = {"date": pd.date_range(
#     	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
#     	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
#     	freq = pd.Timedelta(seconds = hourly.Interval()),
#     	inclusive = "left"
#     )}
#     hourly_data["temperature_2m"] = hourly_temperature_2m
#     hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
#     hourly_data["precipitation"] = hourly_precipitation
#     hourly_data["latitude"] = lat
#     hourly_data["longitude"] = long
#     hourly_data["municipality"] = municipality
    
    
#     hourly_dataframe = pd.DataFrame(data = hourly_data)

In [172]:
# # Loop through each of the municipalities and get weather data for each location

# # Setup the Open-Meteo API client with cache and retry on error
# cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
# retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
# openmeteo = openmeteo_requests.Client(session = retry_session)

# # Start and end date
# start_date = "2016-01-01"
# end_date = datetime.today().strftime('%Y-%m-%d')
# # Create a list to store all processed DataFrames
# hourly_dataframes = []

# # Iterate through each row in the municipalities DataFrame
# for index, row in municipalities_to_process.iterrows():
#     lat = row['Latitude']  # Extract latitude
#     long = row['Longitude']  # Extract longitude
#     municipality_name = row['Name']  # Extract municipality name (adjust the column name as needed)
#     time.sleep(20)  # Add a 1-second delay between requests

#     # Define API parameters
#     url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
#     params = {
#         "latitude": lat,
#         "longitude": long,
#         "start_date": start_date,
#         "end_date": end_date,
#         "hourly": ["temperature_2m", "relative_humidity_2m", "precipitation"]
#     }

#     try:
#         # Make the API request
#         responses = openmeteo.weather_api(url, params=params)
#         response = responses[0]
        
#         # Process hourly data
#         hourly = response.Hourly()
#         hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
#         hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
#         hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
        
#         # Create a dictionary for the data
#         hourly_data = {
#             "date": pd.date_range(
#                 start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
#                 end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
#                 freq=pd.Timedelta(seconds=hourly.Interval()),
#                 inclusive="left"
#             ),
#             "temperature_2m": hourly_temperature_2m,
#             "relative_humidity_2m": hourly_relative_humidity_2m,
#             "precipitation": hourly_precipitation,
#             "municipality": municipality_name,  # Add municipality name,
#             "latitude": lat,
#             "longitude": long
#         }
        
#         # Convert the dictionary to a DataFrame
#         hourly_dataframe = pd.DataFrame(data=hourly_data)
        
#         # Append the DataFrame to the list
#         hourly_dataframes.append(hourly_dataframe)
    
#     except Exception as e:
#         print(f"Error processing municipality {municipality_name}: {e}")

# # Combine all DataFrames into one
# all_hourly_data = pd.concat(hourly_dataframes, ignore_index=True)

In [173]:
# Combine all DataFrames into one
# all_hourly_data = pd.concat(hourly_dataframes, ignore_index=True)
# updated_hourly_data = pd.concat(hourly_dataframes, ignore_index=True)
# newest_hourly_data = pd.concat(hourly_dataframes, ignore_index=True)

In [160]:
# # Row-bind the datasets
# combined_data = pd.concat([all_hourly_data, newest_hourly_data, updated_hourly_data])

# # Eliminate duplicate rows
# unique_data = combined_data.drop_duplicates()
# # unique_data

In [154]:
# # Write output to data folder
# import zipfile
# partial_municipality_data = config['top_level']+"data/raw/Weather/partial_municipality_data.csv"
# ZipFile.write(partial_municipality_data, arcname=None, compress_type=None)
# unique_data.to_csv(partial_municipality_data, index=True)

In [155]:
# import zipfile
# import io

# partial_municipality_data = config['top_level']+"data/raw/Weather/"

# # Define the zip file and CSV file names
# zip_file_name = partial_municipality_data+"municipality_data.zip"
# csv_file_name = "partial_municipality_data.csv"

# # Create the zip file and write the DataFrame to it
# with zipfile.ZipFile(zip_file_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
#     # Create an in-memory text stream
#     with io.StringIO() as csv_buffer:
#         # Write the DataFrame to the buffer as CSV
#         unique_data.to_csv(csv_buffer, index=False)
        
#         # Write the CSV content to the zip file
#         zipf.writestr(csv_file_name, csv_buffer.getvalue())

In [111]:
# # Create column "timestamp"
# all_hourly_data = all_hourly_data.copy()
# all_hourly_data.loc[:, 'timestamp'] = pd.to_datetime(all_hourly_data['date'])

# # Convert date column to match sunrise data
# all_hourly_data.loc[:, 'date'] = pd.to_datetime(all_hourly_data['date']).dt.strftime('%Y-%m-%d')
# all_hourly_data['date'] = all_hourly_data["date"].dt.tz_convert(None)

In [116]:
# sunset_sunrise['date'] = pd.to_datetime(sunset_sunrise['date'])
# all_hourly_data.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,precipitation,municipality,latitude,longitude,timestamp
0,2024-12-25,10.5325,77.0,0.0,Abrantes,39.43,-8.16,2024-12-25 00:00:00+00:00
1,2024-12-25,10.1825,76.0,0.0,Abrantes,39.43,-8.16,2024-12-25 01:00:00+00:00
2,2024-12-25,9.882501,77.0,0.0,Abrantes,39.43,-8.16,2024-12-25 02:00:00+00:00
3,2024-12-25,10.0325,76.0,0.0,Abrantes,39.43,-8.16,2024-12-25 03:00:00+00:00
4,2024-12-25,9.9325,75.0,0.0,Abrantes,39.43,-8.16,2024-12-25 04:00:00+00:00


In [114]:
# merged_data = pd.merge(all_hourly_data, sunset_sunrise, on='date', how='inner')
# merged_data.head()

Unnamed: 0.1,date,temperature_2m,relative_humidity_2m,precipitation,municipality,latitude,longitude,timestamp,Unnamed: 0,sunrise,sunset


In [157]:
# merged_data['day_night'] = merged_data.apply(classify_day_night, axis=1)

In [None]:
# import openmeteo_requests

# import requests_cache
# import pandas as pd
# from retry_requests import retry

# # Setup the Open-Meteo API client with cache and retry on error
# cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
# retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
# openmeteo = openmeteo_requests.Client(session = retry_session)

# # Make sure all required weather variables are listed here
# # The order of variables in hourly or daily is important to assign them correctly below
# url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
# params = {
# 	"latitude": lat,
# 	"longitude": long,
# 	"start_date": "2024-12-12",
# 	"end_date": "2024-12-25",
# 	"daily": ["temperature_2m_max", "temperature_2m_min", "sunrise", "sunset", "daylight_duration", "precipitation_sum"]
# }
# responses = openmeteo.weather_api(url, params=params)

# # Process first location. Add a for-loop for multiple locations or weather models
# response = responses[0]
# print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
# print(f"Elevation {response.Elevation()} m asl")
# print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
# print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# # Process daily data. The order of variables needs to be the same as requested.
# daily = response.Daily()
# daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
# daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
# daily_sunrise = daily.Variables(2).ValuesAsNumpy()
# daily_sunset = daily.Variables(3).ValuesAsNumpy()
# daily_daylight_duration = daily.Variables(4).ValuesAsNumpy()
# daily_precipitation_sum = daily.Variables(5).ValuesAsNumpy()

# daily_data = {"date": pd.date_range(
# 	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
# 	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
# 	freq = pd.Timedelta(seconds = daily.Interval()),
# 	inclusive = "left"
# )}
# daily_data["temperature_2m_max"] = daily_temperature_2m_max
# daily_data["temperature_2m_min"] = daily_temperature_2m_min
# daily_data["sunrise"] = daily_sunrise
# daily_data["sunset"] = daily_sunset
# daily_data["daylight_duration"] = daily_daylight_duration
# daily_data["precipitation_sum"] = daily_precipitation_sum

# daily_dataframe = pd.DataFrame(data = daily_data)

In [169]:
# Loop through each of the municipalities and get weather data for each location

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Start and end date
start_date = "2018-01-01"
end_date = datetime.today().strftime('%Y-%m-%d')
# Create a list to store all processed DataFrames
daily_dataframes = []

# Iterate through each row in the municipalities DataFrame
for index, row in municipalities_to_process.iterrows():
    lat = row['Latitude']  # Extract latitude
    long = row['Longitude']  # Extract longitude
    municipality_name = row['Name']  # Extract municipality name (adjust the column name as needed)
    time.sleep(10)  # Add a 1-second delay between requests

    # Define API parameters
    url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    params = {
    	"latitude": lat,
    	"longitude": long,
    	"start_date": start_date,
    	"end_date": end_date,
    	"daily": ["temperature_2m_max", "temperature_2m_min", "sunrise", "sunset", "daylight_duration", "precipitation_sum"]
    }

    try:
        # Make the API request
        responses = openmeteo.weather_api(url, params=params)
        response = responses[0]
        
         # Process daily data. The order of variables needs to be the same as requested.
        daily = response.Daily()
        daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
        daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
        daily_sunrise = daily.Variables(2).ValuesAsNumpy()
        daily_sunset = daily.Variables(3).ValuesAsNumpy()
        # daily_daylight_duration = daily.Variables(4).ValuesAsNumpy()
        daily_precipitation_sum = daily.Variables(5).ValuesAsNumpy()
        
        daily_data = {"date": pd.date_range(
        	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
        	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
        	freq = pd.Timedelta(seconds = daily.Interval()),
        	inclusive = "left"
        )}
        daily_data["temperature_2m_max"] = daily_temperature_2m_max
        daily_data["temperature_2m_min"] = daily_temperature_2m_min
        daily_data["sunrise"] = daily_sunrise
        daily_data["sunset"] = daily_sunset
        # daily_data["daylight_duration"] = daily_daylight_duration
        daily_data["precipitation_sum"] = daily_precipitation_sum
        daily_data["municipality"] = municipality_name
        
        daily_dataframe = pd.DataFrame(data = daily_data)
        
        # Append the DataFrame to the list
        daily_dataframes.append(daily_dataframe)

        print(f"Processed municipality {municipality_name}")
    
    except Exception as e:
        print(f"Error processing municipality {municipality_name}: {e}")

# Combine all DataFrames into one
all_daily_data = pd.concat(daily_dataframes, ignore_index=True)

Processed municipality Aveiro
Processed municipality Benavente
Processed municipality Bombarral
Processed municipality Borba
Processed municipality Campo Maior
Processed municipality Cantanhede
Processed municipality Castro Marim
Processed municipality Castro Verde
Processed municipality Celorico da Beira
Processed municipality Celorico de Basto
Processed municipality Chamusca
Processed municipality Chaves
Processed municipality Cinfães
Processed municipality Coimbra
Processed municipality Condeixa-a-Nova
Processed municipality Constância
Processed municipality Coruche
Processed municipality Corvo
Processed municipality Covilhã
Processed municipality Crato
Processed municipality Cuba
Processed municipality Câmara de Lobos
Processed municipality Elvas
Processed municipality Entroncamento
Processed municipality Espinho
Processed municipality Esposende
Processed municipality Estarreja
Processed municipality Estremoz
Processed municipality Fafe
Processed municipality Faro
Processed municip

KeyboardInterrupt: 

In [170]:
# Combine all DataFrames into one
all_daily_data = pd.concat(daily_dataframes, ignore_index=True)

In [171]:
# Write output to data folder
processed_data = config['top_level']+"data/raw/Weather/partial_municipalities_data.csv"
all_daily_data.to_csv(processed_data, index=True)