In [1]:
import pandas as pd
import numpy as np
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry


In [2]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [3]:
url = "https://archive-api.open-meteo.com/v1/archive"

In [4]:
#capitals ARI
params_ari = {
    "latitude": [50.8503, 42.6977, 50.0755, 52.52, 59.437, 40.4168, 48.8566, 47.4979, 54.6872, 49.8153, 56.9496, 44.4268, 46.0569],
    "longitude": [4.3517, 23.3219, 14.4378, 13.405, 24.7536, -3.7038, 2.3522, 19.0402, 25.2797, 6.1296, 24.1052, 26.1025, 14.5058],
    "hourly": "relative_humidity_2m",
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13"
}
country_names_ari = [
    "BE", "BG", "CZ", "DE", "EE",
    "ES", "FR", "HU", "LT", "LU",
    "LV", "RO", "SI"
]



In [5]:
# Assuming `responses` is the result from `openmeteo.weather_api` for multiple locations
responses = openmeteo.weather_api(url, params=params_ari)  # This should return a list of responses

# Initialize an empty list to store data for all locations
all_data_ari = []

# Loop through all responses
for country, response in zip(country_names_ari, responses):
    # Check if the response is valid
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Process the hourly data
    hourly = response.Hourly()
    hourly_relative_humidity_2m = hourly.Variables(0).ValuesAsNumpy()

    # Create a DataFrame for the current location
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "relative_humidity_2m": hourly_relative_humidity_2m,
        "country": [country] * len(hourly_relative_humidity_2m)  # Add country column
    }

    # Convert to DataFrame and append to the list
    all_data_ari.append(pd.DataFrame(data=hourly_data))

# Combine all DataFrames into one
combined_data_ari_humidity = pd.concat(all_data_ari, ignore_index=True)


# Optionally save to CSV
combined_data_ari_humidity.to_csv("combined_weather_data_ari_humidity.csv", index=False)


In [6]:
combined_data_ari_humidity.groupby('country').count()

Unnamed: 0_level_0,date,relative_humidity_2m
country,Unnamed: 1_level_1,Unnamed: 2_level_1
BE,87888,87888
BG,87888,87888
CZ,87888,87888
DE,87888,87888
EE,87888,87888
ES,87888,87888
FR,87888,87888
HU,87888,87888
LT,87888,87888
LU,87888,87888


In [45]:
params = {
 	"latitude": [50.8503, 42.6977, 50.0755, 52.52, 59.437, 40.4168, 48.8566, 47.4979, 54.6872, 49.8153, 56.9496, 44.4268, 46.0569],
    "longitude": [4.3517, 23.3219, 14.4378, 13.405, 24.7536, -3.7038, 2.3522, 19.0402, 25.2797, 6.1296, 24.1052, 26.1025, 14.5058],
	"daily": ["temperature_2m_max", "temperature_2m_min"],
	"timezone": "auto",
	"start_date": "2014-10-05",
	"end_date": "2024-10-13"
}

In [46]:
# Assuming `responses` is the result from `openmeteo.weather_api` for multiple locations
responses = openmeteo.weather_api(url, params=params)  # This should return a list of responses


# Initialize an empty list to store data for all locations
all_data = []

# Loop through all responses
for country, response in zip(country_names, responses):
    # Check if the response is valid
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Process the daily data
    daily = response.Daily()
    daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
    daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()

    # Create a DataFrame for the current location
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "temperature_2m_max": daily_temperature_2m_max,
        "temperature_2m_min": daily_temperature_2m_min,
        "country": [country] * len(daily_temperature_2m_max)  # Add country column
    }

    # Convert to DataFrame and append to the list
    all_data.append(pd.DataFrame(data=daily_data))

# Combine all DataFrames into one
combined_data_daily = pd.concat(all_data, ignore_index=True)

# Print or save the combined DataFrame
print(combined_data_daily)

# Optionally save to CSV
combined_data_daily.to_csv("combined_daily_weather_data.csv", index=False)


                           date  temperature_2m_max  temperature_2m_min  \
0     2014-10-04 23:00:00+00:00           13.586500           11.336500   
1     2014-10-05 23:00:00+00:00           13.936501           12.186501   
2     2014-10-06 23:00:00+00:00           15.236501            9.836500   
3     2014-10-07 23:00:00+00:00           16.886499            9.586500   
4     2014-10-08 23:00:00+00:00           17.936499           12.886500   
...                         ...                 ...                 ...   
47601 2024-10-08 23:00:00+00:00           20.231998           14.932000   
47602 2024-10-09 23:00:00+00:00           19.431999           13.182000   
47603 2024-10-10 23:00:00+00:00           18.132000           10.432000   
47604 2024-10-11 23:00:00+00:00           15.382000            7.732000   
47605 2024-10-12 23:00:00+00:00           18.532000            7.632000   

        country  
0       Belgium  
1       Belgium  
2       Belgium  
3       Belgium  
4       B

In [2]:
temp_ari = pd.read_csv("open-meteo-50.86N4.33E26m.csv")
temp_ari_hum = pd.read_csv("open-meteo-50.86N4.33E26m_hum.csv")
temp_ill = pd.read_csv("open-meteo-48.19N16.38E179m_ili.csv")
temp_ill_hum = pd.read_csv("open-meteo-48.19N16.38E179m_hum.csv")                           

  temp_ari_hum = pd.read_csv("open-meteo-50.86N4.33E26m_hum.csv")
  temp_ill_hum = pd.read_csv("open-meteo-48.19N16.38E179m_hum.csv")


In [17]:
def function_get_data(df,n_countries,n_columns):
    data_description = df.iloc[0:n_countries+1,]
    aux_column_names = list(df.iloc[n_countries+1,0:n_columns+1])
    temp_data = df.iloc[n_countries+2:,0:n_columns+1]
    temp_data.columns = aux_column_names
    return data_description,temp_data

description_ari,data_ari = function_get_data(temp_ari,12,3)
description_ari_hum,data_ari_hum = function_get_data(temp_ari_hum,12,2)
description_ill,data_ill = function_get_data(temp_ill,18,3)
description_ill_hum,data_ill_hum = function_get_data(temp_ill_hum,18,2)

In [35]:
data_ari['DateTime'] = pd.to_datetime(data_ari['time'])
data_ari['temperature_2m_max (°C)'] = pd.to_numeric(data_ari['temperature_2m_max (°C)'])
data_ari['temperature_2m_min (°C)'] = pd.to_numeric(data_ari['temperature_2m_min (°C)'])
data_ari_hum['DateTime'] = pd.to_datetime(data_ari_hum['time'])
data_ari_hum['relative_humidity_2m (%)'] = pd.to_numeric(data_ari_hum['relative_humidity_2m (%)'])
data_ill['DateTime'] = pd.to_datetime(data_ill['time'])
data_ill['temperature_2m_max (°C)'] = pd.to_numeric(data_ill['temperature_2m_max (°C)'])
data_ill['temperature_2m_min (°C)'] = pd.to_numeric(data_ari_hum['relative_humidity_2m (%)'])
data_ill_hum['DateTime'] = pd.to_datetime(data_ill_hum['time'])
data_ill_hum['relative_humidity_2m (%)'] = pd.to_numeric(data_ill_hum['relative_humidity_2m (%)'])


In [40]:
data_ill_hum['Date'] = data_ill_hum['DateTime'].dt.strftime('%Y-%m-%d')
data_ari_hum['Date'] = data_ari_hum['DateTime'].dt.strftime('%Y-%m-%d')

In [None]:
data_ill_hum_by_day = data_ill_hum.groupby(['location_id', 'Date'])['relative_humidity_2m (%)'].mean().reset_index()
data_ari_hum_by_day = data_ari_hum.groupby(['location_id','Date'])['relative_humidity_2m (%)'].mean().reset_index()


Unnamed: 0,location_id,Date,relative_humidity_2m (%)
0,1,2019-08-25,49.166667
1,1,2019-08-26,51.583333
2,1,2019-08-27,46.583333
3,1,2019-08-28,45.875000
4,1,2019-08-29,42.375000
...,...,...,...
39946,1,2019-08-21,48.875000
39947,1,2019-08-22,48.958333
39948,1,2019-08-23,49.333333
39949,1,2019-08-24,54.416667


In [57]:
print(data_ill_hum_by_day.sort_values(by=['Date','location_id']).head())
print(data_ill_hum_by_day.tail())
print(data_ill_hum_by_day['Date'].nunique())

      location_id        Date  relative_humidity_2m (%)
1874            2  2014-09-22                 73.333333
5541            3  2014-09-22                 73.958333
9208            4  2014-09-22                 93.041667
12869           5  2014-09-22                 72.958333
16536           6  2014-09-22                 63.750000
      location_id        Date  relative_humidity_2m (%)
69119           1  2019-08-16                 72.625000
69120           1  2019-08-17                 82.625000
69121           1  2019-08-18                 80.083333
69122           1  2019-08-19                 69.958333
69123           1  2019-08-20                 82.916667
3667


In [51]:
print(data_ari_hum_by_day.tail())
print(data_ari.tail())

      location_id        Date  relative_humidity_2m (%)
39946           1  2019-08-21                 48.875000
39947           1  2019-08-22                 48.958333
39948           1  2019-08-23                 49.333333
39949           1  2019-08-24                 54.416667
39950           1  2019-08-25                 51.722222
      location_id        time  temperature_2m_max (°C)  \
39959          12  2024-09-26                     20.8   
39960          12  2024-09-27                     22.1   
39961          12  2024-09-28                     16.7   
39962          12  2024-09-29                     15.7   
39963          12  2024-09-30                     15.7   

       temperature_2m_min (°C)   DateTime  
39959                     13.6 2024-09-26  
39960                     15.9 2024-09-27  
39961                     10.9 2024-09-28  
39962                      6.7 2024-09-29  
39963                      4.7 2024-09-30  


In [12]:
def humudity_data(parametros,country_names):
    responses = openmeteo.weather_api(url, params=parametros)  
    all_data = []

    for country, response in zip(country_names, responses):
        if response is None:
            print(f"No data available for {country}.")
            continue

        hourly = response.Hourly()
        hourly_relative_humidity_2m = hourly.Variables(0).ValuesAsNumpy()

        hourly_data = {
            "date": pd.date_range(
                start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=hourly.Interval()),
                inclusive="left"
            ),
            "relative_humidity_2m": hourly_relative_humidity_2m,
            "country": [country] * len(hourly_relative_humidity_2m)  
        }

        all_data.append(pd.DataFrame(data=hourly_data))

    combined_data = pd.concat(all_data, ignore_index=True)
    return combined_data

#    combined_data.to_csv("combined_weather_data.csv", index=False)


In [6]:
def temperature_data(parametros,country_names):
    # Setup the Open-Meteo API client with cache and retry on error
    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)
    url = "https://archive-api.open-meteo.com/v1/archive"

    responses = openmeteo.weather_api(url, params=parametros)  
    all_data = []

    # Loop through all responses
    for country, response in zip(country_names, responses):
        # Check if the response is valid
        if response is None:
            print(f"No data available for {country}.")
            continue

        # Process the daily data
        daily = response.Daily()
        daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
        daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()

        # Create a DataFrame for the current location
        daily_data = {
            "date": pd.date_range(
                start=pd.to_datetime(daily.Time(), unit="s", utc=True),
                end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=daily.Interval()),
                inclusive="left"
            ),
            "temperature_2m_max": daily_temperature_2m_max,
            "temperature_2m_min": daily_temperature_2m_min,
            "country": [country] * len(daily_temperature_2m_max)  # Add country column
        }

        all_data.append(pd.DataFrame(data=daily_data))

    combined_data_daily = pd.concat(all_data, ignore_index=True)
    return combined_data_daily
    


In [13]:
#ari countries
params_ari = {
 	"latitude": [50.8503, 42.6977, 50.0755, 52.52, 59.437, 40.4168, 48.8566, 47.4979, 54.6872, 49.8153, 56.9496, 44.4268, 46.0569],
    "longitude": [4.3517, 23.3219, 14.4378, 13.405, 24.7536, -3.7038, 2.3522, 19.0402, 25.2797, 6.1296, 24.1052, 26.1025, 14.5058],
	"daily": ["temperature_2m_max", "temperature_2m_min"],
	"timezone": "auto",
	"start_date": "2014-10-05",
	"end_date": "2024-10-13"
}
country_names_ari = [
    "BE", "BG", "CZ", "DE", "EE",
    "ES", "FR", "HU", "LT", "LU",
    "LV", "RO", "SI"
]


In [7]:
temperatura_ari = temperature_data(params_ari,country_names_ari)
temperatura_ari.head()

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,country
0,2014-10-04 23:00:00+00:00,13.5865,11.3365,BE
1,2014-10-05 23:00:00+00:00,13.936501,12.186501,BE
2,2014-10-06 23:00:00+00:00,15.236501,9.8365,BE
3,2014-10-07 23:00:00+00:00,16.886499,9.5865,BE
4,2014-10-08 23:00:00+00:00,17.936499,12.8865,BE


In [14]:
humedad_ari = humudity_data(params_ari,country_names_ari)

AttributeError: 'NoneType' object has no attribute 'Variables'