In [119]:
import pandas as pd
import numpy as np
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import datetime



In [120]:
ari_incidence = pd.read_csv("latest-ARI_incidence.csv")
ili_incidence = pd.read_csv("latest-ILI_incidence.csv")

In [121]:
ari_incidence.head()

Unnamed: 0,location,truth_date,year_week,value
0,BE,2024-10-13,2024-W41,1384.1
1,BE,2024-10-06,2024-W40,1182.0
2,BE,2024-09-29,2024-W39,1216.6
3,BE,2024-09-22,2024-W38,993.0
4,BE,2024-09-15,2024-W37,927.0


In [122]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [123]:
url = "https://archive-api.open-meteo.com/v1/archive"

In [124]:
#capitals ARI
params_ari = {
    "latitude": [50.8503, 42.6977, 50.0755, 52.52, 59.437, 40.4168, 48.8566, 47.4979, 54.6872, 49.8153, 56.9496, 44.4268, 46.0569],
    "longitude": [4.3517, 23.3219, 14.4378, 13.405, 24.7536, -3.7038, 2.3522, 19.0402, 25.2797, 6.1296, 24.1052, 26.1025, 14.5058],
    "hourly": "relative_humidity_2m",
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13"
}
country_names_ari = [
    "BE", "BG", "CZ", "DE", "EE",
    "ES", "FR", "HU", "LT", "LU",
    "LV", "RO", "SI"
]



In [125]:

responses = openmeteo.weather_api(url, params=params_ari) 
all_data_ari = []

# Loop through all responses
for country, response in zip(country_names_ari, responses):
    # Check if the response is valid
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Process the hourly data
    hourly = response.Hourly()
    hourly_relative_humidity_2m = hourly.Variables(0).ValuesAsNumpy()

    # Create a DataFrame for the current location
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "relative_humidity_2m": hourly_relative_humidity_2m,
        "country": [country] * len(hourly_relative_humidity_2m)  # Add country column
    }

    all_data_ari.append(pd.DataFrame(data=hourly_data))

data_ari_humidity = pd.concat(all_data_ari, ignore_index=True)


# Optionally save to CSV
data_ari_humidity.to_csv("data_humidity_ari .csv", index=False)


In [126]:
data_ari_humidity.groupby('country').count()

Unnamed: 0_level_0,date,relative_humidity_2m
country,Unnamed: 1_level_1,Unnamed: 2_level_1
BE,87888,87888
BG,87888,87888
CZ,87888,87888
DE,87888,87888
EE,87888,87888
ES,87888,87888
FR,87888,87888
HU,87888,87888
LT,87888,87888
LU,87888,87888


In [127]:
params_ari_temp = {
 	"latitude": [50.8503, 42.6977, 50.0755, 52.52, 59.437, 40.4168, 48.8566, 47.4979, 54.6872, 49.8153, 56.9496, 44.4268, 46.0569],
    "longitude": [4.3517, 23.3219, 14.4378, 13.405, 24.7536, -3.7038, 2.3522, 19.0402, 25.2797, 6.1296, 24.1052, 26.1025, 14.5058],
	"daily": ["temperature_2m_max", "temperature_2m_min"],
	"timezone": "auto",
	"start_date": "2014-10-05",
	"end_date": "2024-10-13"
}

In [128]:

responses = openmeteo.weather_api(url, params=params_ari_temp) 
all_data_temp_ari = []

# Loop through all responses
for country, response in zip(country_names_ari, responses):
    # Check if the response is valid
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Process the daily data
    daily = response.Daily()
    daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
    daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()

    # Create a DataFrame for the current location
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "temperature_2m_max": daily_temperature_2m_max,
        "temperature_2m_min": daily_temperature_2m_min,
        "country": [country] * len(daily_temperature_2m_max)  # Add country column
    }

    # Convert to DataFrame and append to the list
    all_data_temp_ari.append(pd.DataFrame(data=daily_data))

data_temp_ari = pd.concat(all_data_temp_ari, ignore_index=True)

data_temp_ari.to_csv("data_temp_ari.csv", index=False)


In [129]:
data_temp_ari.head()

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,country
0,2014-10-04 23:00:00+00:00,13.5865,11.3365,BE
1,2014-10-05 23:00:00+00:00,13.936501,12.186501,BE
2,2014-10-06 23:00:00+00:00,15.236501,9.8365,BE
3,2014-10-07 23:00:00+00:00,16.886499,9.5865,BE
4,2014-10-08 23:00:00+00:00,17.936499,12.8865,BE


In [130]:
#params for ili humidity
params_ili = {
    "latitude": [
        47.5162, 50.8503, 50.0755, 55.6761, 59.437,
        48.8566, 37.9838, 45.815, 47.4979, 53.3498,
        54.6872, 49.8153, 56.9496, 35.8997, 52.3676,
        59.9139, 52.2297, 44.4268, 46.0569
    ],
    "longitude": [
        14.5501, 4.3517, 14.4378, 12.5683, 24.7536,
        2.3522, 23.7275, 15.9819, 19.0402, -6.2603,
        25.2797, 6.1296, 24.1052, 14.5146, 4.9041,
        10.7522, 21.0122, 26.1025, 14.5058
    ],
    "hourly": "relative_humidity_2m",
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13"
}
country_names_ili = [
    "AT","BE","CZ","DK",
    "EE","FR","GR","HR",
    "HU","IE","LT","LU",
    "LV","MT","NL","NO",
    "PL","RO","SI"
]


In [131]:

responses = openmeteo.weather_api(url, params=params_ili) 
all_data_ili = []

for country, response in zip(country_names_ili, responses):
    # Check if the response is valid
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Process the hourly data
    hourly = response.Hourly()
    hourly_relative_humidity_2m = hourly.Variables(0).ValuesAsNumpy()

    # Create a DataFrame for the current location
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "relative_humidity_2m": hourly_relative_humidity_2m,
        "country": [country] * len(hourly_relative_humidity_2m)  # Add country column
    }

    # Convert to DataFrame and append to the list
    all_data_ili.append(pd.DataFrame(data=hourly_data))

data_ili_humidity = pd.concat(all_data_ili, ignore_index=True)

data_ili_humidity.to_csv("data_humidity_ili .csv", index=False)


In [132]:
data_ili_humidity.head()

Unnamed: 0,date,relative_humidity_2m,country
0,2014-10-04 23:00:00+00:00,94.026505,AT
1,2014-10-05 00:00:00+00:00,95.318687,AT
2,2014-10-05 01:00:00+00:00,96.30085,AT
3,2014-10-05 02:00:00+00:00,96.961899,AT
4,2014-10-05 03:00:00+00:00,97.294327,AT


In [133]:
#params for temp ili
params_ili_temp = {
    "latitude": [
        47.5162, 50.8503, 50.0755, 55.6761, 59.437,
        48.8566, 37.9838, 45.815, 47.4979, 53.3498,
        54.6872, 49.8153, 56.9496, 35.8997, 52.3676,
        59.9139, 52.2297, 44.4268, 46.0569
    ],
    "longitude": [
        14.5501, 4.3517, 14.4378, 12.5683, 24.7536,
        2.3522, 23.7275, 15.9819, 19.0402, -6.2603,
        25.2797, 6.1296, 24.1052, 14.5146, 4.9041,
        10.7522, 21.0122, 26.1025, 14.5058
    ],
	"daily": ["temperature_2m_max", "temperature_2m_min"],
	"timezone": "auto",
	"start_date": "2014-10-05",
	"end_date": "2024-10-13"
}

In [134]:

responses = openmeteo.weather_api(url, params=params_ili_temp)  
all_data_temp_ili = []

for country, response in zip(country_names_ili, responses):
    # Check if the response is valid
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Process the daily data
    daily = response.Daily()
    daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
    daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()

    # Create a DataFrame for the current location
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "temperature_2m_max": daily_temperature_2m_max,
        "temperature_2m_min": daily_temperature_2m_min,
        "country": [country] * len(daily_temperature_2m_max)  # Add country column
    }

    # Convert to DataFrame and append to the list
    all_data_temp_ili.append(pd.DataFrame(data=daily_data))

# Combine all DataFrames into one
data_temp_ili = pd.concat(all_data_temp_ili, ignore_index=True)

# Optionally save to CSV
data_temp_ili.to_csv("data_temp_ili.csv", index=False)


In [135]:
data_temp_ili.nunique()

date                  10986
temperature_2m_max    18850
temperature_2m_min    16501
country                  19
dtype: int64

In [136]:
data_ari_humidity.dtypes

date                    datetime64[ns, UTC]
relative_humidity_2m                float32
country                              object
dtype: object

In [137]:
data_ari_humidity['Fecha'] = data_ari_humidity['date'].dt.strftime('%Y-%m-%d')
data_ili_humidity['Fecha'] = data_ili_humidity['date'].dt.strftime('%Y-%m-%d')
data_temp_ari['Fecha'] = data_temp_ari['date'].dt.strftime('%Y-%m-%d')
data_temp_ili['Fecha'] = data_temp_ili['date'].dt.strftime('%Y-%m-%d')

In [138]:
data_ari_humidity.head()

Unnamed: 0,date,relative_humidity_2m,country,Fecha
0,2014-10-04 23:00:00+00:00,93.623543,BE,2014-10-04
1,2014-10-05 00:00:00+00:00,93.61628,BE,2014-10-05
2,2014-10-05 01:00:00+00:00,92.990158,BE,2014-10-05
3,2014-10-05 02:00:00+00:00,92.056679,BE,2014-10-05
4,2014-10-05 03:00:00+00:00,90.515244,BE,2014-10-05


In [139]:
data_ari_humidity.columns

Index(['date', 'relative_humidity_2m', 'country', 'Fecha'], dtype='object')

In [140]:
#mean by day for humidity
data_ili_hum_by_day = data_ili_humidity[['relative_humidity_2m', 'country', 'Fecha']].groupby(['country', 'Fecha'])['relative_humidity_2m'].mean().reset_index()
data_ari_hum_by_day = data_ari_humidity[['relative_humidity_2m', 'country', 'Fecha']].groupby(['country','Fecha'])['relative_humidity_2m'].mean().reset_index()


In [141]:
data_ari_hum_by_day['Fecha'] = pd.to_datetime(data_ari_hum_by_day['Fecha'])
data_ili_hum_by_day['Fecha'] = pd.to_datetime(data_ili_hum_by_day['Fecha'])

In [142]:
ari_incidence.head()

Unnamed: 0,location,truth_date,year_week,value
0,BE,2024-10-13,2024-W41,1384.1
1,BE,2024-10-06,2024-W40,1182.0
2,BE,2024-09-29,2024-W39,1216.6
3,BE,2024-09-22,2024-W38,993.0
4,BE,2024-09-15,2024-W37,927.0


In [143]:
data_ari_hum_by_day.dtypes

country                         object
Fecha                   datetime64[ns]
relative_humidity_2m           float32
dtype: object

In [144]:
data_temp_ili.dtypes

date                  datetime64[ns, UTC]
temperature_2m_max                float32
temperature_2m_min                float32
country                            object
Fecha                              object
dtype: object

In [145]:
#add variable Week of years
data_ari_hum_by_day['week_of_year'] = data_ari_hum_by_day['Fecha'].dt.strftime('%W')
data_ari_hum_by_day['year'] = data_ari_hum_by_day['Fecha'].dt.strftime('%Y')
data_ari_hum_by_day['year_week'] = data_ari_hum_by_day['year'] + '-W'+data_ari_hum_by_day['week_of_year']

data_ili_hum_by_day['week_of_year'] = data_ili_hum_by_day['Fecha'].dt.strftime('%W')
data_ili_hum_by_day['year'] = data_ili_hum_by_day['Fecha'].dt.strftime('%Y')
data_ili_hum_by_day['year_week'] = data_ili_hum_by_day['year'] + '-W'+data_ili_hum_by_day['week_of_year']

data_temp_ari['week_of_year'] = pd.to_datetime(data_temp_ari['Fecha']).dt.strftime('%W')
data_temp_ari['year'] = pd.to_datetime(data_temp_ari['Fecha']).dt.strftime('%Y')
data_temp_ari['year_week'] = data_temp_ari['year'] + '-W'+data_temp_ari['week_of_year']

data_temp_ili['week_of_year'] = pd.to_datetime(data_temp_ili['Fecha']).dt.strftime('%W')
data_temp_ili['year'] = pd.to_datetime(data_temp_ili['Fecha']).dt.strftime('%Y')
data_temp_ili['year_week'] = data_temp_ili['year'] + '-W'+data_temp_ili['week_of_year']


In [146]:
data_ili_hum_by_day.head()

Unnamed: 0,country,Fecha,relative_humidity_2m,week_of_year,year,year_week
0,AT,2014-10-04,94.026505,39,2014,2014-W39
1,AT,2014-10-05,87.661407,39,2014,2014-W39
2,AT,2014-10-06,81.162392,40,2014,2014-W40
3,AT,2014-10-07,78.742058,40,2014,2014-W40
4,AT,2014-10-08,85.323982,40,2014,2014-W40


In [147]:
data_temp_ari.head()

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,country,Fecha,week_of_year,year,year_week
0,2014-10-04 23:00:00+00:00,13.5865,11.3365,BE,2014-10-04,39,2014,2014-W39
1,2014-10-05 23:00:00+00:00,13.936501,12.186501,BE,2014-10-05,39,2014,2014-W39
2,2014-10-06 23:00:00+00:00,15.236501,9.8365,BE,2014-10-06,40,2014,2014-W40
3,2014-10-07 23:00:00+00:00,16.886499,9.5865,BE,2014-10-07,40,2014,2014-W40
4,2014-10-08 23:00:00+00:00,17.936499,12.8865,BE,2014-10-08,40,2014,2014-W40


In [148]:
data_temp_ari.columns

Index(['date', 'temperature_2m_max', 'temperature_2m_min', 'country', 'Fecha',
       'week_of_year', 'year', 'year_week'],
      dtype='object')

In [149]:
# mean by week for humidity
data_ari_hum_by_week = data_ari_hum_by_day[['country', 'Fecha', 'relative_humidity_2m','year_week']].groupby(['country', 'year_week'],as_index=False).agg({'relative_humidity_2m':'mean'})
data_ili_hum_by_week = data_ili_hum_by_day[['country', 'Fecha', 'relative_humidity_2m','year_week']].groupby(['country', 'year_week'],as_index=False).agg({'relative_humidity_2m':'mean'})


In [150]:
#mean by week for temp


data_ari_temp_by_week = data_temp_ari[['temperature_2m_max', 'temperature_2m_min', 'country', 'Fecha','year_week']].groupby(['country', 'year_week'],as_index=False).agg({'temperature_2m_max': 'mean', 'temperature_2m_min': 'mean'})

data_ili_temp_by_week = data_temp_ili[['temperature_2m_max', 'temperature_2m_min', 'country', 'Fecha','year_week']].groupby(['country', 'year_week'],as_index=False).agg({'temperature_2m_max': 'mean', 'temperature_2m_min': 'mean'})


In [151]:
ari_incidence.columns

Index(['location', 'truth_date', 'year_week', 'value'], dtype='object')

In [152]:
data_ari_temp_by_week.head()

Unnamed: 0,country,year_week,temperature_2m_max,temperature_2m_min
0,BE,2014-W39,13.7615,11.7615
1,BE,2014-W40,16.800785,10.5865
2,BE,2014-W41,18.193644,12.500786
3,BE,2014-W42,14.386499,9.779358
4,BE,2014-W43,16.029356,10.650786


In [153]:
data_ari_hum_by_week.columns

Index(['country', 'year_week', 'relative_humidity_2m'], dtype='object')

In [154]:
print(data_ari_hum_by_week.shape)
print(ari_incidence.shape)

(6916, 3)
(6685, 4)


In [155]:
ari_hum = pd.merge(ari_incidence,data_ari_hum_by_week,left_on=['location','year_week'],
                   right_on = ['country','year_week'],how = 'left')
ari_hum = ari_hum.drop(columns = ['country'])

ari = pd.merge(ari_hum,data_ari_temp_by_week,left_on=['location','year_week'],
               right_on = ['country','year_week'],how='left')
ari = ari.drop(columns=['country'])
ari.head()

Unnamed: 0,location,truth_date,year_week,value,relative_humidity_2m,temperature_2m_max,temperature_2m_min
0,BE,2024-10-13,2024-W41,1384.1,82.940979,14.331166,9.156167
1,BE,2024-10-06,2024-W40,1182.0,80.857178,15.843072,7.521643
2,BE,2024-09-29,2024-W39,1216.6,80.408249,16.443071,10.043071
3,BE,2024-09-22,2024-W38,993.0,79.679146,22.028788,13.171644
4,BE,2024-09-15,2024-W37,927.0,80.122276,16.550215,8.150214


In [156]:
ili_hum = pd.merge(ili_incidence,data_ili_hum_by_week,left_on=['location','year_week'],
                   right_on = ['country','year_week'],how = 'left')
ili_hum = ili_hum.drop(columns = ['country'])

ili = pd.merge(ili_hum,data_ili_temp_by_week,left_on=['location','year_week'],
               right_on = ['country','year_week'],how='left')
ili = ili.drop(columns=['country'])
ari.head()

Unnamed: 0,location,truth_date,year_week,value,relative_humidity_2m,temperature_2m_max,temperature_2m_min
0,BE,2024-10-13,2024-W41,1384.1,82.940979,14.331166,9.156167
1,BE,2024-10-06,2024-W40,1182.0,80.857178,15.843072,7.521643
2,BE,2024-09-29,2024-W39,1216.6,80.408249,16.443071,10.043071
3,BE,2024-09-22,2024-W38,993.0,79.679146,22.028788,13.171644
4,BE,2024-09-15,2024-W37,927.0,80.122276,16.550215,8.150214


In [157]:
ili.to_csv("data_ili.csv",sep=",")
ari.to_csv("data_ari.csv",sep=",")