In [1]:
from datetime import datetime

import numpy as np
import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller


In [2]:
# Load the weekly incidence tables (comma-separated, which is the read_csv
# default) for acute respiratory infection (ARI) and influenza-like illness (ILI).
ari_incidence = pd.read_csv("latest-ARI_incidence.csv")
ili_incidence = pd.read_csv("latest-ILI_incidence.csv")

In [3]:
# Preview the first rows of the ARI incidence table.
ari_incidence.head()

Unnamed: 0,location,truth_date,year_week,value
0,BE,2024-10-13,2024-W41,1384.1
1,BE,2024-10-06,2024-W40,1182.0
2,BE,2024-09-29,2024-W39,1216.6
3,BE,2024-09-22,2024-W38,993.0
4,BE,2024-09-15,2024-W37,927.0


In [4]:
# Open-Meteo client built on an HTTP session that
#   1. caches responses in the local ".cache" store (expire_after=3600), and
#   2. retries failed requests up to 5 times with a 0.2 backoff factor.
cache_session = requests_cache.CachedSession(".cache", expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [5]:
# Endpoint passed to every `openmeteo.weather_api` call in this notebook
# (Open-Meteo archive API).
url = "https://archive-api.open-meteo.com/v1/archive"

In [6]:
# Request parameters for hourly relative humidity at the capital city of each
# ARI country. The latitude/longitude lists are positional: entry k belongs to
# country_names_ari[k], so the three lists must stay in the same order.
params_ari = {
    "latitude": [
        50.8503, 42.6977, 50.0755, 52.52, 59.437,
        40.4168, 48.8566, 47.4979, 54.6872, 49.8153,
        56.9496, 44.4268, 46.0569,
    ],
    "longitude": [
        4.3517, 23.3219, 14.4378, 13.405, 24.7536,
        -3.7038, 2.3522, 19.0402, 25.2797, 6.1296,
        24.1052, 26.1025, 14.5058,
    ],
    "hourly": "relative_humidity_2m",
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13",
}

# ISO 3166 alpha-2 codes, in the same order as the coordinates above.
country_names_ari = [
    "BE", "BG", "CZ", "DE", "EE", "ES", "FR",
    "HU", "LT", "LU", "LV", "RO", "SI",
]



In [7]:
# Keep only the countries for which weather is requested, then show how many
# weekly observations each of them has.
in_ari_countries = ari_incidence['location'].isin(country_names_ari)
ari_incidence = ari_incidence.loc[in_ari_countries]
ari_incidence.groupby('location').count()

Unnamed: 0_level_0,truth_date,year_week,value
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BE,524,524,524
BG,514,514,514
CZ,523,523,523
DE,524,524,524
EE,506,506,506
ES,158,158,158
FR,167,167,167
HU,66,66,66
LT,508,508,508
LU,405,405,405


In [9]:

# Fetch hourly relative humidity for all ARI locations in a single API call.
# Responses are matched to country codes by position.
responses = openmeteo.weather_api(url, params=params_ari)
all_data_ari = []

for country, response in zip(country_names_ari, responses):
    # Nothing came back for this location: report it and move on.
    if response is None:
        print(f"No data available for {country}.")
        continue

    hourly = response.Hourly()
    # Variable 0 is the only hourly variable requested (relative_humidity_2m).
    humidity_values = hourly.Variables(0).ValuesAsNumpy()

    # Rebuild the time axis from the block's start, end and step; the end
    # point is excluded (inclusive="left").
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    country_frame = pd.DataFrame(
        data={
            "date": timestamps,
            "relative_humidity_2m": humidity_values,
            "country": [country] * len(humidity_values),
        }
    )
    all_data_ari.append(country_frame)

# One long table with every country stacked, plus a CSV copy on disk.
data_ari_humidity = pd.concat(all_data_ari, ignore_index=True)
data_ari_humidity.to_csv("data_humidity_ari.csv", index=False)


In [10]:
# Sanity check: number of hourly humidity rows downloaded per country.
data_ari_humidity.groupby('country').count()

Unnamed: 0_level_0,date,relative_humidity_2m
country,Unnamed: 1_level_1,Unnamed: 2_level_1
BE,87888,87888
BG,87888,87888
CZ,87888,87888
DE,87888,87888
EE,87888,87888
ES,87888,87888
FR,87888,87888
HU,87888,87888
LT,87888,87888
LU,87888,87888


In [11]:
# Request parameters for the daily maximum and minimum 2 m temperature at the
# ARI locations. Coordinates are positional and follow country_names_ari.
params_ari_temp = {
    "latitude": [
        50.8503, 42.6977, 50.0755, 52.52, 59.437,
        40.4168, 48.8566, 47.4979, 54.6872, 49.8153,
        56.9496, 44.4268, 46.0569,
    ],
    "longitude": [
        4.3517, 23.3219, 14.4378, 13.405, 24.7536,
        -3.7038, 2.3522, 19.0402, 25.2797, 6.1296,
        24.1052, 26.1025, 14.5058,
    ],
    "daily": ["temperature_2m_max", "temperature_2m_min"],
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13",
}

In [12]:

# Fetch daily max/min temperature for all ARI locations in a single API call.
# Responses are matched to country codes by position.
responses = openmeteo.weather_api(url, params=params_ari_temp)
all_data_temp_ari = []

for country, response in zip(country_names_ari, responses):
    # Nothing came back for this location: report it and move on.
    if response is None:
        print(f"No data available for {country}.")
        continue

    daily = response.Daily()
    # Variables follow the order of the "daily" request list: 0 = max, 1 = min.
    temp_max = daily.Variables(0).ValuesAsNumpy()
    temp_min = daily.Variables(1).ValuesAsNumpy()

    # Rebuild the time axis from the block's start, end and step; the end
    # point is excluded (inclusive="left").
    timestamps = pd.date_range(
        start=pd.to_datetime(daily.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily.Interval()),
        inclusive="left",
    )

    country_frame = pd.DataFrame(
        data={
            "date": timestamps,
            "temperature_2m_max": temp_max,
            "temperature_2m_min": temp_min,
            "country": [country] * len(temp_max),
        }
    )
    all_data_temp_ari.append(country_frame)

# One long table with every country stacked, plus a CSV copy on disk.
data_temp_ari = pd.concat(all_data_temp_ari, ignore_index=True)
data_temp_ari.to_csv("data_temp_ari.csv", index=False)


In [13]:
# Sanity check: number of daily temperature rows downloaded per country.
data_temp_ari.groupby('country').count()

Unnamed: 0_level_0,date,temperature_2m_max,temperature_2m_min
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BE,3662,3662,3662
BG,3662,3662,3662
CZ,3662,3662,3662
DE,3662,3662,3662
EE,3662,3662,3662
ES,3662,3662,3662
FR,3662,3662,3662
HU,3662,3662,3662
LT,3662,3662,3662
LU,3662,3662,3662


In [14]:
# Request parameters for hourly relative humidity at one location per ILI
# country. The latitude/longitude lists are positional: entry k belongs to
# country_names_ili[k], so the three lists must stay in the same order.
# NOTE(review): the first pair (47.5162, 14.5501), used for AT, looks like a
# country centroid rather than Vienna, unlike the capital-city coordinates
# used for the other countries -- confirm this is intended.
params_ili = {
    "latitude": [
        47.5162, 50.8503, 50.0755, 55.6761,
        59.437, 48.8566, 37.9838, 45.815,
        47.4979, 53.3498, 54.6872, 49.8153,
        56.9496, 35.8997, 52.3676, 59.9139,
        52.2297, 44.4268, 46.0569,
    ],
    "longitude": [
        14.5501, 4.3517, 14.4378, 12.5683,
        24.7536, 2.3522, 23.7275, 15.9819,
        19.0402, -6.2603, 25.2797, 6.1296,
        24.1052, 14.5146, 4.9041, 10.7522,
        21.0122, 26.1025, 14.5058,
    ],
    "hourly": "relative_humidity_2m",
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13",
}

# ISO 3166 alpha-2 codes, in the same order as the coordinates above.
country_names_ili = [
    "AT", "BE", "CZ", "DK", "EE", "FR", "GR", "HR", "HU", "IE",
    "LT", "LU", "LV", "MT", "NL", "NO", "PL", "RO", "SI",
]


In [15]:
# Keep only the countries for which weather is requested, then show how many
# weekly observations each of them has.
in_ili_countries = ili_incidence['location'].isin(country_names_ili)
ili_incidence = ili_incidence.loc[in_ili_countries]
ili_incidence.groupby('location').count()

Unnamed: 0_level_0,truth_date,year_week,value
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AT,262,262,262
BE,524,524,524
CZ,523,523,523
DK,406,406,406
EE,514,514,514
FR,349,349,349
GR,514,514,514
HR,326,326,326
HU,324,324,324
IE,523,523,523


In [None]:
# Disabled cell: the whole body is wrapped in a triple-quoted string, so it is
# evaluated as a string literal and none of the code inside runs. It is an
# earlier version of the ILI humidity download performed by the next cell.
# Note that the output filename inside the string contains a stray space
# before ".csv".
"""
responses = openmeteo.weather_api(url, params=params_ili) 
all_data_ili = []

for country, response in zip(country_names_ili, responses):
    # Check if the response is valid
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Process the hourly data
    hourly = response.Hourly()
    hourly_relative_humidity_2m = hourly.Variables(0).ValuesAsNumpy()

    # Create a DataFrame for the current location
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "relative_humidity_2m": hourly_relative_humidity_2m,
        "country": [country] * len(hourly_relative_humidity_2m)  # Add country column
    }

    # Convert to DataFrame and append to the list
    all_data_ili.append(pd.DataFrame(data=hourly_data))

data_ili_humidity = pd.concat(all_data_ili, ignore_index=True)

data_ili_humidity.to_csv("data_humidity_ili .csv", index=False)
"""

In [16]:
import time

# Fetch hourly relative humidity for all ILI locations in a single API call.
# Responses are matched to country codes by position.
responses = openmeteo.weather_api(url, params=params_ili)
all_data_ili = []

# No per-iteration delay is used here: every response has already been
# returned by the weather_api call above, so sleeping inside this loop cannot
# reduce the request rate -- it would only add idle time per country.
for country, response in zip(country_names_ili, responses):
    # Nothing came back for this location: report it and move on.
    if response is None:
        print(f"No data available for {country}.")
        continue

    hourly = response.Hourly()
    # Variable 0 is the only hourly variable requested (relative_humidity_2m).
    hourly_relative_humidity_2m = hourly.Variables(0).ValuesAsNumpy()

    # Rebuild the time axis from the block's start, end and step; the end
    # point is excluded (inclusive="left").
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        ),
        "relative_humidity_2m": hourly_relative_humidity_2m,
        "country": [country] * len(hourly_relative_humidity_2m)
    }

    all_data_ili.append(pd.DataFrame(data=hourly_data))

# One long table with every country stacked, plus a CSV copy on disk.
data_ili_humidity = pd.concat(all_data_ili, ignore_index=True)
data_ili_humidity.to_csv("data_humidity_ili.csv", index=False)


In [17]:
# Sanity check: number of hourly humidity rows downloaded per country.
data_ili_humidity.groupby('country').count()

Unnamed: 0_level_0,date,relative_humidity_2m
country,Unnamed: 1_level_1,Unnamed: 2_level_1
AT,87888,87888
BE,87888,87888
CZ,87888,87888
DK,87888,87888
EE,87888,87888
FR,87888,87888
GR,87888,87888
HR,87888,87888
HU,87888,87888
IE,87888,87888


In [18]:
# Request parameters for the daily maximum and minimum 2 m temperature at the
# ILI locations. Coordinates are positional and follow country_names_ili.
# NOTE(review): the first pair (47.5162, 14.5501), used for AT, looks like a
# country centroid rather than Vienna -- confirm this is intended.
params_ili_temp = {
    "latitude": [
        47.5162, 50.8503, 50.0755, 55.6761,
        59.437, 48.8566, 37.9838, 45.815,
        47.4979, 53.3498, 54.6872, 49.8153,
        56.9496, 35.8997, 52.3676, 59.9139,
        52.2297, 44.4268, 46.0569,
    ],
    "longitude": [
        14.5501, 4.3517, 14.4378, 12.5683,
        24.7536, 2.3522, 23.7275, 15.9819,
        19.0402, -6.2603, 25.2797, 6.1296,
        24.1052, 14.5146, 4.9041, 10.7522,
        21.0122, 26.1025, 14.5058,
    ],
    "daily": ["temperature_2m_max", "temperature_2m_min"],
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13",
}

In [19]:

# Fetch daily max/min temperature for all ILI locations in a single API call.
# Responses are matched to country codes by position.
responses = openmeteo.weather_api(url, params=params_ili_temp)
all_data_temp_ili = []

for country, response in zip(country_names_ili, responses):
    # Nothing came back for this location: report it and move on.
    if response is None:
        print(f"No data available for {country}.")
        continue

    daily = response.Daily()
    # Variables follow the order of the "daily" request list: 0 = max, 1 = min.
    temp_max = daily.Variables(0).ValuesAsNumpy()
    temp_min = daily.Variables(1).ValuesAsNumpy()

    # Rebuild the time axis from the block's start, end and step; the end
    # point is excluded (inclusive="left").
    timestamps = pd.date_range(
        start=pd.to_datetime(daily.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily.Interval()),
        inclusive="left",
    )

    country_frame = pd.DataFrame(
        data={
            "date": timestamps,
            "temperature_2m_max": temp_max,
            "temperature_2m_min": temp_min,
            "country": [country] * len(temp_max),
        }
    )
    all_data_temp_ili.append(country_frame)

# One long table with every country stacked, plus a CSV copy on disk.
data_temp_ili = pd.concat(all_data_temp_ili, ignore_index=True)
data_temp_ili.to_csv("data_temp_ili.csv", index=False)


In [20]:
# Sanity check: number of daily temperature rows downloaded per country.
data_temp_ili.groupby('country').count()

Unnamed: 0_level_0,date,temperature_2m_max,temperature_2m_min
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AT,3662,3662,3662
BE,3662,3662,3662
CZ,3662,3662,3662
DK,3662,3662,3662
EE,3662,3662,3662
FR,3662,3662,3662
GR,3662,3662,3662
HR,3662,3662,3662
HU,3662,3662,3662
IE,3662,3662,3662


In [None]:
# Inspect the column dtypes of the hourly ARI humidity table.
data_ari_humidity.dtypes

In [21]:
# Add a 'Fecha' column holding the calendar date of each timestamp as a
# 'YYYY-MM-DD' string, for all four weather tables.
# NOTE(review): 'date' was built as UTC while the requests use
# timezone="auto"; if the API timestamps mark local midnight, the UTC
# calendar date can be the previous day for locations east of UTC -- confirm.
for weather_table in (data_ari_humidity, data_ili_humidity, data_temp_ari, data_temp_ili):
    weather_table['Fecha'] = weather_table['date'].dt.strftime('%Y-%m-%d')

In [None]:
# Preview the first rows of the hourly ARI humidity table.
data_ari_humidity.head()

In [None]:
# List the column names of the hourly ARI humidity table.
data_ari_humidity.columns

In [22]:
#mean by day for humidity
data_ili_hum_by_day = data_ili_humidity[['relative_humidity_2m', 'country', 'Fecha']].groupby(['country', 'Fecha'])['relative_humidity_2m'].mean().reset_index()
data_ari_hum_by_day = data_ari_humidity[['relative_humidity_2m', 'country', 'Fecha']].groupby(['country','Fecha'])['relative_humidity_2m'].mean().reset_index()


In [23]:
# Turn the 'Fecha' strings of both daily humidity tables into datetimes so
# the .dt accessor can be used on them.
for humidity_by_day in (data_ari_hum_by_day, data_ili_hum_by_day):
    humidity_by_day['Fecha'] = pd.to_datetime(humidity_by_day['Fecha'])

In [24]:
# Add week-of-year columns to the four daily weather tables:
#   week_of_year : ISO 8601 week number, zero-padded to two digits
#   year         : calendar year of 'Fecha' (unchanged from before)
#   year_week    : ISO week label 'YYYY-Www', the key used to join with the
#                  incidence tables
#
# The year inside 'year_week' is the ISO year, not the calendar year. Around
# New Year the two differ: 2018-12-31 belongs to ISO week 2019-W01 and
# 2021-01-01 to 2020-W53. Pairing the ISO week number with the calendar year
# would label those days '2018-W01' / '2021-W53' and average them into the
# wrong week.
for daily_table in (data_ari_hum_by_day, data_ili_hum_by_day, data_temp_ari, data_temp_ili):
    # 'Fecha' is a string in the temperature tables; to_datetime leaves
    # values that are already datetimes as they are.
    fecha = pd.to_datetime(daily_table['Fecha'])
    iso_calendar = fecha.dt.isocalendar()

    daily_table['week_of_year'] = iso_calendar['week'].astype('str').str.zfill(2)
    daily_table['year'] = fecha.dt.strftime('%Y')
    daily_table['year_week'] = (
        iso_calendar['year'].astype('str') + '-W' + daily_table['week_of_year']
    )


In [None]:
# Preview the first rows of the daily ILI humidity table.
data_ili_hum_by_day.head()

In [None]:
# Write the daily ARI temperature table to "dat.csv" (row index included,
# since index=False is not passed).
# NOTE(review): looks like an ad-hoc inspection dump -- no other visible cell
# reads this file; confirm it is still needed.
data_temp_ari.to_csv("dat.csv")

In [25]:
# Weekly humidity: average of the daily means within each country and
# 'year_week' label.
data_ari_hum_by_week = (
    data_ari_hum_by_day
    .groupby(['country', 'year_week'], as_index=False)['relative_humidity_2m']
    .mean()
)
data_ili_hum_by_week = (
    data_ili_hum_by_day
    .groupby(['country', 'year_week'], as_index=False)['relative_humidity_2m']
    .mean()
)


In [26]:
# Weekly temperature: average of the daily maximum and of the daily minimum
# within each country and 'year_week' label.
data_ari_temp_by_week = (
    data_temp_ari
    .groupby(['country', 'year_week'], as_index=False)[['temperature_2m_max', 'temperature_2m_min']]
    .mean()
)
data_ili_temp_by_week = (
    data_temp_ili
    .groupby(['country', 'year_week'], as_index=False)[['temperature_2m_max', 'temperature_2m_min']]
    .mean()
)


In [27]:
# Restrict both incidence tables to the countries with weather data.
# The same filter is already applied in earlier cells, so this leaves the
# tables unchanged when those cells have run.
ari_incidence = ari_incidence.loc[ari_incidence['location'].isin(country_names_ari)]
ili_incidence = ili_incidence.loc[ili_incidence['location'].isin(country_names_ili)]

In [28]:
# Attach weekly humidity and then weekly temperature to the ARI incidence
# rows, matching on country code and 'year_week'. Left joins keep every
# incidence row even when no weather week matches. The right-hand 'country'
# key duplicates 'location' and is dropped after each join.
ari_hum = ari_incidence.merge(
    data_ari_hum_by_week,
    how='left',
    left_on=['location', 'year_week'],
    right_on=['country', 'year_week'],
).drop(columns=['country'])

ari = ari_hum.merge(
    data_ari_temp_by_week,
    how='left',
    left_on=['location', 'year_week'],
    right_on=['country', 'year_week'],
).drop(columns=['country'])

ari['truth_date'] = pd.to_datetime(ari['truth_date'])
ari.head()

Unnamed: 0,location,truth_date,year_week,value,relative_humidity_2m,temperature_2m_max,temperature_2m_min
0,BE,2024-10-13,2024-W41,1384.1,82.940979,14.331166,9.156167
1,BE,2024-10-06,2024-W40,1182.0,80.857178,15.843072,7.521643
2,BE,2024-09-29,2024-W39,1216.6,80.408249,16.443071,10.043071
3,BE,2024-09-22,2024-W38,993.0,79.679146,22.028788,13.171644
4,BE,2024-09-15,2024-W37,927.0,80.122276,16.550215,8.150214


In [29]:
# Attach weekly humidity and then weekly temperature to the ILI incidence
# rows, matching on country code and 'year_week'. Left joins keep every
# incidence row even when no weather week matches. The right-hand 'country'
# key duplicates 'location' and is dropped after each join.
ili_hum = ili_incidence.merge(
    data_ili_hum_by_week,
    how='left',
    left_on=['location', 'year_week'],
    right_on=['country', 'year_week'],
).drop(columns=['country'])

ili = ili_hum.merge(
    data_ili_temp_by_week,
    how='left',
    left_on=['location', 'year_week'],
    right_on=['country', 'year_week'],
).drop(columns=['country'])

ili['truth_date'] = pd.to_datetime(ili['truth_date'])
ili.head()

Unnamed: 0,location,truth_date,year_week,value,relative_humidity_2m,temperature_2m_max,temperature_2m_min
0,AT,2024-10-13,2024-W41,3214.2,82.496323,13.673167,4.914833
1,AT,2024-04-07,2024-W14,1512.6,68.04641,16.792215,3.120786
2,AT,2024-03-31,2024-W13,1629.1,73.683411,11.385071,1.5065
3,AT,2024-03-24,2024-W12,2179.8,80.15226,9.327929,-1.2435
4,AT,2024-03-17,2024-W11,1798.0,83.39875,8.677928,1.085071


In [30]:
# Order both merged tables from oldest to newest observation (ascending is
# the sort_values default) and renumber the rows.
ili = ili.sort_values('truth_date').reset_index(drop=True)
ari = ari.sort_values('truth_date').reset_index(drop=True)


In [31]:
# 'covid' indicator: 1 for observations dated from 2020-03-01 to 2024-05-31
# (both ends inclusive), 0 otherwise.
# NOTE(review): the window ends in May 2024; WHO ended the COVID-19 public
# health emergency in May 2023 -- confirm the end date is intended.
ili['covid'] = np.where(ili['truth_date'].between('2020-03-01', '2024-05-31'), 1, 0)
ari['covid'] = np.where(ari['truth_date'].between('2020-03-01', '2024-05-31'), 1, 0)

In [32]:
# Persist the merged tables as comma-separated files (the to_csv default
# separator). The row index is written as the first column.
ili.to_csv("data_ili.csv")
ari.to_csv("data_ari.csv")

In [None]:
# Distinct country codes present in each merged table, in order of first
# appearance; these drive the per-country test loops.
name_ari, name_ili = (merged["location"].unique() for merged in (ari, ili))

In [None]:
# Augmented Dickey-Fuller (ADF) stationarity test on the ARI incidence series
# of every country. `adfuller` comes from statsmodels.tsa.stattools, imported
# with the other dependencies at the top of the notebook.
for location in name_ari:
    print(f"\n📍 ADF Test for: {location}")

    # Put the observations in chronological order here, so the test does not
    # depend on an earlier cell having sorted `ari`, then drop missing values.
    series = (
        ari.loc[ari['location'] == location]
        .sort_values('truth_date')['value']
        .dropna()
    )

    # autolag='AIC' lets statsmodels choose the lag length by AIC. The first
    # five items of the result are: statistic, p-value, lags used,
    # observations used, and the dict of critical values.
    test_stat, p_value, lags, n_obs, crit_values = adfuller(series, autolag='AIC')[:5]

    print(f"Test Statistic      : {test_stat:.4f}")
    print(f"p-value             : {p_value:.4f}")
    print(f"# Lags Used         : {lags}")
    print(f"# Observations Used : {n_obs}")

    for key, value in crit_values.items():
        print(f"Critical Value ({key}) : {value:.4f}")

    # H0 of the ADF test is a unit root (non-stationarity); reject at 5 %.
    if p_value < 0.05:
        print("✅ Likely Stationary (reject H0)")
    else:
        print("❌ Likely Non-stationary (fail to reject H0)")


In [None]:
# Augmented Dickey-Fuller (ADF) stationarity test on the ILI incidence series
# of every country. `adfuller` comes from statsmodels.tsa.stattools, imported
# with the other dependencies at the top of the notebook.
for location in name_ili:
    print(f"\n📍 ADF Test for: {location}")

    # Put the observations in chronological order here, so the test does not
    # depend on an earlier cell having sorted `ili`, then drop missing values.
    series = (
        ili.loc[ili['location'] == location]
        .sort_values('truth_date')['value']
        .dropna()
    )

    # autolag='AIC' lets statsmodels choose the lag length by AIC. The first
    # five items of the result are: statistic, p-value, lags used,
    # observations used, and the dict of critical values.
    test_stat, p_value, lags, n_obs, crit_values = adfuller(series, autolag='AIC')[:5]

    print(f"Test Statistic      : {test_stat:.4f}")
    print(f"p-value             : {p_value:.4f}")
    print(f"# Lags Used         : {lags}")
    print(f"# Observations Used : {n_obs}")

    for key, value in crit_values.items():
        print(f"Critical Value ({key}) : {value:.4f}")

    # H0 of the ADF test is a unit root (non-stationarity); reject at 5 %.
    if p_value < 0.05:
        print("✅ Likely Stationary (reject H0)")
    else:
        print("❌ Likely Non-stationary (fail to reject H0)")