In [85]:
import pandas as pd
import numpy as np
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import datetime
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


In [None]:
# Load the ARI and ILI incidence tables; both files are comma-separated,
# which is the delimiter pandas uses when none is given.
ari_incidence = pd.read_csv("latest-ARI_incidence.csv")
ili_incidence = pd.read_csv("latest-ILI_incidence.csv")

In [None]:
# Preview the first rows of the ARI incidence table.
ari_incidence.head()

In [88]:
# Open-Meteo client built on a caching HTTP session (cache stored under
# '.cache', expire_after=3600) wrapped with a retry policy (5 retries,
# backoff factor 0.2).
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after=3600,
)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [89]:
# Open-Meteo archive endpoint; passed to every openmeteo.weather_api call below.
url = "https://archive-api.open-meteo.com/v1/archive"

In [90]:
# Capitals used for the ARI countries, one (country code, latitude, longitude)
# row per location. The request lists and the country list are derived from
# this single table so the three sequences always stay aligned.
_ari_capitals = [
    ("BE", 50.8503, 4.3517),
    ("BG", 42.6977, 23.3219),
    ("CZ", 50.0755, 14.4378),
    ("DE", 52.52, 13.405),
    ("EE", 59.437, 24.7536),
    ("ES", 40.4168, -3.7038),
    ("FR", 48.8566, 2.3522),
    ("HU", 47.4979, 19.0402),
    ("LT", 54.6872, 25.2797),
    ("LU", 49.8153, 6.1296),
    ("LV", 56.9496, 24.1052),
    ("RO", 44.4268, 26.1025),
    ("SI", 46.0569, 14.5058),
]

# Hourly relative-humidity request covering 2014-10-05 .. 2024-10-13.
params_ari = {
    "latitude": [lat for _code, lat, _lon in _ari_capitals],
    "longitude": [lon for _code, _lat, lon in _ari_capitals],
    "hourly": "relative_humidity_2m",
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13",
}
country_names_ari = [code for code, _lat, _lon in _ari_capitals]



In [None]:
# Keep only the rows whose location is one of the ARI countries, then show
# the non-null count of every column per location.
ari_location_mask = ari_incidence['location'].isin(country_names_ari)
ari_incidence = ari_incidence[ari_location_mask]
ari_incidence.groupby('location').count()

In [None]:

# Request hourly relative humidity for all ARI locations in a single call.
responses = openmeteo.weather_api(url, params=params_ari)
all_data_ari = []

# Responses are matched to country codes by position.
for country, response in zip(country_names_ari, responses):
    if response is None:
        # Nothing came back for this location: report it and move on.
        print(f"No data available for {country}.")
        continue

    hourly = response.Hourly()
    humidity_values = hourly.Variables(0).ValuesAsNumpy()

    # Rebuild the timestamps from the block's start, end and step; the end
    # bound itself is excluded.
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    country_frame = pd.DataFrame(
        data={
            "date": timestamps,
            "relative_humidity_2m": humidity_values,
            "country": [country] * len(humidity_values),
        }
    )
    all_data_ari.append(country_frame)

# Stack the per-country frames into one table and save it.
# NOTE(review): the file name has a space before ".csv" - confirm it is intended.
data_ari_humidity = pd.concat(all_data_ari, ignore_index=True)
data_ari_humidity.to_csv("data_humidity_ari .csv", index=False)


In [None]:
# Non-null count of every column per country in the ARI humidity table.
data_ari_humidity.groupby('country').count()

In [10]:
# Daily max/min temperature request for the ARI locations, written as
# (latitude, longitude) pairs and unzipped into the two lists the API expects.
_ari_temp_points = [
    (50.8503, 4.3517),
    (42.6977, 23.3219),
    (50.0755, 14.4378),
    (52.52, 13.405),
    (59.437, 24.7536),
    (40.4168, -3.7038),
    (48.8566, 2.3522),
    (47.4979, 19.0402),
    (54.6872, 25.2797),
    (49.8153, 6.1296),
    (56.9496, 24.1052),
    (44.4268, 26.1025),
    (46.0569, 14.5058),
]
params_ari_temp = {
    "latitude": [point[0] for point in _ari_temp_points],
    "longitude": [point[1] for point in _ari_temp_points],
    "daily": ["temperature_2m_max", "temperature_2m_min"],
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13",
}

In [11]:

# Request daily max/min temperature for all ARI locations in a single call.
responses = openmeteo.weather_api(url, params=params_ari_temp)
all_data_temp_ari = []

# Responses are matched to country codes by position.
for country, response in zip(country_names_ari, responses):
    # Skip locations for which nothing came back.
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Variables are returned in the order requested: max first, then min.
    daily = response.Daily()
    daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
    daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()

    # The request uses timezone="auto", so each daily value describes a local
    # calendar day, but the unix timestamps are expressed in GMT+0. Without
    # the location's UTC offset, local midnight lands on the previous UTC
    # date for locations east of Greenwich and every row is labelled one day
    # early. Adding the offset makes "date" carry the local calendar day
    # (still tagged as UTC).
    utc_offset_seconds = response.UtcOffsetSeconds()

    # Create a DataFrame for the current location (end bound excluded).
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time() + utc_offset_seconds, unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd() + utc_offset_seconds, unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "temperature_2m_max": daily_temperature_2m_max,
        "temperature_2m_min": daily_temperature_2m_min,
        "country": [country] * len(daily_temperature_2m_max)  # Add country column
    }

    all_data_temp_ari.append(pd.DataFrame(data=daily_data))

# Stack the per-country frames into one table and save it.
data_temp_ari = pd.concat(all_data_temp_ari, ignore_index=True)

data_temp_ari.to_csv("data_temp_ari.csv", index=False)


In [None]:
# Preview the first rows of the ARI daily temperature table.
data_temp_ari.head()

In [13]:
# Locations used for the ILI countries, one (country code, latitude,
# longitude) row each; the request lists and the country list are derived
# from this table so they stay aligned.
# NOTE(review): the AT point (47.5162, 14.5501) does not look like Vienna,
# while the other rows look like capital coordinates - confirm it is intended.
_ili_points = [
    ("AT", 47.5162, 14.5501),
    ("BE", 50.8503, 4.3517),
    ("CZ", 50.0755, 14.4378),
    ("DK", 55.6761, 12.5683),
    ("EE", 59.437, 24.7536),
    ("FR", 48.8566, 2.3522),
    ("GR", 37.9838, 23.7275),
    ("HR", 45.815, 15.9819),
    ("HU", 47.4979, 19.0402),
    ("IE", 53.3498, -6.2603),
    ("LT", 54.6872, 25.2797),
    ("LU", 49.8153, 6.1296),
    ("LV", 56.9496, 24.1052),
    ("MT", 35.8997, 14.5146),
    ("NL", 52.3676, 4.9041),
    ("NO", 59.9139, 10.7522),
    ("PL", 52.2297, 21.0122),
    ("RO", 44.4268, 26.1025),
    ("SI", 46.0569, 14.5058),
]

# Hourly relative-humidity request covering 2014-10-05 .. 2024-10-13.
params_ili = {
    "latitude": [lat for _code, lat, _lon in _ili_points],
    "longitude": [lon for _code, _lat, lon in _ili_points],
    "hourly": "relative_humidity_2m",
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13",
}
country_names_ili = [code for code, _lat, _lon in _ili_points]


In [None]:
# Keep only the rows whose location is one of the ILI countries, then show
# the non-null count of every column per location.
ili_location_mask = ili_incidence['location'].isin(country_names_ili)
ili_incidence = ili_incidence[ili_location_mask]
ili_incidence.groupby('location').count()

In [15]:

# Request hourly relative humidity for all ILI locations in a single call.
responses = openmeteo.weather_api(url, params=params_ili)
all_data_ili = []

# Responses are matched to country codes by position.
for country, response in zip(country_names_ili, responses):
    if response is None:
        # Nothing came back for this location: report it and move on.
        print(f"No data available for {country}.")
        continue

    hourly = response.Hourly()
    humidity_values = hourly.Variables(0).ValuesAsNumpy()

    # Rebuild the timestamps from the block's start, end and step; the end
    # bound itself is excluded.
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    country_frame = pd.DataFrame(
        data={
            "date": timestamps,
            "relative_humidity_2m": humidity_values,
            "country": [country] * len(humidity_values),
        }
    )
    all_data_ili.append(country_frame)

# Stack the per-country frames into one table and save it.
# NOTE(review): the file name has a space before ".csv" - confirm it is intended.
data_ili_humidity = pd.concat(all_data_ili, ignore_index=True)
data_ili_humidity.to_csv("data_humidity_ili .csv", index=False)


In [None]:
# Preview the first rows of the ILI hourly humidity table.
data_ili_humidity.head()

In [17]:
# Daily max/min temperature request for the ILI locations, written as
# (latitude, longitude) pairs and unzipped into the two lists the API expects.
# NOTE(review): the first point (47.5162, 14.5501) does not look like a
# capital, unlike the other rows - confirm it is intended.
_ili_temp_points = [
    (47.5162, 14.5501),
    (50.8503, 4.3517),
    (50.0755, 14.4378),
    (55.6761, 12.5683),
    (59.437, 24.7536),
    (48.8566, 2.3522),
    (37.9838, 23.7275),
    (45.815, 15.9819),
    (47.4979, 19.0402),
    (53.3498, -6.2603),
    (54.6872, 25.2797),
    (49.8153, 6.1296),
    (56.9496, 24.1052),
    (35.8997, 14.5146),
    (52.3676, 4.9041),
    (59.9139, 10.7522),
    (52.2297, 21.0122),
    (44.4268, 26.1025),
    (46.0569, 14.5058),
]
params_ili_temp = {
    "latitude": [point[0] for point in _ili_temp_points],
    "longitude": [point[1] for point in _ili_temp_points],
    "daily": ["temperature_2m_max", "temperature_2m_min"],
    "timezone": "auto",
    "start_date": "2014-10-05",
    "end_date": "2024-10-13",
}

In [18]:

# Request daily max/min temperature for all ILI locations in a single call.
responses = openmeteo.weather_api(url, params=params_ili_temp)
all_data_temp_ili = []

# Responses are matched to country codes by position.
for country, response in zip(country_names_ili, responses):
    # Skip locations for which nothing came back.
    if response is None:
        print(f"No data available for {country}.")
        continue

    # Variables are returned in the order requested: max first, then min.
    daily = response.Daily()
    daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
    daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()

    # The request uses timezone="auto", so each daily value describes a local
    # calendar day, but the unix timestamps are expressed in GMT+0. Without
    # the location's UTC offset, local midnight lands on the previous UTC
    # date for locations east of Greenwich and every row is labelled one day
    # early. Adding the offset makes "date" carry the local calendar day
    # (still tagged as UTC).
    utc_offset_seconds = response.UtcOffsetSeconds()

    # Create a DataFrame for the current location (end bound excluded).
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time() + utc_offset_seconds, unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd() + utc_offset_seconds, unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "temperature_2m_max": daily_temperature_2m_max,
        "temperature_2m_min": daily_temperature_2m_min,
        "country": [country] * len(daily_temperature_2m_max)  # Add country column
    }

    # Convert to DataFrame and append to the list
    all_data_temp_ili.append(pd.DataFrame(data=daily_data))

# Combine all DataFrames into one
data_temp_ili = pd.concat(all_data_temp_ili, ignore_index=True)

# Save the combined table to CSV
data_temp_ili.to_csv("data_temp_ili.csv", index=False)


In [None]:
# Number of distinct values in each column of the ILI temperature table.
data_temp_ili.nunique()

In [None]:
# Column dtypes of the ARI hourly humidity table.
data_ari_humidity.dtypes

In [21]:
# Add a 'Fecha' column holding the date part of each timestamp as a
# 'YYYY-MM-DD' string, on all four weather tables.
for weather_frame in (data_ari_humidity, data_ili_humidity, data_temp_ari, data_temp_ili):
    weather_frame['Fecha'] = weather_frame['date'].dt.strftime('%Y-%m-%d')

In [None]:
# Preview the ARI humidity table after the 'Fecha' column was added.
data_ari_humidity.head()

In [None]:
# Column names of the ARI humidity table.
data_ari_humidity.columns

In [24]:
# Daily mean humidity: average the hourly readings that share the same
# country and 'Fecha', then turn the group keys back into columns.
day_keys = ['country', 'Fecha']
data_ili_hum_by_day = (
    data_ili_humidity
    .groupby(day_keys)['relative_humidity_2m']
    .mean()
    .reset_index()
)
data_ari_hum_by_day = (
    data_ari_humidity
    .groupby(day_keys)['relative_humidity_2m']
    .mean()
    .reset_index()
)


In [25]:
# Convert the 'Fecha' strings of both daily humidity tables to datetimes.
for daily_humidity in (data_ari_hum_by_day, data_ili_hum_by_day):
    daily_humidity['Fecha'] = pd.to_datetime(daily_humidity['Fecha'])

In [None]:
# Preview the (filtered) ARI incidence table.
ari_incidence.head()

In [None]:
# Column dtypes of the ARI daily humidity table.
data_ari_hum_by_day.dtypes

In [None]:
# Column dtypes of the ILI daily temperature table.
data_temp_ili.dtypes

In [29]:
# Add 'week_of_year', 'year' and the 'year_week' key (e.g. '2015-W01') to the
# four daily weather tables.
#
# The week number is an ISO week, and ISO weeks belong to the ISO year, which
# differs from the calendar year around New Year: 2014-12-29 is ISO 2015-W01
# and 2016-01-01 is ISO 2015-W53. Building the key from the calendar year
# would label those days '2014-W01' and '2016-W53', so 'year_week' is built
# from the ISO year instead. 'year' stays the calendar year, as before.
for weather_by_day in (data_ari_hum_by_day, data_ili_hum_by_day, data_temp_ari, data_temp_ili):
    # 'Fecha' is a string in the temperature tables; to_datetime leaves an
    # already-converted column unchanged.
    fecha = pd.to_datetime(weather_by_day['Fecha'])
    iso_calendar = fecha.dt.isocalendar()

    weather_by_day['week_of_year'] = iso_calendar.week.astype('str').str.zfill(2)
    weather_by_day['year'] = fecha.dt.strftime('%Y')
    weather_by_day['year_week'] = (
        iso_calendar.year.astype('str') + '-W' + weather_by_day['week_of_year']
    )


In [None]:
# Preview the ILI daily humidity table with the week columns added.
data_ili_hum_by_day.head()

In [31]:
# Dump the ARI temperature table to dat.csv (presumably for manual inspection);
# the row index is written too because index=False is not passed.
data_temp_ari.to_csv("dat.csv")

In [None]:
# Spot check: among the rows dated 2015-01-01 .. 2016-01-30 (string
# comparison on 'Fecha'), show those labelled '2015-W01'.
a = data_temp_ari.loc[data_temp_ari['Fecha'] >= '2015-01-01']
b = a.loc[a['Fecha'] <= '2016-01-30']
b.loc[b['year_week'] == '2015-W01']

In [None]:
# Share of missing values per column (the mean of the boolean mask is the
# number of missing entries divided by the number of rows).
data_temp_ari.isna().mean()

In [None]:
"""data_temp_ari['iso_year'] = pd.to_datetime(data_temp_ari['Fecha']).dt.isocalendar().year
data_temp_ari['iso_week'] = pd.to_datetime(data_temp_ari['Fecha']).dt.isocalendar().week
data_temp_ari[data_temp_ari['iso_week']==53]"""

In [35]:
# Weekly mean humidity: average the daily means per country and 'year_week'.
humidity_week_keys = ['country', 'year_week']
data_ari_hum_by_week = (
    data_ari_hum_by_day
    .groupby(humidity_week_keys, as_index=False)
    .agg({'relative_humidity_2m': 'mean'})
)
data_ili_hum_by_week = (
    data_ili_hum_by_day
    .groupby(humidity_week_keys, as_index=False)
    .agg({'relative_humidity_2m': 'mean'})
)


In [36]:
# Weekly mean of the daily max and min temperatures per country and 'year_week'.
temp_week_keys = ['country', 'year_week']
temp_week_means = {'temperature_2m_max': 'mean', 'temperature_2m_min': 'mean'}

data_ari_temp_by_week = (
    data_temp_ari
    .groupby(temp_week_keys, as_index=False)
    .agg(temp_week_means)
)

data_ili_temp_by_week = (
    data_temp_ili
    .groupby(temp_week_keys, as_index=False)
    .agg(temp_week_means)
)


In [None]:
# Column names of the ARI incidence table.
ari_incidence.columns

In [None]:
# Preview the weekly ARI temperature means.
data_ari_temp_by_week.head()

In [None]:
# Column names of the weekly ARI humidity table.
data_ari_hum_by_week.columns

In [None]:
# Print the shape of the weekly humidity table, then of the incidence table.
for table in (data_ari_hum_by_week, ari_incidence):
    print(table.shape)

In [41]:
# Restrict both incidence tables to the countries that have weather data.
ari_incidence = ari_incidence.loc[ari_incidence['location'].isin(country_names_ari)]
ili_incidence = ili_incidence.loc[ili_incidence['location'].isin(country_names_ili)]

In [None]:
# Left-join weekly humidity onto the ARI incidence rows; the weather tables
# call the country column 'country', which is dropped again after each join.
ari_hum = (
    ari_incidence
    .merge(
        data_ari_hum_by_week,
        how='left',
        left_on=['location', 'year_week'],
        right_on=['country', 'year_week'],
    )
    .drop(columns=['country'])
)

# Same join for the weekly temperature means.
ari = (
    ari_hum
    .merge(
        data_ari_temp_by_week,
        how='left',
        left_on=['location', 'year_week'],
        right_on=['country', 'year_week'],
    )
    .drop(columns=['country'])
)
ari['truth_date'] = pd.to_datetime(ari['truth_date'])
ari.head()

In [None]:
# Left-join weekly humidity onto the ILI incidence rows; the weather tables
# call the country column 'country', which is dropped again after each join.
ili_hum = (
    ili_incidence
    .merge(
        data_ili_hum_by_week,
        how='left',
        left_on=['location', 'year_week'],
        right_on=['country', 'year_week'],
    )
    .drop(columns=['country'])
)

# Same join for the weekly temperature means.
ili = (
    ili_hum
    .merge(
        data_ili_temp_by_week,
        how='left',
        left_on=['location', 'year_week'],
        right_on=['country', 'year_week'],
    )
    .drop(columns=['country'])
)
ili['truth_date'] = pd.to_datetime(ili['truth_date'])
ili.head()

In [44]:
# Save the merged tables as comma-separated files (pandas' default separator);
# the row index is written as the first column.
ili.to_csv("data_ili.csv")
ari.to_csv("data_ari.csv")

In [45]:
# Flag the pandemic window: 1 for truth_date from 2020-03-01 to 2024-05-31
# (both bounds included), 0 otherwise.
in_covid_window = ili['truth_date'].between('2020-03-01', '2024-05-31')
ili['covid'] = in_covid_window.astype(int)

In [None]:
# Column names of the merged ILI table.
ili.columns

In [None]:
import matplotlib.pyplot as plt

# Locations present in the merged ILI table, in order of first appearance.
countries = ili['location'].unique()

# One line per country on a single set of axes.
fig, ax = plt.subplots(figsize=(12, 8))
for country in countries:
    country_data = ili.loc[ili['location'] == country]
    ax.plot(country_data['truth_date'], country_data['value'], label=country)

# Titles, legend outside the plotting area, grid.
ax.set_title('Values Over Time by Country')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True)
fig.tight_layout()

plt.show()


In [57]:
# ARI countries drawn together on one chart; LU is listed separately and gets
# a chart of its own.
countries_ari = "BE BG CZ EE FR DE HU LT RO SI ES LV".split()
countries_ari2 = ["LU"]

In [None]:
# Plot ARI values over time for the countries in countries_ari.
# Only rows with value < 6000 are drawn (rows with a missing value fail the
# comparison and are left out too). The filter does not depend on the
# country, so it is applied once instead of once per loop iteration.
ari_below_threshold = ari[ari['value'] < 6000]

plt.figure(figsize=(12, 8))
for country in countries_ari:
    country_data = ari_below_threshold[ari_below_threshold['location'] == country]
    plt.plot(country_data['truth_date'], country_data['value'], label=country)

# Customize the plot
plt.title('Values Over Time by Country')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True)
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Separate chart for the countries in countries_ari2, without any value filter.
fig, ax = plt.subplots(figsize=(12, 8))
for country in countries_ari2:
    country_data = ari.loc[ari['location'] == country]
    ax.plot(country_data['truth_date'], country_data['value'], label=country)

# Titles, legend outside the plotting area, grid.
ax.set_title('Values Over Time by Country')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True)
fig.tight_layout()

plt.show()

In [None]:
# Show the ARI rows whose value exceeds 50000.
ari[ari['value']>50000]

In [64]:
# LU and MT get a chart of their own; every other ILI country goes on the
# main chart, in the original order.
country_ili_2 = ['LU', 'MT']
country_names_ili2 = [
    code
    for code in "AT BE CZ DK EE FR GR HR HU IE LT LU LV MT NL NO PL RO SI".split()
    if code not in country_ili_2
]

In [None]:
# Plot ILI values over time for the countries in country_names_ili2.
# Only rows with value < 50000 are drawn (rows with a missing value fail the
# comparison and are left out too). The filter does not depend on the
# country, so it is applied once instead of once per loop iteration.
ili_below_threshold = ili[ili['value'] < 50000]

plt.figure(figsize=(12, 8))
for country in country_names_ili2:
    country_data = ili_below_threshold[ili_below_threshold['location'] == country]
    plt.plot(country_data['truth_date'], country_data['value'], label=country)

# Customize the plot
plt.title('Values Over Time by Country')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True)
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Separate chart for the countries in country_ili_2, without any value filter.
fig, ax = plt.subplots(figsize=(12, 8))
for country in country_ili_2:
    country_data = ili.loc[ili['location'] == country]
    ax.plot(country_data['truth_date'], country_data['value'], label=country)

# Titles, legend outside the plotting area, grid.
ax.set_title('Values Over Time by Country')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True)
fig.tight_layout()

plt.show()

Separate the data for each country

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
def adcor(data,country):
    """Print the Augmented Dickey-Fuller p-value of one country's series.

    Parameters
    ----------
    data : DataFrame
        Table with at least a 'location' and a 'value' column.
    country : str
        Value matched against data['location'].

    Returns
    -------
    None
        The result is only printed.

    Notes
    -----
    Missing values are dropped before testing because adfuller cannot handle
    NaN. If nothing is left, a message is printed and the test is skipped
    instead of raising.
    """
    series = data.loc[data['location'] == country, 'value'].dropna()
    if series.empty:
        print(f'No non-missing values for {country}; ADF test skipped')
        return
    adf_test = adfuller(series)
    # adf_test[0] is the test statistic, adf_test[1] the p-value.
    # A single f-string replaces the former f-string/%-format mix, which
    # would break on a country label containing '%'.
    print(f'p-value: {adf_test[1]:f} for {country}')
# Run the stationarity check on every ILI country.
for ili_code in country_names_ili:
    adcor(ili, ili_code)


A p-value far below 0.05 rejects the unit-root null hypothesis of the ADF test, which indicates that the series is stationary. This means that we can use the ARIMA model.

In [None]:
# Run the stationarity check on every ARI country.
for ari_code in country_names_ari:
    adcor(ari, ari_code)

In [None]:
# Publish one DataFrame per country as data_ari_<code> / data_ili_<code> and
# print each variable name. The names are bound through globals(), which
# creates exactly the same variables as before without building source code
# from strings and running it through exec().
for i in country_names_ari:
    globals()[f"data_ari_{i}"] = ari[ari['location'] == i]
    print(f"data_ari_{i}")
for i in country_names_ili:
    globals()[f"data_ili_{i}"] = ili[ili['location'] == i]
    print(f"data_ili_{i}")


A p-value far below 0.05 rejects the unit-root null hypothesis of the ADF test, which indicates that the series is stationary. This means that we can use the ARIMA model.


In [None]:
# One figure per ARI country: ACF on the left, PACF on the right, 40 lags each.
for country in country_names_ari:
    country_data = ari.loc[ari['location'] == country, 'value']

    fig, (acf_ax, pacf_ax) = plt.subplots(1, 2, figsize=(12, 6))
    fig.suptitle(f"ACF and PACF for {country}", fontsize=16)

    plot_acf(country_data, lags=40, ax=acf_ax)
    acf_ax.set_title("ACF")

    plot_pacf(country_data, lags=40, ax=pacf_ax, method='ywm')
    pacf_ax.set_title("PACF")

    # Leave room at the top for the figure-level title.
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

In [None]:
# One figure per ILI country: ACF on the left, PACF on the right, 40 lags each.
for country in country_names_ili:
    country_data = ili.loc[ili['location'] == country, 'value']

    fig, (acf_ax, pacf_ax) = plt.subplots(1, 2, figsize=(12, 6))
    fig.suptitle(f"ACF and PACF for {country}", fontsize=16)

    plot_acf(country_data, lags=40, ax=acf_ax)
    acf_ax.set_title("ACF")

    plot_pacf(country_data, lags=40, ax=pacf_ax, method='ywm')
    pacf_ax.set_title("PACF")

    # Leave room at the top for the figure-level title.
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

In [None]:
# ACF and PACF of the 'value' column of the whole ARI table, i.e. with the
# rows of all locations taken together as one series.
ari_values = ari['value']
plot_acf(ari_values, lags=40)
plot_pacf(ari_values, lags=40)
plt.show()

In [None]:
# ACF and PACF of the 'value' column of the whole ILI table, i.e. with the
# rows of all locations taken together as one series.
ili_values = ili['value']
plot_acf(ili_values, lags=40)
plot_pacf(ili_values, lags=40)
plt.show()