## 1. Prepare Python environment

In [2]:
# Install custom Python packages
!pip3 install pyeucountrycodes --quiet
!pip3 install calplot --quiet
!pip3 install gradio --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m


In [3]:
# Load required Python packages
import os, re, requests, logging, calplot
import pandas as pd
import dask.dataframe as dd
import multiprocessing as mp
from dask.multiprocessing import get
from dask.diagnostics import ProgressBar
ProgressBar().register()
from tqdm import tqdm
from itertools import product
from eu_country_codes import COUNTRY_CODES
import gradio as gr
import plotly.express as px

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



## 2. Download data files

In [4]:
# Function to download remote file to the disk
def urlDownload(urlLink, showProgress = False):
  with requests.get(urlLink, stream=True) as r:
    fileSize = int(r.headers.get('Content-Length'))
    fileName = r.headers.get('Content-Disposition').split("filename=")[1]
    if not os.path.exists(fileName) or os.path.getsize(fileName) != fileSize:
      block_size = 1024
      if showProgress:
        print(f"Downloading {fileName}")
        progress_bar = tqdm(total=fileSize, unit='iB', unit_scale=True)
      with open(fileName, 'wb') as file:
        for data in r.iter_content(block_size):
          if showProgress:
            progress_bar.update(len(data))
          file.write(data)
      if showProgress:
        progress_bar.close()
    return fileName

In [5]:
# Download the newest data
urlLocation = 'https://aqicn.org/data-platform/covid19/report/39374-7694ec07/'
csvFile = urlDownload(urlLocation, showProgress=True)
csvFile

Downloading waqi-covid19-airqualitydata-2024.csv


100%|██████████| 105M/105M [00:19<00:00, 5.30MiB/s] 


'waqi-covid19-airqualitydata-2024.csv'

In [6]:
# Create lists of year and quarter names
yNames = [str(i) for i in range(2019, 2024)]
qNames = ["Q" + str(i) for i in range(1, 5)]

# Create a data frame with the url locations and year/quarter combinations
DF = pd.DataFrame(list(product(yNames, qNames)),columns=['yNames', 'qNames'])
DF.insert(loc=0, column='urlLocation', value=urlLocation)

# Combine url location and year/quarter combinations into a single column
DF = pd.DataFrame({'urlLocations': DF.agg(''.join, axis=1)})
DF

Unnamed: 0,urlLocations
0,https://aqicn.org/data-platform/covid19/report...
1,https://aqicn.org/data-platform/covid19/report...
2,https://aqicn.org/data-platform/covid19/report...
3,https://aqicn.org/data-platform/covid19/report...
4,https://aqicn.org/data-platform/covid19/report...
5,https://aqicn.org/data-platform/covid19/report...
6,https://aqicn.org/data-platform/covid19/report...
7,https://aqicn.org/data-platform/covid19/report...
8,https://aqicn.org/data-platform/covid19/report...
9,https://aqicn.org/data-platform/covid19/report...


In [7]:
# Download legacy data (in parallel)
DDF = dd.from_pandas(DF, npartitions=mp.cpu_count())
csvFiles = DDF.apply(lambda x : urlDownload(x[0]), axis=1, meta=pd.Series(dtype="str")).compute(scheduler='threads')

[                                        ] | 0% Completed | 106.14 ms

  csvFiles = DDF.apply(lambda x : urlDownload(x[0]), axis=1, meta=pd.Series(dtype="str")).compute(scheduler='threads')


[########################################] | 100% Completed | 41.52 s


## 3. Load and prepare data

In [8]:
# Define the columns to load
meta_cols = ['Date', 'Country', 'City', 'Specie']
main_column = 'median' # 'count', 'min', 'max', 'median', 'variance'
selected_cols = meta_cols + [main_column]

# Read the newest data file and skip the first 4 lines
DF = pd.read_csv(csvFile, skiprows=4, usecols=selected_cols)

# Leave EU data, rename main column to Value
selectNL = DF['Country'] == 'NL'
newTable = DF[selectNL].rename(columns={main_column: 'Value'})
print(newTable)

               Date Country       City     Specie  Value
1345408  2024-02-17      NL    Utrecht  wind-gust    0.9
1345409  2024-03-08      NL    Utrecht  wind-gust    3.3
1345410  2024-05-28      NL    Utrecht  wind-gust    2.6
1345411  2024-06-30      NL    Utrecht  wind-gust    2.0
1345412  2024-01-27      NL    Utrecht  wind-gust    0.5
...             ...     ...        ...        ...    ...
1383189  2024-09-28      NL  The Hague       pm10   13.0
1383190  2024-10-20      NL  The Hague       pm10   10.0
1383191  2024-11-05      NL  The Hague       pm10   27.0
1383192  2024-11-16      NL  The Hague       pm10   15.0
1383193  2024-11-29      NL  The Hague       pm10   17.0

[37786 rows x 5 columns]


In [9]:
# Read legacy data files (in parallel)
fileNamesQ = [f for f in os.listdir('.') if re.match(r'^.*Q\d.csv$', f)]
DF = dd.compute(dd.read_csv(fileNamesQ, skiprows=4, usecols=selected_cols))[0]
selectNL = DF['Country'] == 'NL'
oldTable = DF[selectNL].rename(columns={main_column: 'Value'})
print(oldTable)

[########################################] | 100% Completed | 1.96 sms
              Date Country       City     Specie  Value
99767   2021-04-23      NL    Utrecht   humidity   64.6
99768   2021-04-25      NL    Utrecht   humidity   60.1
99769   2021-04-26      NL    Utrecht   humidity   60.5
99770   2021-04-30      NL    Utrecht   humidity   86.1
99771   2021-05-12      NL    Utrecht   humidity   76.5
...            ...     ...        ...        ...    ...
488233  2019-01-17      NL  The Hague  wind-gust   14.5
488234  2019-01-26      NL  The Hague  wind-gust   18.0
488235  2019-02-07      NL  The Hague  wind-gust   22.4
488236  2019-02-19      NL  The Hague  wind-gust   14.2
488237  2019-03-23      NL  The Hague  wind-gust    7.7

[188031 rows x 5 columns]


In [10]:
# Append old (2018-2023) and new (2024) data tables, sort, remove duplicates
DF = pd.concat([oldTable, newTable])
dataTableEU = DF.sort_values(by=['Country', 'City', 'Date']).drop_duplicates()
print(dataTableEU)

               Date Country       City       Specie   Value
482829   2018-12-31      NL  Amsterdam          no2    10.1
482923   2018-12-31      NL  Amsterdam   wind-speed     2.7
482957   2018-12-31      NL  Amsterdam    wind-gust     6.5
483094   2018-12-31      NL  Amsterdam  temperature     9.4
483166   2018-12-31      NL  Amsterdam     humidity    91.6
...             ...     ...        ...          ...     ...
1347330  2024-12-11      NL    Utrecht     pressure  1025.4
1347548  2024-12-11      NL    Utrecht   wind-speed     2.0
1347879  2024-12-11      NL    Utrecht     humidity    87.6
1348151  2024-12-11      NL    Utrecht          so2     0.1
1348554  2024-12-11      NL    Utrecht         pm25    45.0

[216106 rows x 5 columns]


In [18]:
dataTableEU.to_csv('output.csv', index=False)

In [155]:
dataTableEU = pd.read_csv('output.csv')

## 4. Check and select vars

In [156]:
# Weather conditions and pollutants (PM10, PM2.5, NO2, Ozone, SO2, CO)

# Calculate the proportion of each Species in the data table
all_vars = 100 * pd.value_counts(dataTableEU.Specie) / len(dataTableEU)

# Drop the variables that are not needed
drop_weat = [] #['pressure', 'wind-speed', 'wind-gust', 'wind speed', 'wind gust', 'dew', 'precipitation'] # AR TIKRAI REIKIA SITUS PASALINT???!!!!
drop_poll = [] #['wd', 'aqi', 'uvi', 'pm1', 'neph', 'mepaqi']
keep_vars = set(all_vars.index) - set(drop_weat + drop_poll)

# Create a new data table with the info on kept variables
new_data_table = pd.DataFrame([all_vars[list(keep_vars)].sort_values(ascending=False)])
new_data_table.style.hide(axis="index")


pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.



no2,pm10,pressure,humidity,temperature,pm25,o3,wind-speed,wind-gust,so2,dew,co,wind gust
10.24613,10.242414,10.143924,10.14346,10.142995,9.786669,9.636612,9.395035,8.758571,5.006225,3.873135,2.245275,0.379555


In [157]:
# Descriptive statistics for daily values of selected variables
selectedVars = ['temperature', 'humidity', 'no2', 'pm10', 'pressure', 'pm25', 'o3', 'wind-speed', 'wind-gust', 'so2', 'dew', 'co', 'wind gust']
#selectedVars = ['no2', 'pm10','pm25', 'o3','so2','co']
selectedIdx = dataTableEU['Specie'].isin(selectedVars)
dataTableEU = dataTableEU[selectedIdx]
dataTableEU.groupby('Specie')['Value'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Specie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
co,4833.0,2.539292,0.768974,0.7,2.0,2.5,3.0,7.9
dew,8337.0,7.701259,5.118997,-12.0,4.0,8.0,11.8,21.0
humidity,21834.0,79.34193,12.313332,1.0,72.2,82.0,88.7,100.0
no2,22055.0,8.060521,4.348462,0.9,5.0,7.1,10.1,36.9
o3,20743.0,20.292429,8.462526,0.1,15.2,21.0,25.95,64.9
pm10,22047.0,16.262303,6.623625,3.0,12.0,15.0,19.0,64.0
pm25,21066.0,35.89699,18.61716,1.0,22.0,31.0,47.0,158.0
pressure,21835.0,1013.546586,11.6364,858.1,1007.0,1014.5,1021.0,1053.9
so2,10776.0,0.463827,1.107822,0.1,0.2,0.3,0.5,102.1
temperature,21833.0,11.91642,6.006536,-7.0,7.5,11.6,16.5,33.8


In [158]:
# 2021-10-03 Barcelona fix
print(dataTableEU)
dataTableEU = dataTableEU.groupby(['Date', 'Country', 'City', 'Specie'])[['Value']].mean().reset_index()
print(dataTableEU)

              Date Country       City       Specie   Value
0       2018-12-31      NL  Amsterdam           co     2.5
1       2018-12-31      NL  Amsterdam          dew     8.0
2       2018-12-31      NL  Amsterdam     humidity    91.6
3       2018-12-31      NL  Amsterdam          no2    10.1
4       2018-12-31      NL  Amsterdam           o3    12.1
...            ...     ...        ...          ...     ...
215247  2024-12-11      NL    Utrecht     pressure  1025.4
215248  2024-12-11      NL    Utrecht          so2     0.1
215249  2024-12-11      NL    Utrecht  temperature     3.8
215250  2024-12-11      NL    Utrecht    wind-gust     2.6
215251  2024-12-11      NL    Utrecht   wind-speed     2.0

[215252 rows x 5 columns]
              Date Country       City       Specie   Value
0       2018-12-31      NL  Amsterdam           co     2.5
1       2018-12-31      NL  Amsterdam          dew     8.0
2       2018-12-31      NL  Amsterdam     humidity    91.6
3       2018-12-31      NL  A

## 5. Pivot and calculate THI

In [159]:
# Create pivot table, calculate THI for each row, drop rows with missing THI values
dataTableTHI = dataTableEU.pivot_table(index=['Date', 'Country', 'City'], columns='Specie', values='Value').reset_index()
dataTableTHI["THI"] = 0.8 * dataTableTHI.temperature + (dataTableTHI.humidity/100)*(dataTableTHI.temperature-14.4) + 46.4
dataTableTHI = dataTableTHI.dropna(subset=["THI"])
print(dataTableTHI)

Specie        Date Country        City   co  dew  humidity   no2    o3  pm10  \
0       2018-12-31      NL   Amsterdam  2.5  8.0      91.6  10.1  12.1  19.0   
1       2018-12-31      NL       Breda  NaN  7.8      93.1   9.2   9.7  16.0   
2       2018-12-31      NL   Dordrecht  NaN  NaN      97.0   7.8   9.5  15.0   
3       2018-12-31      NL   Eindhoven  NaN  7.7      98.0   8.2   8.3  24.0   
4       2018-12-31      NL   Groningen  NaN  8.0      91.0   6.1  16.5  23.0   
...            ...     ...         ...  ...  ...       ...   ...   ...   ...   
22050   2024-12-11      NL  Maastricht  NaN  2.0      89.0   6.0  11.8  15.0   
22051   2024-12-11      NL    Nijmegen  NaN  NaN      84.0   6.0  15.7  12.0   
22052   2024-12-11      NL   Rotterdam  NaN  1.5      86.0   5.6  17.3  18.0   
22053   2024-12-11      NL   The Hague  NaN  NaN      89.5   4.3  18.5  18.0   
22054   2024-12-11      NL     Utrecht  NaN  NaN      87.6   3.7  19.7  13.0   

Specie  pm25  pressure  so2  temperatur

In [184]:
dataTableAPI = dataTableEU.pivot_table(index=['Date', 'Country', 'City'], 
                                       columns='Specie', 
                                       values='Value').reset_index()

# Apskaičiuojame API kaip MAX tarp teršalų, ignoruodami NaN
dataTableAPI['API'] = dataTableAPI[['pm10', 'pm25', 'no2', 'o3', 'so2', 'co']].max(axis=1, skipna=True)

# Nurodome dominuojantį teršalą, kuris lėmė API
dataTableAPI['Dominant_Specie'] = dataTableAPI[['pm10', 'pm25', 'no2', 'o3', 'so2', 'co']].idxmax(axis=1)

# Pašaliname eilutes, kuriose API yra NaN (jeigu visos reikšmės buvo NaN)
dataTableAPI = dataTableAPI.dropna(subset=['API'])

# Rezultatas
print(dataTableAPI[['Date', 'City', 'API', 'Dominant_Specie']].head())

Specie        Date       City   API Dominant_Specie
0       2018-12-31  Amsterdam  44.0            pm25
1       2018-12-31      Breda  46.0            pm25
2       2018-12-31  Dordrecht  34.0            pm25
3       2018-12-31  Eindhoven  59.0            pm25
4       2018-12-31  Groningen  38.0            pm25


In [171]:
print(dataTableAPI['API'].mean())

36.81244389027431


In [None]:
'''
# Ribinės reikšmės ir API skalė
dataTableAPI = dataTableEU.pivot_table(index=['Date', 'Country', 'City'],
                                       columns='Specie', 
                                       values='Value').reset_index()


thresholds = {
    'pm10': [(0, 50), (50, 100)],   # Ribos ir API reikšmės intervalams
    'pm25': [(0, 15), (15, 35)],
    'no2':  [(0, 40), (40, 100)],
    'so2':  [(0, 20), (20, 75)],
    'o3':   [(0, 100), (100, 200)],
    'co':   [(0, 4), (4, 10)]
}

# Funkcija API skaičiavimui
def calculate_api(value, limits):
    for (low, high), (api_low, api_high) in zip(limits, [(0, 50), (50, 100)]):
        if low <= value <= high:
            return (value - low) / (high - low) * (api_high - api_low) + api_low
    return None  # Grąžina None, jei reikšmė nepatenka į ribas

# Taikome API skaičiavimui kiekvienam teršalui
for Specie, limits in thresholds.items():
    dataTableAPI[f'API_{Specie}'] = dataTableAPI[Specie].apply(lambda x: calculate_api(x, limits))

# Bendras API – didžiausia reikšmė tarp visų teršalų
dataTableAPI['API'] = dataTableAPI[[f'API_{p}' for p in thresholds.keys()]].max(axis=1)

# Pridėti stulpelį, kuris nurodo teršalą (Specie), atsakingą už didžiausią API reikšmę
dataTableAPI['Dominant_Specie'] = dataTableAPI[[f'API_{p}' for p in thresholds.keys()]].idxmax(axis=1).str.replace('API_', '')

# Patikriname galutinę lentelę
print(dataTableAPI[['Date', 'City', 'API', 'Dominant_Specie']].head())
'''


In [203]:
# Pridėti stulpelį su sąlyga
WHO_limit = 50
dataTableAPI['Above_WHO'] = dataTableAPI['API'] > WHO_limit

# Suskaičiuoti, kiek yra 'True' reikšmių (dienos viršijančios ribą)
true_count = dataTableAPI['Above_WHO'].sum()

# Apskaičiuoti vidutinę API reikšmę
mean_api = dataTableAPI['API'].mean()

# Rezultatų išvedimas
print(f"Viršijimų skaičius (Above_WHO = True): {true_count}")
print(f"Vidutinė API reikšmė: {mean_api:.2f}")

Viršijimų skaičius (Above_WHO = True): 4495
Vidutinė API reikšmė: 36.81


## 6. API statistics and plots

In [216]:
import pandas as pd
import matplotlib as mpl
import plotly.express as px
import gradio as gr
import plotly.graph_objects as go
import plotly.express as px

# --- Data Preparation ---
dataTableAPI['Date'] = pd.to_datetime(dataTableAPI['Date'])
dataTableAPI['Year'] = dataTableAPI['Date'].dt.year
dataTableAPI['Month'] = dataTableAPI['Date'].dt.to_period("M").astype(str)
dataTableAPI['Week'] = dataTableAPI['Date'].dt.to_period("W").astype(str)

# Variables for selection
selectedVars = ['temperature', 'humidity', 'no2', 'pm10', 'pressure', 'pm25',
                'o3', 'wind-speed', 'wind-gust', 'so2', 'dew', 'co']

# --- Descriptive Statistics Function ---
def descriptive_stats_all(myYear):
    """Calculate descriptive statistics for API grouped by City."""
    filtered_data = dataTableAPI.copy()
    if myYear:
        filtered_data = filtered_data[filtered_data['Year'].isin(myYear)]
    
    stats = filtered_data.groupby('City')['API'].describe().reset_index()
    stats = stats.rename(columns={
        "count": "Count", "mean": "Mean", "std": "Std Dev",
        "min": "Min", "25%": "25%", "50%": "Median",
        "75%": "75%", "max": "Max"
    })
    return stats

# --- Monthly API Trend Line Chart ---
def monthly_api_trend_all(myYear):
    """Create a line chart showing monthly API trends for all cities."""
    filtered_data = dataTableAPI.copy()
    if myYear:
        filtered_data = filtered_data[filtered_data['Year'].isin(myYear)]
    
    monthly_trend = filtered_data.groupby(['City', 'Month'])['API'].mean().reset_index()
    monthly_trend['Month'] = pd.to_datetime(monthly_trend['Month'])  # Convert to datetime
    
    # Plot line chart with quarterly labels
    fig = px.line(monthly_trend, x='Month', y='API', color='City',
                  title="Monthly API Trend for All Cities",
                  labels={"API": "Average API", "Month": "Month"})
    fig.update_xaxes(
        tickformat="%Y-Q%q",  # Quarterly labels
        dtick="M3",           # Every 3 months
        tickangle=45
    )
    return fig

# --- Line Chart for Selected Variables with Aggregation ---
def line_chart_selected_vars(myVars, myYear, aggregation):
    """Plot line charts for selected variables with aggregation (weekly/monthly)."""
    if not myVars:
        return px.line(title="No variables selected")
    
    filtered_data = dataTableAPI.copy()
    if myYear:
        filtered_data = filtered_data[filtered_data['Year'].isin(myYear)]
    
    # Apply aggregation
    if aggregation == "Weekly":
        filtered_data['Time'] = filtered_data['Date'].dt.to_period("W").apply(lambda x: x.start_time)
    else:  # Monthly
        filtered_data['Time'] = filtered_data['Date'].dt.to_period("M").apply(lambda x: x.start_time)
    
    # Melt the data to long format for plotting
    melted_data = filtered_data.melt(id_vars=['Time'], value_vars=myVars, 
                                     var_name='Variable', value_name='Value')
    
    # Ensure Value column is numeric
    melted_data = melted_data[pd.to_numeric(melted_data['Value'], errors='coerce').notnull()]
    melted_data['Value'] = pd.to_numeric(melted_data['Value'])
    
    # Aggregate by Time and Variable
    aggregated_data = melted_data.groupby(['Time', 'Variable']).mean().reset_index()
    
    # Plot the line chart
    fig = px.line(aggregated_data, x='Time', y='Value', color='Variable',
                  title=f"Line Chart for Selected Variables ({aggregation} Aggregation)")
    fig.update_xaxes(tickformat="%Y-%m" if aggregation == "Monthly" else "%Y-%U", tickangle=45)
    return fig

def weekly_api_plot(cities, years, plot_type):
    """Generate API analysis for days of the week."""
    print("Before Filtering API Mean:", dataTableAPI['API'].mean())

    # Filtruojame duomenis pagal pasirinktus miestus
    if isinstance(cities, list) and cities:  # Patikriname, ar miestai buvo pasirinkti
        filtered_data = dataTableAPI[dataTableAPI['City'].isin(cities)]
    else:
        filtered_data = dataTableAPI.copy()  # Jei miestai nepasirinkti, naudojame visus duomenis
    
    print(f"After Filtering by Cities {cities} API Mean:", filtered_data['API'].mean())

    # Filtruojame pagal metus, jei pasirinkti
    if years:
        filtered_data = filtered_data[filtered_data['Year'].isin(years)]
        print(f"After Filtering by Years {years} API Mean:", filtered_data['API'].mean())

    # Išfiltruojame trūkstamas API reikšmes
    filtered_data = filtered_data.dropna(subset=['API'])
    print("After Dropping NaNs API Mean:", filtered_data['API'].mean())

    # Pridedame savaitės dieną
    filtered_data['DayOfWeek'] = filtered_data['Date'].dt.day_name()

    if plot_type == "calplot":
        # Kalendoriaus grafikas
        pdTimeSeries = pd.Series(filtered_data['API'].values, index=pd.DatetimeIndex(filtered_data['Date']))
        cp = calplot.calplot(pdTimeSeries, dropzero=True, cmap='coolwarm',
                             yearlabel_kws={'color': 'black', 'fontsize': 9})
        return cp[0]
    else:
        # Grupavimas vidurkiui
        day_avg = filtered_data.groupby('DayOfWeek')['API'].mean().reset_index()
        print("Grouped Weekly Averages:\n", day_avg)

        # Užtikriname savaitės dienų tvarką
        day_avg['DayOfWeek'] = pd.Categorical(day_avg['DayOfWeek'], 
                                              categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 
                                                          'Friday', 'Saturday', 'Sunday'], ordered=True)
        day_avg = day_avg.sort_values('DayOfWeek')
        
        # Dinaminės spalvų skalės ribos
        overall_mean = day_avg['API'].mean()
        min_val = overall_mean - 10
        max_val = overall_mean + 10
        
        # Heatmap grafikas
        fig = px.imshow(
            day_avg[['API']].T,  
            labels=dict(x="Day of the Week", y="Average API", color="Average API"),
            x=day_avg['DayOfWeek'],
            color_continuous_scale="viridis",
            zmin=min_val,
            zmax=max_val
        )
        
        fig.update_layout(
            title="Weekly API Heatmap",
            xaxis_title="Day of the Week",
            yaxis_title="",
            coloraxis_colorbar=dict(title="Average API")
        )
        return fig

In [192]:
# Papildome WHO Threshold analizę, kad pridėtume miestus iš city_who_summary
city_coords = {
    "Amsterdam": [52.3676, 4.9041],
    "Breda": [51.5719, 4.7683],
    "Dordrecht": [51.8133, 4.6901],
    "Eindhoven": [51.4416, 5.4697],
    "Groningen": [53.2194, 6.5665],
    "Haarlem": [52.3874, 4.6462],
    "Maastricht": [50.8514, 5.6910],
    "Nijmegen": [51.8126, 5.8372],
    "Rotterdam": [51.9225, 4.4792],
    "The Hague": [52.0705, 4.3007],
    "Utrecht": [52.0907, 5.1214]
}

def who_threshold_map(years):
    """Generates a bubble map showing days above WHO threshold for multiple years."""
    # Ensure 'years' is a list
    if not isinstance(years, list):
        years = [years]
    
    # Filter data for the selected years
    filtered_data = dataTableAPI[dataTableAPI['Year'].isin(years)]
    
    # Calculate Days Above WHO and Average API
    days_above = filtered_data[filtered_data['Above_WHO']].groupby('City')['Above_WHO'].sum()
    avg_api = filtered_data.groupby('City')['API'].mean()  # Use all rows for API average

    # Combine into a summary table
    city_summary = pd.concat([days_above, avg_api], axis=1).reset_index()
    city_summary.columns = ['City', 'Days_Above_WHO', 'Average_API']
    
    # Fill missing values for cities without Above_WHO days
    city_summary['Days_Above_WHO'] = city_summary['Days_Above_WHO'].fillna(0)
    
    # Add coordinates from city_coords
    city_summary['Latitude'] = city_summary['City'].apply(lambda x: city_coords.get(x, [None, None])[0])
    city_summary['Longitude'] = city_summary['City'].apply(lambda x: city_coords.get(x, [None, None])[1])
    
    # Filter out cities without coordinates
    city_summary = city_summary.dropna(subset=['Latitude', 'Longitude'])
    
    # Min-Max Scaling for bubble sizes
    min_days = city_summary['Days_Above_WHO'].min()
    max_days = city_summary['Days_Above_WHO'].max()
    city_summary['Scaled_Size'] = city_summary['Days_Above_WHO'].apply(
        lambda x: 10 + 40 * (x - min_days) / (max_days - min_days) if max_days > min_days else 20
    )

    # Create a scatter mapbox with scaled circles
    fig = px.scatter_mapbox(
        city_summary,
        lat='Latitude',
        lon='Longitude',
        size='Scaled_Size',  # Use scaled size
        color='Average_API',
        size_max=50,
        zoom=6,
        mapbox_style="carto-positron",
        title=f"Cities with WHO Threshold Exceedance ({', '.join(map(str, years))})",
        hover_name='City',
        hover_data={'Days_Above_WHO': True, 'Average_API': True, 'Latitude': False, 'Longitude': False}
    )
    
    # Add text annotations for days above WHO threshold
    for i, row in city_summary.iterrows():
        fig.add_trace(
            go.Scattermapbox(
                lat=[row['Latitude']],
                lon=[row['Longitude']],
                mode='text',
                text=[str(row['Days_Above_WHO'])],  # Days Above WHO value
                textfont=dict(size=12, color='black'),
                showlegend=False
            )
        )

    return fig

In [193]:
# --- Function to Identify Dominant Pollutant ---
def dominant_pollutant_summary(years):
    """Apskaičiuoja dažniausiai dominuojantį teršalą kiekvienais metais."""
    # Filtruojame duomenis pagal pasirinktus metus
    filtered_data = dataTableAPI[dataTableAPI['Year'].isin(years)]
    
    # Identifikuojame teršalą, kuris sudaro API (MAX)
    pollutants = ['no2', 'pm10', 'pm25', 'o3', 'so2', 'co']
    filtered_data['Dominant_Pollutant'] = filtered_data[pollutants].idxmax(axis=1).str.upper()

    # Skaičiuojame dažnius kiekvieno teršalo
    dominant_counts = filtered_data['Dominant_Pollutant'].value_counts().reset_index()
    dominant_counts.columns = ['Pollutant', 'Frequency']

    # Pie chart
    fig = px.pie(
        dominant_counts,
        names='Pollutant',
        values='Frequency',
        title="Frequency of Dominant Pollutants Contributing to API",
        color='Pollutant',
        color_discrete_sequence=px.colors.sequential.RdBu
    )
    return fig

def dominant_pollutant_trend(years):
    """Generuoja stacked bar chart, kaip keitėsi dominuojantys teršalai per metus."""
    # Filtruojame duomenis pagal pasirinktus metus
    filtered_data = dataTableAPI[dataTableAPI['Year'].isin(years)]
    
    # Identifikuojame dominuojantį teršalą
    pollutants = ['no2', 'pm10', 'pm25', 'o3', 'so2', 'co']
    filtered_data['Dominant_Pollutant'] = filtered_data[pollutants].idxmax(axis=1).str.upper()

    # Grupavimas pagal metus ir dominuojantį teršalą
    trend_data = filtered_data.groupby(['Year', 'Dominant_Pollutant']).size().reset_index(name='Count')

    # Stacked Bar Chart
    fig = px.bar(
        trend_data,
        x='Year',
        y='Count',
        color='Dominant_Pollutant',
        title="Trend of Dominant Pollutants Over Years",
        labels={'Count': 'Frequency', 'Year': 'Year'},
        barmode='stack',
        color_discrete_sequence=px.colors.sequential.RdBu
    )
    return fig

In [194]:
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt

def pearson_correlation_analysis(selected_vars):
    """
    Skaičiuoja Pearson koreliacijos koeficientus tarp API ir pasirinktų meteorologinių kintamųjų.
    """
    # Filtruojame tik reikalingus kintamuosius
    correlation_data = dataTableAPI[['API'] + selected_vars].dropna()

    # Skaičiuojame koreliacijos matricą
    corr_matrix = correlation_data.corr(method='pearson')

    # Atliekame p-value reikšmingumo testą
    p_values = correlation_data.corr(method=lambda x, y: stats.pearsonr(x, y)[1]) - np.eye(len(correlation_data.columns))

    # Vizualizuojame koreliacijos matricą kaip heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", cbar_kws={'label': 'Correlation Coefficient'})
    plt.title("Pearson Correlation Heatmap")
    plt.tight_layout()
    
    # Gražiname matplotlib objektą kaip išvestį
    return plt, corr_matrix, p_values

In [195]:
import pandas as pd
import numpy as np
import gradio as gr
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from prophet import Prophet
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
from scipy.stats import shapiro, normaltest
from plotly.subplots import make_subplots

# --- Data Preparation ---
dataTableAPI['Date'] = pd.to_datetime(dataTableAPI['Date'])
numeric_columns = dataTableAPI.select_dtypes(include=[np.number]).columns.tolist()

weekly_data = (
    dataTableAPI[['Date'] + numeric_columns]  # Paimame tik skaitinius stulpelius ir datą
    .resample('W', on='Date')
    .mean()
    .reset_index()
)
weekly_data[numeric_columns] = weekly_data[numeric_columns].interpolate(method='linear')

# 1. Stationarity Check (ADF Test)
def check_stationarity():
    result = adfuller(weekly_data['API'].dropna())
    output = pd.DataFrame({
        'Test Statistic': [result[0]],
        'P-Value': [result[1]],
        'Critical Values': [result[4]]
    })
    return output

# 2. Distribution Check
def check_distribution():
    fig, ax = plt.subplots()
    ax.hist(weekly_data['API'].dropna(), bins=20, density=True, alpha=0.6, color='gray')
    weekly_data['API'].dropna().plot(kind='kde', color='red', ax=ax)
    ax.set_title("API Distribution Check")
    ax.set_xlabel("API")
    ax.set_ylabel("Density")
    plt.tight_layout()
    return fig

# 3. Time Series Decomposition
def decompose_time_series():
    # Decompose with period=52
    decomposition = seasonal_decompose(weekly_data['API'], model='additive', period=52)
    
    # Extract components and interpolate missing values
    trend = decomposition.trend.interpolate(method='linear')  # Interpolate missing trend values
    seasonal = decomposition.seasonal
    residual = decomposition.resid.interpolate(method='linear')  # Interpolate missing residuals

    # Create subplots for components
    fig = make_subplots(
        rows=3, cols=1, 
        subplot_titles=("Trend", "Seasonal", "Residuals"), 
        shared_xaxes=True
    )
    
    # Trend Component
    fig.add_trace(go.Scatter(x=weekly_data['Date'], y=trend, name="Trend", line=dict(color='blue')), row=1, col=1)
    
    # Seasonal Component
    fig.add_trace(go.Scatter(x=weekly_data['Date'], y=seasonal, name="Seasonal", line=dict(color='red')), row=2, col=1)
    
    # Residuals Component
    fig.add_trace(go.Scatter(x=weekly_data['Date'], y=residual, name="Residuals", line=dict(color='green')), row=3, col=1)
    
    # Update layout
    fig.update_layout(
        title="Time Series Decomposition",
        height=800,
        xaxis_title="Date",
        showlegend=False
    )
    return fig

# 4. ACF and PACF Plots
def plot_acf_pacf():
    fig, axes = plt.subplots(2, 1, figsize=(12, 8))
    plot_acf(weekly_data['API'].dropna(), ax=axes[0], lags=20, title="Autocorrelation Function (ACF)")
    plot_pacf(weekly_data['API'].dropna(), ax=axes[1], lags=20, title="Partial Autocorrelation Function (PACF)")
    plt.tight_layout()
    return fig



In [219]:
import pandas as pd
import numpy as np
import gradio as gr
import plotly.graph_objects as go
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from math import sqrt

# --- Modeliai Prognozavimui ---
def forecast_models(horizon):
    global forecast_data
    forecast_data = {}
    train = weekly_data.copy()
    
    # Generate future dates
    last_date = train['Date'].iloc[-1]
    future_dates = pd.date_range(start=last_date + pd.Timedelta(weeks=1), periods=horizon, freq='W')
    
    # Naive Forecast
    naive_forecast = np.full(horizon, train['API'].iloc[-1])
    forecast_data['Naive'] = (future_dates, naive_forecast)
    
    # Holt-Winters
    model_hw = ExponentialSmoothing(train['API'], seasonal='add', seasonal_periods=52).fit()
    forecast_hw = model_hw.forecast(horizon)
    forecast_data['Holt-Winter'] = (future_dates, forecast_hw)
    
    # ARIMA
    model_arima = ARIMA(train['API'], order=(2, 1, 2)).fit()
    forecast_arima = model_arima.forecast(horizon)
    forecast_data['ARIMA'] = (future_dates, forecast_arima)
    
    # Prophet
    prophet_df = train.rename(columns={"Date": "ds", "API": "y"})
    model_prophet = Prophet()
    model_prophet.fit(prophet_df)
    future = model_prophet.make_future_dataframe(periods=horizon, freq='W')
    forecast_prophet = model_prophet.predict(future)
    future_forecast = forecast_prophet['yhat'][-horizon:]  # Only future predictions
    forecast_data['Prophet'] = (future_dates, future_forecast)
    
    # Results Table
    return pd.DataFrame({
        'Naive': [sqrt(mean_squared_error(train['API'].iloc[-horizon:], naive_forecast))],
        'Holt-Winter': [sqrt(mean_squared_error(train['API'].iloc[-horizon:], forecast_hw))],
        'ARIMA': [sqrt(mean_squared_error(train['API'].iloc[-horizon:], forecast_arima))],
        'Prophet': [sqrt(mean_squared_error(train['API'].iloc[-horizon:], future_forecast))],
    }, index=["RMSE"])

# --- Grafikai Prognozėms ---
def plot_forecast(model_name, horizon=12):
    train = weekly_data.copy()
    future_dates = pd.date_range(start=train['Date'].iloc[-1] + pd.Timedelta(weeks=1), periods=horizon, freq='W')
    
    # Initialize plot
    fig = go.Figure()

    # Training Data
    fig.add_trace(go.Scatter(
        x=train['Date'], y=train['API'], mode='lines', name='Training Data'
    ))

    # Forecast Data (Future Values)
    if model_name in forecast_data:
        dates, forecast_values = forecast_data[model_name]
        fig.add_trace(go.Scatter(
            x=dates, y=forecast_values, mode='lines', name=f'{model_name} Forecast'
        ))

    fig.update_layout(
        title=f"Forecast Plot for {model_name} - {horizon} Weeks Ahead",
        xaxis_title="Date",
        yaxis_title="API",
        height=500
    )
    return fig

def backtesting_model():
    horizons = [4, 12, 24]
    results = []

    for h in horizons:
        result = forecast_models(h)
        results.append(result.T)

    combined = pd.concat(results, axis=1)
    combined.columns = ['Horizon 4', 'Horizon 12', 'Horizon 24']
    return combined

In [197]:
def compare_models(horizon):
    """Apskaičiuoja RMSE ir MAE kiekvienam modeliui."""
    train = weekly_data[:-horizon]
    test = weekly_data[-horizon:]
    actual = test['API'].reset_index(drop=True)

    models = {}
    
    # Naive Forecast
    naive_forecast = np.full(horizon, train['API'].iloc[-1])
    models['Naive'] = {
        'RMSE': mean_squared_error(actual, naive_forecast, squared=False),
        'MAE': mean_absolute_error(actual, naive_forecast)
    }

    # Holt-Winters
    model_hw = ExponentialSmoothing(train['API'], seasonal='add', seasonal_periods=52).fit()
    forecast_hw = model_hw.forecast(horizon)
    models['Holt-Winter'] = {
        'RMSE': mean_squared_error(actual, forecast_hw, squared=False),
        'MAE': mean_absolute_error(actual, forecast_hw)
    }

    # ARIMA
    model_arima = ARIMA(train['API'], order=(2,1,2)).fit()
    forecast_arima = model_arima.forecast(horizon)
    models['ARIMA'] = {
        'RMSE': mean_squared_error(actual, forecast_arima, squared=False),
        'MAE': mean_absolute_error(actual, forecast_arima)
    }

    # Prophet
    prophet_df = train.rename(columns={"Date": "ds", "API": "y"})
    model_prophet = Prophet()
    model_prophet.fit(prophet_df)
    future = model_prophet.make_future_dataframe(periods=horizon, freq='W')
    forecast_prophet = model_prophet.predict(future)['yhat'][-horizon:]
    models['Prophet'] = {
        'RMSE': mean_squared_error(actual, forecast_prophet, squared=False),
        'MAE': mean_absolute_error(actual, forecast_prophet)
    }

    # Konvertuojame į tinkamą formatą
    comparison_data = [{'Model': model, 'RMSE': values['RMSE'], 'MAE': values['MAE']}
                       for model, values in models.items()]
    return comparison_data

In [226]:
from statsmodels.tsa.api import VAR
# --- Koreliacijos Skaičiavimas ---
def calculate_correlation():
    """Apskaičiuoja koreliaciją tarp API ir kitų kintamųjų bei grąžina pilną lentelę."""
    # Koreliacijos matrica
    correlation_matrix = weekly_data.corr()

    # Gauta koreliacija su API (be pačio API stulpelio)
    api_correlation = correlation_matrix['API'].drop('API')

    # Lentelė su visais kintamaisiais, surūšiuotais pagal absoliutų dydį
    correlation_df = pd.DataFrame({
        'Variable': api_correlation.index,
        'Correlation': api_correlation.values
    }).sort_values(by='Correlation', key=abs, ascending=False).reset_index(drop=True)

    # Grąžiname tvarkingą lentelę
    return correlation_df

def multivariate_forecast(horizon):
    """Atlieka multivariate forecasting naudodamas VAR su rankiniu būdu parinktais kintamaisiais."""
    # Rankiniu būdu parinkti kintamieji
    manual_vars = ['API', 'no2', 'pm25', 'co', 'wind-gust']  # Čia įrašyk pasirinktus kintamuosius
    
    # Išspausdiname įtrauktus kintamuosius
    print(f"Selected Variables for Forecasting: {manual_vars}")

    # Paruošiame duomenis ir užtikriname, kad indeksas yra datų formatas
    data = weekly_data[['Date'] + manual_vars].dropna()  # Include 'Date' in the filtered data
    data['Date'] = pd.to_datetime(data['Date'])          # Ensure 'Date' is datetime
    data = data.set_index('Date') 
    
    # Mokymo duomenys
    train = data.copy()

    # Mokome VAR modelį
    model_var = VAR(train)
    results = model_var.fit(maxlags=5)

    # Generuojame ateities datas
    last_date = train.index[-1]
    future_dates = pd.date_range(start=last_date + pd.Timedelta(weeks=1), periods=horizon, freq='W')

    # Prognozuojame ateities reikšmes
    forecast = results.forecast(train.values[-5:], steps=horizon)
    forecast_df = pd.DataFrame(forecast, index=future_dates, columns=manual_vars)

    # Grafikas
    fig = go.Figure()
    # Original API
    fig.add_trace(go.Scatter(x=train.index, y=train['API'], mode='lines', name="Training Data"))
    fig.add_trace(go.Scatter(x=future_dates, y=forecast_df['API'], mode='lines', name="Forecasted API"))

    fig.update_layout(
        title="Multivariate Forecasting into the Future",
        xaxis_title="Date",
        yaxis_title="API Value",
        height=500
    )

    return fig, forecast_df

In [227]:
# --- Gradio Dashboard ---
with gr.Blocks() as demo:
    with gr.Tab("Dashboard"):
        # Filters
        yearSelect = gr.CheckboxGroup(choices=dataTableAPI['Year'].unique().tolist(), label="Select Years:")
        
        # Descriptive Statistics Table
        statsTable = gr.Dataframe(label="Descriptive Statistics for All Cities", interactive=True)
        
        # Monthly API Trend Line Chart
        monthlyTrend = gr.Plot(label="Monthly API Trend for All Cities", show_label=True)
        
        # Event Handlers
        yearSelect.change(descriptive_stats_all, inputs=[yearSelect], outputs=[statsTable])
        yearSelect.change(monthly_api_trend_all, inputs=[yearSelect], outputs=[monthlyTrend])
        
        # Load initial values
        demo.load(descriptive_stats_all, inputs=[yearSelect], outputs=[statsTable])
        demo.load(monthly_api_trend_all, inputs=[yearSelect], outputs=[monthlyTrend])

    with gr.Tab("Variable Trends"):
        # Variable Selection and Line Chart
        varSelect = gr.CheckboxGroup(choices=selectedVars, label="Select Variables:")
        yearSelectVars = gr.CheckboxGroup(choices=dataTableAPI['Year'].unique().tolist(), label="Select Years:")
        aggregationSelect = gr.Radio(choices=["Weekly", "Monthly"], label="Select Aggregation Level:", value="Monthly")
        varChart = gr.Plot(label="Variable Line Chart")

        # Event Handlers for Line Chart
        varSelect.change(line_chart_selected_vars, inputs=[varSelect, yearSelectVars, aggregationSelect], outputs=[varChart])
        yearSelectVars.change(line_chart_selected_vars, inputs=[varSelect, yearSelectVars, aggregationSelect], outputs=[varChart])
        aggregationSelect.change(line_chart_selected_vars, inputs=[varSelect, yearSelectVars, aggregationSelect], outputs=[varChart])
        
    with gr.Tab("Weekly API Analysis"):
        # Filtrai
        citySelectW = gr.CheckboxGroup(choices=dataTableAPI['City'].unique().tolist(), 
                                       label="Select Cities:", 
                                       value=["Amsterdam"])
        yearSelectW = gr.CheckboxGroup(choices=dataTableAPI['Year'].unique().tolist(), 
                                       label="Select Years:")
        plotSelectW = gr.Radio(label="Select Plot Type:", 
                               choices=['calplot', 'barchart'], 
                               value="calplot")
    
        # Grafiko išvestis
        weeklyPlot = gr.Plot(label="Weekly API Plot", show_label=True)
    
        # Įvykiai filtrams
        citySelectW.change(weekly_api_plot, inputs=[citySelectW, yearSelectW, plotSelectW], outputs=[weeklyPlot])
        yearSelectW.change(weekly_api_plot, inputs=[citySelectW, yearSelectW, plotSelectW], outputs=[weeklyPlot])
        plotSelectW.change(weekly_api_plot, inputs=[citySelectW, yearSelectW, plotSelectW], outputs=[weeklyPlot])
    
        # Pradinė būsena
        demo.load(weekly_api_plot, inputs=[citySelectW, yearSelectW, plotSelectW], outputs=[weeklyPlot])
    
    with gr.Tab("WHO Threshold Analysis"):
        # Year Selection Checkbox Group
        yearSelectWHO = gr.CheckboxGroup(choices=dataTableAPI['Year'].unique().tolist(), 
                                         label="Select Years:", value=[dataTableAPI['Year'].min()])
        # WHO Threshold Map Plot
        mapPlotWHO = gr.Plot(label="WHO Threshold Exceedance Map", show_label=True)
        # Event Handler
        yearSelectWHO.change(who_threshold_map, inputs=[yearSelectWHO], outputs=[mapPlotWHO])
        # Pradinė būsena
        demo.load(who_threshold_map, inputs=[yearSelectWHO], outputs=[mapPlotWHO])

    with gr.Tab("Dominant Pollutant Analysis"):
        # Filtrai
        yearSelectDom = gr.CheckboxGroup(choices=dataTableAPI['Year'].unique().tolist(), label="Select Years:")
        pieChartDom = gr.Plot(show_label=False, container=False)
        barChartDom = gr.Plot(show_label=False, container=False)
    
        # Event Handlers
        yearSelectDom.change(dominant_pollutant_summary, inputs=[yearSelectDom], outputs=[pieChartDom])
        yearSelectDom.change(dominant_pollutant_trend, inputs=[yearSelectDom], outputs=[barChartDom])
        
        # Pradinė būsena
        demo.load(dominant_pollutant_summary, inputs=[yearSelectDom], outputs=[pieChartDom])
        demo.load(dominant_pollutant_trend, inputs=[yearSelectDom], outputs=[barChartDom])
        
    with gr.Tab("Correlation Analysis"):
        # Filtras pasirenkant kintamuosius
        varSelectCorr = gr.CheckboxGroup(choices=selectedVars, label="Select Meteorological Variables:")
        corrHeatmap = gr.Plot(show_label=True, label="Pearson Correlation Heatmap")
        corrTable = gr.Dataframe(label="Correlation Matrix", interactive=False)
        pValueTable = gr.Dataframe(label="P-Values for Significance Test", interactive=False)
    
        # Event Handler
        varSelectCorr.change(pearson_correlation_analysis, inputs=[varSelectCorr], outputs=[corrHeatmap, corrTable, pValueTable])
        
    with gr.Tab("Forecast Preparation"):
            # Stationarity Check
            adf_check = gr.Button("Check Stationarity")
            stationarity_output = gr.Dataframe(label="Stationarity Test Results")
            
            # Distribution Check
            dist_check = gr.Button("Check Distribution")
            dist_plot = gr.Plot(label="Distribution Check")
    
            # Decomposition
            decompose_btn = gr.Button("Decompose Time Series")
            decomposition_plot = gr.Plot(label="Time Series Decomposition")
    
            # ACF/PACF
            acf_pacf_btn = gr.Button("Plot ACF & PACF")
            acf_pacf_plot = gr.Plot(label="ACF and PACF")
    
            # Event Handlers
            adf_check.click(check_stationarity, outputs=stationarity_output)
            dist_check.click(check_distribution, outputs=dist_plot)
            decompose_btn.click(decompose_time_series, outputs=decomposition_plot)
            acf_pacf_btn.click(plot_acf_pacf, outputs=acf_pacf_plot)

    with gr.Tab("Forecasting & Backtesting"):
        forecast_horizon = gr.Slider(label="Forecast Horizon", minimum=4, maximum=24, step=4, value=12)
        forecast_table = gr.Dataframe(label="Forecast Results")
        backtesting_button = gr.Button("Run Backtesting")
        backtesting_table = gr.Dataframe(label="Backtesting Results")
        
        model_dropdown = gr.Dropdown(
            choices=['Naive', 'Holt-Winter', 'ARIMA', 'Prophet'], 
            label="Select Forecast Model", 
            value='Naive'
        )
        forecast_plot = gr.Plot(label="Forecast Visualization")
    
        # Event Handlers
        forecast_horizon.change(forecast_models, inputs=[forecast_horizon], outputs=forecast_table)
        model_dropdown.change(plot_forecast, inputs=[model_dropdown, forecast_horizon], outputs=forecast_plot)
        backtesting_button.click(backtesting_model, outputs=backtesting_table)
        
        # Gradio Tab for Multivariate Forecasting
    with gr.Tab("Multivariate Forecasting"):
        # Koreliacijos Skaičiavimas
        correlation_button = gr.Button("Calculate Correlation")
        correlation_table = gr.Dataframe(label="Top Variables by Correlation")
        
        # Multivariate Forecasting
        forecast_horizon_mv = gr.Slider(label="Forecast Horizon", minimum=4, maximum=24, step=4, value=12)
        mv_forecast_plot = gr.Plot(label="Multivariate Forecast Visualization")
        mv_forecast_table = gr.Dataframe(label="Forecast Results")

        # Event Handlers
        correlation_button.click(calculate_correlation, outputs=correlation_table)
        forecast_horizon_mv.change(multivariate_forecast, 
                                   inputs=[forecast_horizon_mv], 
                                   outputs=[mv_forecast_plot, mv_forecast_table])
    

if __name__ == "__main__":
    demo.launch(debug=True, share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://7406e05a648d60bcb9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Before Filtering API Mean: 36.81244389027431
After Filtering by Cities ['Amsterdam'] API Mean: 36.7043391521197
After Dropping NaNs API Mean: 36.7043391521197
Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']
Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.


No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Selected Variables for Forecasting: ['API', 'no2', 'pm25', 'co', 'wind-gust']



No frequency information was provided, so inferred frequency W-SUN will be used.



Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://7406e05a648d60bcb9.gradio.live
