# Data_download_MILANO_hourly_meteo

In [2]:
#----------------Utils--------------------------
import pandas as pd
import plotly
import plotly.graph_objects as go
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import requests

#----------------Interpolation--------------------------
from shapely.geometry import box
from scipy.interpolate import griddata, interpn

from scipy.interpolate import Rbf
from scipy.interpolate import RBFInterpolator
import pykrige

#----------------Machine Learning--------------------------
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error
pd.set_option('display.max_columns', None)


#------------------Export raster-----------------------------
import rasterio
from rasterio.features import rasterize
from rasterio.transform import from_origin

2023-10-09 10:16:44.270715: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-09 10:16:44.272190: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-09 10:16:44.296947: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-09 10:16:44.297644: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
def show_plot(df, x_col, y_col, name="Unnamed Plot", additional_traces=None):
    """Draw one 'lines+markers' trace per data frame in a single Plotly figure and show it.

    Parameters
    ----------
    df : DataFrame or list of DataFrame
        A single (non-list) object is plotted as one trace; a list yields
        one trace per element.
    x_col, y_col : column label, or sequence of labels
        Columns holding the x / y values. When ``df`` is a list these are
        indexed per frame; a single string (or any unsized scalar label) is
        reused for every frame.
    name : str or sequence of str
        Trace name(s). A single string is reused for every frame when ``df``
        is a list.
    additional_traces : iterable of plotly traces, optional
        Ready-made traces appended after the data traces.
    """
    def _per_frame(value, count):
        # A lone label would otherwise be indexed character by character
        # (e.g. the default name becoming "U", "n", "n", ...).
        if isinstance(value, (str, bytes)) or not hasattr(value, "__len__"):
            return [value] * count
        return value

    if isinstance(df, list):
        frames = df
        x_cols = _per_frame(x_col, len(frames))
        y_cols = _per_frame(y_col, len(frames))
        names = _per_frame(name, len(frames))
    else:
        # Single-frame call: the arguments are used as given, whatever their type.
        frames, x_cols, y_cols, names = [df], [x_col], [y_col], [name]

    fig = go.Figure()
    for i, single_df in enumerate(frames):
        fig.add_trace(
            go.Scatter(
                x=single_df[x_cols[i]],
                y=single_df[y_cols[i]],
                mode='lines+markers',
                name=names[i],
            )
        )

    # None (the default) means "no extra traces".
    for trace in (additional_traces or []):
        fig.add_trace(trace)
    fig.show()
    
def import_df(path, date_format="%Y-%m-%dT%H:%M:%S"):
    """Load a CSV into a DataFrame, using its first column as the index.

    Parameters
    ----------
    path : str or file-like
        Location of the CSV, passed straight to ``pd.read_csv``.
    date_format : str or None
        ``strftime`` pattern used to convert the ``'date'`` column to
        datetimes. Pass ``None`` to leave the column as read.

    Returns
    -------
    pandas.DataFrame
    """
    frame = pd.read_csv(path, index_col=0)
    if date_format is None:
        # Caller asked for the raw column: nothing to convert.
        return frame
    frame['date'] = pd.to_datetime(frame['date'], format=date_format)
    return frame


# Remove-outliers function.
# For every sensor, a row is removed when its value lies more than 3 standard
# deviations away from that sensor's mean (mean/std are computed over ALL the
# sensor's rows, not over a rolling window), at column "value_column".
def filter_outliers(input_df, sensor_list, value_column='value'):
    """Keep, sensor by sensor, only the rows within mean +/- 3 std of the sensor.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Observations; must contain a ``'sensor_id'`` column and ``value_column``.
    sensor_list : iterable
        Sensor ids to keep. Rows of sensors not listed are dropped; the output
        is ordered by the order of this list, keeping the original index.
    value_column : str
        Column holding the measured values.

    Returns
    -------
    pandas.DataFrame
        Filtered rows with the same columns as ``input_df``. Empty (but with
        the input's columns) when ``sensor_list`` is empty.

    Notes
    -----
    Rows whose value is missing are dropped. A sensor with fewer than two
    valid observations has no defined standard deviation; its non-missing
    rows are kept as they are instead of being discarded.
    """
    kept_parts = []
    for sensor in sensor_list:
        sensor_df = input_df.loc[input_df['sensor_id'] == sensor]
        values = sensor_df[value_column]
        mean = values.mean()
        std = values.std()

        if pd.isna(std):
            # No spread to test against (0 or 1 valid value): nothing can be
            # called an outlier, so only missing values are removed.
            within_band = values.notna()
        else:
            within_band = (values <= mean + 3 * std) & (values >= mean - 3 * std)

        kept_parts.append(sensor_df[within_band])

    if not kept_parts:
        # pd.concat refuses an empty list; return an empty frame with the same schema.
        return input_df.iloc[0:0].copy()

    # Single concatenation instead of growing a frame inside the loop.
    return pd.concat(kept_parts)

In [5]:
#GLOBAL VARIABLES
# Headers sent with every request: ask the API for JSON responses.
headers = {'Accept': 'application/json'}
# Endpoints of the HARMONIA API; the query string is appended directly after the "?".
milano_data_basepath = "http://api.harmonia.info.uvt.ro/weather_data_view?"
milano_sensor_base_path = "http://api.harmonia.info.uvt.ro/weather_stations?"

# NOTE(review): not referenced by the cells visible here - presumably meant to
# toggle the download step; confirm before relying on it.
download = True

# Pattern used to parse the 'date' field of the downloaded observations.
date_format = '%Y-%m-%dT%H:%M:%S'

# One entry per meteo variable:
#   (station "tipologia" used in the API query, column holding the value, csv file stem)
_meteo_dataset_specs = (
    ("Temperatura", "medio", "temperature"),
    ("Precipitazione", "valore_cumulato", "precipitation"),
    ("Direzione Vento", "medio", "wind_direction"),
    ("Velocità Vento", "medio", "wind_velocity"),
    ("Livello Idrometrico", "medio", "hydrometric_level"),
    ("Radiazione Globale", "medio", "radiation"),
    ("Umidità Relativa", "medio", "humidity"),
)

# Variables to process, in the order listed above.
meteo_variables = [spec[0] for spec in _meteo_dataset_specs]

#build the meteo datasets object with the value column of each dataset and the csv name
meteo_datasets = {
    tipologia: {"value_column": value_column, "csv_name": csv_name}
    for tipologia, value_column, csv_name in _meteo_dataset_specs
}

# Sensor records of every variable, accumulated across the loop below.
stations_data = []
#fetch the paths to be called to the HARMONIA API
for variable in meteo_variables:
    print(f"fetching paths for {variable}")
    # Sensors of this variable type located in the MI province.
    sensor_url = f'{milano_sensor_base_path}tipologia=eq.{variable}&provincia=eq.MI'
    sensor_req = requests.get(sensor_url, headers=headers)
    print(sensor_url)
    # Fail here on an HTTP error instead of parsing the error payload as a
    # sensor list and crashing later with an unrelated message.
    sensor_req.raise_for_status()
    sensors_list = sensor_req.json()

    variable_stations = [sensor['idsensore'] for sensor in sensors_list]
    # One observation URL per sensor, consumed by the data download step.
    variable_paths = [
        f"{milano_data_basepath}sensor_id=eq.{station}" for station in variable_stations
    ]

    meteo_datasets[variable]['paths'] = list(variable_paths)
    meteo_datasets[variable]['sensors'] = list(variable_stations)
    stations_data.extend(sensors_list)
    print(f"END fetching paths for {variable}")
    print("----------------------------------------")

#extract information of the meteo stations from the sensors list.
# this data is then merged with the meteo observations to complement the list.
drop_cols = ['nomestazione', 'datastart', 'storico', 'cgb_nord', 'cgb_est', 'location', ':@computed_region_6hky_swhk', ':@computed_region_ttgh_9sm5', 'datastop', 'wkb_geometry']
stations_data_df = pd.DataFrame(stations_data)
# Raises KeyError if the API stops returning any of the listed columns.
stations_data_df = stations_data_df.drop(columns=drop_cols)


fetching paths for Temperatura
http://api.harmonia.info.uvt.ro/weather_stations?tipologia=eq.Temperatura&provincia=eq.MI
END fetching paths for Temperatura
----------------------------------------
fetching paths for Precipitazione
http://api.harmonia.info.uvt.ro/weather_stations?tipologia=eq.Precipitazione&provincia=eq.MI
END fetching paths for Precipitazione
----------------------------------------
fetching paths for Direzione Vento
http://api.harmonia.info.uvt.ro/weather_stations?tipologia=eq.Direzione Vento&provincia=eq.MI
END fetching paths for Direzione Vento
----------------------------------------
fetching paths for Velocità Vento
http://api.harmonia.info.uvt.ro/weather_stations?tipologia=eq.Velocità Vento&provincia=eq.MI
END fetching paths for Velocità Vento
----------------------------------------
fetching paths for Livello Idrometrico
http://api.harmonia.info.uvt.ro/weather_stations?tipologia=eq.Livello Idrometrico&provincia=eq.MI
END fetching paths for Livello Idrometrico
--

In [6]:
#request data from the HARMONIA API
headers = {'Accept': 'application/json'}

for variable in meteo_variables:
    dataset_cfg = meteo_datasets[variable]
    print(f"fetching data for {variable}")
    variable_data = []

    for variable_path in dataset_cfg['paths']:
        print(f'fetching {variable_path}')
        r = requests.get(variable_path, headers=headers)
        # Stop on HTTP errors: an error payload is a JSON object, and extending
        # a list with a dict would silently append its keys as "observations".
        r.raise_for_status()
        req_data = r.json()
        if not isinstance(req_data, list):
            raise ValueError(
                f"unexpected response for {variable_path}: expected a JSON list, got {type(req_data).__name__}"
            )
        variable_data.extend(req_data)
    print(f"END fetching data for {variable}")

    print(f'Building DF for {variable}')
    df = pd.DataFrame(variable_data)
    # Keep the untouched download next to the cleaned version.
    dataset_cfg['raw'] = df.copy()
    # -999.0 is turned into NaN (presumably the API's missing-value marker -
    # confirm), then rows without a value in the variable's value column are dropped.
    df = df.replace(-999.0, np.nan).dropna(subset=[dataset_cfg['value_column']])
    df['date'] = pd.to_datetime(df['date'],  format=date_format)
    # Per-sensor outlier removal, restricted to the sensors listed for this variable.
    df = filter_outliers(df, dataset_cfg['sensors'], dataset_cfg['value_column'])
    df = df.sort_values(by='date')
    dataset_cfg['df'] = df.copy()
    print(f'END Building DF for {variable}')
    print("----------------------------------------")


fetching data for Temperatura
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.12757
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.12759
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.14742
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.17488
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.2001
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.2039
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.2063
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.4001
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.4058
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.4066
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.5897
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.5903
fetching http://api.harmonia.info.uvt.ro/w

fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.6179
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.6180
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.6183
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.6184
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.6185
fetching http://api.harmonia.info.uvt.ro/weather_data_view?sensor_id=eq.6597
END fetching data for Umidità Relativa
Building DF for Umidità Relativa
END Building DF for Umidità Relativa
----------------------------------------


In [8]:
# Inspect the station metadata table built from the downloaded sensor listings.
stations_data_df

Unnamed: 0,idsensore,tipologia,unit_dimisura,idstazione,quota,provincia,lng,lat
0,12757,Temperatura,°C,1511,215,MI,8.952897,45.607845
1,12759,Temperatura,°C,1512,160,MI,9.005200,45.535577
2,14742,Temperatura,°C,1546,143,MI,9.087923,45.517811
3,17488,Temperatura,°C,1874,137,MI,8.854409,45.458065
4,2001,Temperatura,°C,100,120,MI,9.257515,45.496780
...,...,...,...,...,...,...,...,...
93,6180,Umidità Relativa,%,535,97,MI,9.134517,45.324517
94,6183,Umidità Relativa,%,614,112,MI,9.353497,45.472580
95,6184,Umidità Relativa,%,513,199,MI,9.508122,45.613692
96,6185,Umidità Relativa,%,503,122,MI,9.141786,45.476063


In [9]:
# Build the process-ready datasets and save one CSV per variable.
# Inputs:
#   meteo_datasets[variable]['df'] -> observations of each variable
#   stations_data_df               -> station / sensor metadata
#   meteo_variables                -> variables to process
# NOTE: meteo_datasets[variable]['df'] is replaced by its processed version, so
# this cell cannot be re-run on its own (the columns dropped below are gone
# after the first pass); re-run the download cell first.

# Observation columns discarded for every variable.
shared_unused_columns = ['cgb_nord', 'cgb_est', 'massimo', 'valore_medio_giornaliero', 'minimo_valore_medio_orario', 'massimo_valore_medio_orario']

# Station columns attached to the observations.
station_columns = ['idsensore', 'tipologia', 'unit_dimisura', 'idstazione', 'quota', 'provincia', 'lng', 'lat']

# Final column names of the saved datasets.
final_column_names = {
    "idsensore": "sensorID",
    "date": "date",
    "valore": "value",
    "tipologia": "type",
    "unit_dimisura": "unit",
    "idstazione": "stationID",
    "quota": "altitude",
    "provincia": "province"
}

for variable in meteo_variables:
    print(f'Building data-ready dataset for {variable}')
    dataset_entry = meteo_datasets[variable]

    # Precipitation keeps 'valore_cumulato' as its value; every other variable
    # keeps 'medio'. The column that is not kept is dropped.
    if variable == "Precipitazione":
        kept_column, discarded_column = 'valore_cumulato', 'medio'
    else:
        kept_column, discarded_column = 'medio', 'valore_cumulato'

    # Metadata rows of the stations measuring this variable.
    station_info = stations_data_df.loc[stations_data_df['tipologia'] == variable, station_columns]

    var_df = (
        dataset_entry['df']
        .drop(columns=shared_unused_columns + [discarded_column])
        .rename(columns={"sensor_id": "idsensore", kept_column: "valore"})
        # default (inner) merge: observations without a matching station row are left out
        .merge(station_info, on=['idsensore', 'lat', 'lng'])
        .rename(columns=final_column_names)
    )

    dataset_entry['df'] = var_df.copy()
    print(f"saving to file {dataset_entry['csv_name']}.csv")
    dataset_entry['df'].to_csv(f'../data/milano_meteo_data/{dataset_entry["csv_name"]}.csv')


Building data-ready dataset for Temperatura
saving to file AAA_READY_temperature
Building data-ready dataset for Precipitazione
saving to file AAA_READY_precipitation
Building data-ready dataset for Direzione Vento
saving to file AAA_READY_wind_direction
Building data-ready dataset for Velocità Vento
saving to file AAA_READY_wind_velocity
Building data-ready dataset for Livello Idrometrico
saving to file AAA_READY_hydrometric_level
Building data-ready dataset for Radiazione Globale
saving to file AAA_READY_radiation
Building data-ready dataset for Umidità Relativa
saving to file AAA_READY_humidity


In [7]:
# Reload the saved temperature CSV through import_df (first column used as the
# index, 'date' parsed with the default format).
a = import_df(f'../data/milano_meteo_data/temperature.csv')

In [8]:
# Display the reloaded temperature dataset.
a

Unnamed: 0,sensorID,lat,lng,date,value,type,unit,stationID,altitude,province
0,5920,45.476063,9.141786,1989-01-01 01:00:00,2.1,Temperatura,°C,503,122,MI
1,5920,45.476063,9.141786,1989-01-01 02:00:00,2.5,Temperatura,°C,503,122,MI
2,5920,45.476063,9.141786,1989-01-01 03:00:00,2.0,Temperatura,°C,503,122,MI
3,5920,45.476063,9.141786,1989-01-01 04:00:00,1.8,Temperatura,°C,503,122,MI
4,5920,45.476063,9.141786,1989-01-01 05:00:00,1.5,Temperatura,°C,503,122,MI
...,...,...,...,...,...,...,...,...,...,...
3907430,12757,45.607845,8.952897,2022-03-12 20:00:00,7.3,Temperatura,°C,1511,215,MI
3907431,12757,45.607845,8.952897,2022-03-12 21:00:00,6.9,Temperatura,°C,1511,215,MI
3907432,12757,45.607845,8.952897,2022-03-12 22:00:00,6.6,Temperatura,°C,1511,215,MI
3907433,12757,45.607845,8.952897,2022-03-12 23:00:00,5.8,Temperatura,°C,1511,215,MI
