<div class="alert alert-info">
<u><strong>Authors:</strong></u> <b>Ahmed Mukhtar</b> (ahmed.mukhtar@mail.polimi.it) and <b>Ahmed Yassin</b> (ahmedmohamed1@mail.polimi.it) - 2023 - Politecnico di Milano, Italy <br>
</div>

## Netatmo temperature time series cleaning (part 3)

In [None]:
import os
import folium
import pandas as pd
import geopandas as gpd
import plotly.graph_objects as go
from shapely.geometry import Point
import ipywidgets as widgets
%load_ext autoreload

In [None]:
# Dropdown for choosing the year to analyse (2014-2023, preselected to 2022).
year_w = widgets.Dropdown(
    options=list(range(2014, 2024)),
    value=2022,
    description='Year:',
    disabled=False,
    layout=dict(width='max-content'),
    style=dict(description_width='initial'),
)
year_w

In [None]:
# Snapshot of the dropdown selection taken when this cell runs;
# re-run this cell (and the ones below) after picking a different year.
year = year_w.value

In [None]:
# Folder holding the monthly Netatmo CSV files read by count_observations.
folder_path = 'Netatmo_csv_files/'

## Reliability_index (year)

In [None]:
def count_observations(folder_path, year, file_type):
    """Count the observations of every sensor, per month and over the whole year.

    For each month the file ``temp_Net_milan_{year}-{month}_{file_type}.csv``
    is read from ``folder_path`` and its rows are counted per ``module_id``.

    Parameters
    ----------
    folder_path : str
        Folder containing the monthly CSV files (with or without a trailing
        path separator).
    year : int
        Year used in the file names. For 2023 only months 1-9 are read,
        otherwise months 1-12.
    file_type : str
        Suffix used in the file names (the notebook passes 'clip' and 'clean').

    Returns
    -------
    pandas.DataFrame
        One row per ``module_id`` with the sensor metadata columns, one
        ``n.obs_{month}`` column per month and a ``total_obs`` column.
        Missing values are replaced by 0 in every column.

    Raises
    ------
    FileNotFoundError
        Raised by ``pandas.read_csv`` if one of the monthly files is missing.
    """
    columns = ['module_id', 'device_id', 'lat', 'long', 'timezone', 'country',
               'altitude', 'city', 'street', 'geometry']

    # 2023 is handled as January-September only; any other year as all 12 months.
    months = range(1, 10) if year == 2023 else range(1, 13)

    # Per-month tables live in a local dict rather than in globals(), so calls
    # do not leak df_1..df_12 into the notebook namespace or overwrite each other.
    monthly = {}
    for month in months:
        file_path = os.path.join(folder_path, f'temp_Net_milan_{year}-{month}_{file_type}.csv')
        data = pd.read_csv(file_path)
        # Number of observations of each sensor in this month
        sensor_counts = data.groupby('module_id').size().reset_index(name=f'n.obs_{month}')
        # First metadata row seen for each sensor in this month
        sensor_info = data.drop_duplicates('module_id')[columns]
        monthly[month] = pd.merge(sensor_counts, sensor_info, on='module_id', how='left')

    # Metadata of every sensor seen in any month; the earliest month wins on duplicates.
    result = (
        pd.concat([monthly[month][columns] for month in months], ignore_index=True)
        .drop_duplicates(subset='module_id')
        .reset_index(drop=True)
    )

    # Attach the monthly counts; sensors absent in a month get NaN here.
    for month in months:
        monthly_counts = monthly[month][['module_id', f'n.obs_{month}']]
        result = pd.merge(result, monthly_counts, on='module_id', how='outer')

    # NOTE: fills every column (metadata included), not just the counts.
    result = result.fillna(0)
    result['total_obs'] = result[[f'n.obs_{month}' for month in months]].sum(axis=1)

    return result

In [None]:
# Per-sensor observation counts read from the '*_clip.csv' files of the selected year.
sens_count_initial = count_observations(folder_path, year,'clip')

In [None]:
# Display the counts computed from the 'clip' files.
sens_count_initial

In [None]:
# Per-sensor observation counts read from the '*_clean.csv' files of the selected year.
sens_count_cleaned = count_observations(folder_path, year,'clean')

In [None]:
# Display the counts computed from the 'clean' files.
sens_count_cleaned

In [None]:
# Months covered by the selected year: 2023 is handled as January-September,
# every other year as all twelve months.
months = range(1, 10) if year == 2023 else range(1, 13)

# Observation-count columns carried over from the 'clip' (initial) table.
columns_to_merge = [f'n.obs_{month}' for month in months] + ["total_obs"]

# Initial counts reduced to the key plus the count columns.
merged_columns = pd.merge(
    sens_count_initial[['module_id'] + columns_to_merge],
    sens_count_cleaned[['module_id']],
    on='module_id',
    how='left',
)

# Tag the initial counts with an '_init' suffix so they do not clash with
# the identically named columns of the cleaned table.
rename_mapping = {name: f'{name}_init' for name in columns_to_merge}
merged_columns = merged_columns.rename(columns=rename_mapping)

# One row per sensor of the cleaned table, with cleaned and initial counts side by side.
sens_count_merged = pd.merge(
    sens_count_cleaned,
    merged_columns,
    on='module_id',
    how='left',
)

# Observations removed by the cleaning (absolute and as a share of the initial count).
for month in months:
    sens_count_merged[f'removed_data-{month}'] = (
        sens_count_merged[f'n.obs_{month}_init'].sub(sens_count_merged[f'n.obs_{month}'])
    )
    sens_count_merged[f'percentage-{month}'] = (
        sens_count_merged[f'removed_data-{month}'].div(sens_count_merged[f'n.obs_{month}_init'])
    )
sens_count_merged[f'removed_data-{year}'] = (
    sens_count_merged['total_obs_init'].sub(sens_count_merged['total_obs'])
)
sens_count_merged[f'percentage-{year}'] = (
    sens_count_merged[f'removed_data-{year}'].div(sens_count_merged['total_obs_init'])
)
# Undefined ratios (0/0) and counts missing from the initial table become 0.
# This must happen before the reliability ratios below are computed.
sens_count_merged = sens_count_merged.fillna(0)

# Reliability = share of the initial observations that survived the cleaning.
for month in months:
    sens_count_merged[f'sens_reliability-{month}'] = (
        sens_count_merged[f'n.obs_{month}'].div(sens_count_merged[f'n.obs_{month}_init'])
    )
# The raw monthly counts are no longer needed once the ratios exist.
sens_count_merged = sens_count_merged.drop(
    columns=[name for month in months for name in (f'n.obs_{month}', f'n.obs_{month}_init')]
)
sens_count_merged[f'sens_reliability-{year}'] = (
    sens_count_merged['total_obs'].div(sens_count_merged['total_obs_init'])
)
sens_count_merged = sens_count_merged.drop(columns=['total_obs', 'total_obs_init'])
sens_count_merged = sens_count_merged.fillna(0)

In [None]:
# Display the removed-data, percentage and reliability columns per sensor.
sens_count_merged

In [None]:
def plot_map(selected_df, year, aoi_filepath='aoi.gpkg'):
    """Build an interactive folium map of the stations coloured by reliability class.

    Parameters
    ----------
    selected_df : pandas.DataFrame
        One row per station. The columns 'long', 'lat', 'module_id', 'city',
        'street' and ``f'sens_reliability-{year}'`` are read.
    year : int or str
        Suffix selecting the reliability column ``f'sens_reliability-{year}'``.
    aoi_filepath : str, default 'aoi.gpkg'
        File read with ``geopandas.read_file`` and drawn as the area of interest.

    Returns
    -------
    folium.Map or None
        The map, or None (after printing a message) if the area-of-interest
        file cannot be read.
    """
    try:
        aoi_gdf = gpd.read_file(aoi_filepath)
    except Exception as e:
        print(f"Failed to read file {aoi_filepath}: {e}")
        return None

    reliability_col = f'sens_reliability-{year}'

    # Build point geometries from the longitude/latitude columns
    geometry_array = [Point(xy) for xy in zip(selected_df['long'], selected_df['lat'])]
    selected_gdf = gpd.GeoDataFrame(selected_df, geometry=geometry_array, crs='EPSG:4326')

    # Base map centred on the mean station position
    m = folium.Map(
        location=[selected_gdf.geometry.y.mean(), selected_gdf.geometry.x.mean()],
        zoom_start=9,
        tiles='OpenStreetMap')

    folium.GeoJson(
        aoi_gdf,
        name='Area of Interest',
        style_function=lambda x: {'fillColor': 'SkyBlue', 'color': 'Blue', 'fillOpacity': 0.6, 'weight': 2}
    ).add_to(m)

    # Classify the reliability into four equal-width classes on [0, 1]
    bins = [0, 0.25, 0.5, 0.75, 1]
    labels = ['0-0.25', '0.25-0.5', '0.5-0.75', '0.75-1']
    color_map = {
        '0-0.25': 'black',
        '0.25-0.5': 'red',
        '0.5-0.75': 'yellow',
        '0.75-1': 'green'
    }
    # Marker colour for stations whose reliability falls outside [0, 1] or is
    # not a number: pd.cut yields NaN for those and color_map has no such key.
    fallback_color = 'gray'
    selected_gdf['Reliability Class'] = pd.cut(selected_gdf[reliability_col], bins=bins, labels=labels, include_lowest=True)

    # One circle marker per station, coloured by its reliability class
    for _, row in selected_gdf.iterrows():
        # Popups are rendered as HTML, so the line break has to be a <br> tag
        popup_text = f'Station: {row["module_id"]} ({row["city"]},{row["street"]})<br>Reliability:({row[reliability_col]:.2f})'
        marker_color = color_map.get(row['Reliability Class'], fallback_color)
        folium.CircleMarker(
            location=[row.geometry.y, row.geometry.x],
            radius=5,
            color=marker_color,
            fill=True,
            fill_color=marker_color,
            fill_opacity=0.7,
            popup=popup_text
        ).add_to(m)

    # Add a legend (the opening <div> tag is now closed before its content)
    legend_html = '''
     <div style="position: fixed; 
                 bottom: 15px; right: 10px; width: 160px; height: 110px; 
                 border:2px solid black; background-color: white; z-index:9999; font-size:14px;
                 ">&nbsp; <b>Stations_reliability</b><br>
                 &nbsp; <i style="background-color:black;">&nbsp;&nbsp;&nbsp;&nbsp;</i> unreliable <br>
                 &nbsp; <i style="background-color:red;">&nbsp;&nbsp;&nbsp;&nbsp;</i> low(0.25-0.5) <br>
                 &nbsp; <i style="background-color:yellow;">&nbsp;&nbsp;&nbsp;&nbsp;</i> moderate(0.5-0.75) <br>
                 &nbsp; <i style="background-color:green;">&nbsp;&nbsp;&nbsp;&nbsp;</i> high(0.75-1.0)
              </div>
     '''
    m.get_root().html.add_child(folium.Element(legend_html))

    # Add layer control
    folium.LayerControl().add_to(m)

    return m


In [None]:
# Map of the stations coloured by their reliability over the whole selected year.
plot_map(sens_count_merged, year)

## Reliability_index (month)

In [None]:
# Dropdown for choosing the month to map. Only months that have a
# 'sens_reliability-{month}' column are offered: the notebook processes
# January-September for 2023 and all twelve months for any other year, so
# offering 10-12 for 2023 would raise a KeyError in the monthly map.
month_w = widgets.Dropdown(
    options = list(range(1, 10)) if year == 2023 else list(range(1, 13)),
    value = 1,
    description = 'Month:',
    disabled = False,
    layout = {'width': 'max-content'},
    style = {'description_width': 'initial'}
)
month_w

In [None]:
# Snapshot of the dropdown selection taken when this cell runs;
# re-run this cell after picking a different month.
month = month_w.value

In [None]:
def plot_map_montly(selected_df, month, aoi_filepath='aoi.gpkg'):
    """Build the station reliability map for a single month.

    The monthly map differs from the yearly one only in which
    ``sens_reliability-<suffix>`` column is read, so this delegates to
    ``plot_map`` (defined in an earlier cell) instead of duplicating its body.

    Parameters
    ----------
    selected_df : pandas.DataFrame
        One row per station. The columns 'long', 'lat', 'module_id', 'city',
        'street' and ``f'sens_reliability-{month}'`` are read.
    month : int or str
        Suffix selecting the reliability column ``f'sens_reliability-{month}'``.
    aoi_filepath : str, default 'aoi.gpkg'
        File read with ``geopandas.read_file`` and drawn as the area of interest.

    Returns
    -------
    folium.Map or None
        The map, or None (after printing a message) if the area-of-interest
        file cannot be read.
    """
    # plot_map's second argument is only used as the column suffix, so the
    # month can be passed in its place.
    return plot_map(selected_df, month, aoi_filepath=aoi_filepath)


In [None]:
# Map of the stations coloured by their reliability in the selected month.
plot_map_montly(sens_count_merged, month)