In [61]:
import sys
sys.path.append("../..")

#Defining libraries
import os
import math
from datetime import date, timedelta
import pandas as pd
import xarray as xr
import plotly.graph_objects as go
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import box
from scipy.interpolate import griddata, interpn
import datacube
from copy import deepcopy
import statsmodels.api as sm

import rasterio
from rasterio.plot import show
from rasterio.mask import mask
from rasterio.windows import Window
from rasterio.warp import reproject, Resampling
#from rasterio.enums import Resampling
from rasterio.transform import from_origin
from rasterio.windows import Window

import matplotlib.pyplot as plt

from modules import processing_module as processing

In [62]:
'''
1-11-> Residential urban areas 
2-121,13->Industrial and abbandoned urban areas
3-122,123,124 Transportation infrastructure (streets, highways, airports, and ports)
4-14->Urban green areas
5-2->Agricultural areas
6-3->Forest
7-4/5->Hydro and humid bodies
'''
#Convert from copernicus code 2018 to an internal code
URBAN = 1
INDUSTRIAL = 2
TRANSPORTATION = 3
URBAN_VEGETATION = 4
RURAL = 5
FOREST = 6
WATER = 7
    
# Function to check if the file is a tiff and must be read.
def check_wrong_files(file_name):
    if f == 'clip': return True #avoid entering the "clip" folder
    if 'ipynb' in f: return True #avoid entering the "ipynb_checkpoint" file
    if 'tar' in f: return True #avoid entering "tar" files
    if 'aux' in f: return True #avoid entering "aux" files
    return False


In [63]:
# City parameters and global variables
city_info = {
    "resolution": 5,
    "epsg": 32632,
    "capitalized": "Milan"
}

city = 'MILANO'
current_city_info =  city_info
city_epsg = current_city_info['epsg']
data_folder = "data"
landcover_path = f"{data_folder}/landcover/DUSAF_2021_milano_mapped.tif"
# total_samples_per_raster = 1000

In [64]:
#process...

print(f'Processing for {city}')
with rasterio.open(landcover_path, driver="GTiff") as lancover_raster:
    landcover_array = lancover_raster.read(1)
    print(lancover_raster.profile)
    rows, cols = landcover_array.shape
    x_positions = np.arange(0, cols)
    y_positions = np.arange(0, rows)
    x, y = np.meshgrid(x_positions, y_positions)
    x_flat = x.flatten()
    y_flat = y.flatten()
    values_flat = landcover_array.flatten()

    # Create a DataFrame for the Landcover 
    landcover_df = pd.DataFrame({'x': x_flat, 'y': y_flat, 'landcover': values_flat})
    #landcover_df['landcover'] = landcover_df['landcover'].apply(map_urban_atlas_class).astype('int16')

lst_folder = f"{data_folder}"

lst_path = f"{lst_folder}/2022_LST_comune_milano.tif"
with rasterio.open(lst_path, driver="GTiff") as lst_raster:
    lst_array = lst_raster.read(1)
    print(lst_raster.profile)
    rows, cols = landcover_array.shape
    x_positions = np.arange(0, cols)[:-1]
    y_positions = np.arange(0, rows)
    x, y = np.meshgrid(x_positions, y_positions)
    x_flat = x.flatten()
    y_flat = y.flatten()
    values_flat = lst_array.flatten()

    # Create a DataFrame for the lst
    lst_df = pd.DataFrame({'x': x_flat, 'y': y_flat, 'lst': values_flat})
    lst_df['landcover'] =  landcover_df.copy()['landcover']

    #lst_df = lst_df.loc[
        #(lst_df['landcover'] != -9999) & (lst_df['lst'] > 273)
    #]
    urban_veg_mean = lst_df.loc[(lst_df['landcover'] == URBAN_VEGETATION) & (lst_df['lst'] > 273)]['lst'].mean()
    print(urban_veg_mean)

    #uhi_raster = np.where(lst_array > 273, (lst_array > rural_mean).astype('int16'), -9999) #setting nodata from LST
    #uhi_raster = np.where(landcover_array != -9999, uhi_raster, -9999) #setting nodata from landcover
    #uhi_raster = np.where(landcover_array < 40000, uhi_raster, -9999) #setting the water as nodata

    #uhi_meta = lst_raster.profile.copy()
    
    #uhi_meta['dtype'] = np.int16
    #uhi_meta['nodata'] = -9999

    #uhi_raster_path = f"{lst_folder}/{f}/{f}_uhi.tif"
    #with rasterio.open(uhi_raster_path, 'w', **uhi_meta) as dest:
        #dest.write(uhi_raster, 1)
            
            

Processing for MILANO
{'driver': 'GTiff', 'dtype': 'uint16', 'nodata': 9999.0, 'width': 3708, 'height': 3314, 'count': 1, 'crs': CRS.from_epsg(32632), 'transform': Affine(5.0, 0.0, 503176.11479999963,
       0.0, -5.0, 5042500.992000001), 'tiled': False, 'interleave': 'band'}
{'driver': 'GTiff', 'dtype': 'float32', 'nodata': 0.0, 'width': 3707, 'height': 3314, 'count': 1, 'crs': CRS.from_epsg(32632), 'transform': Affine(5.0, 0.0, 503180.0,
       0.0, -5.0, 5042505.0), 'tiled': False, 'interleave': 'band'}
315.85037


In [65]:
lst_df 

Unnamed: 0,x,y,lst,landcover
0,0,0,0.0,9999
1,1,0,0.0,9999
2,2,0,0.0,9999
3,3,0,0.0,9999
4,4,0,0.0,9999
...,...,...,...,...
12284993,3702,3313,0.0,9999
12284994,3703,3313,0.0,9999
12284995,3704,3313,0.0,9999
12284996,3705,3313,0.0,9999


In [66]:
simulated_vegetation_path = f"{lst_folder}/30_random_parks_15000_20000.tif"

with rasterio.open(simulated_vegetation_path, driver="GTiff") as simulated_vegetation_raster:
    simulated_vegetation_array = simulated_vegetation_raster.read(1)
    print(simulated_vegetation_raster.profile)
    rows, cols = landcover_array.shape
    x_positions = np.arange(0, cols)[:-1]
    y_positions = np.arange(0, rows)
    x, y = np.meshgrid(x_positions, y_positions)
    x_flat = x.flatten()
    y_flat = y.flatten()
    values_flat = simulated_vegetation_array.flatten()

    # Add column to LST DF
    #veg_df = pd.DataFrame({'x': x_flat, 'y': y_flat, 'new_areas': values_flat})
    #lst_df['new_veg'] =  values_flat

{'driver': 'GTiff', 'dtype': 'uint8', 'nodata': 0.0, 'width': 3708, 'height': 3314, 'count': 1, 'crs': CRS.from_epsg(32632), 'transform': Affine(5.0, 0.0, 503176.11479999963,
       0.0, -5.0, 5042500.992000001), 'tiled': False, 'interleave': 'band'}


In [67]:
# LANDCOVER Raster
rows, cols = landcover_array.shape
x_positions = np.arange(0, cols)  # Use full width
y_positions = np.arange(0, rows)
x, y = np.meshgrid(x_positions, y_positions)
landcover_df = pd.DataFrame({'x': x.flatten(), 'y': y.flatten(), 'landcover': landcover_array.flatten()})

# LST Raster (One pixel less in width)
rows, cols = lst_array.shape
x_positions = np.arange(0, cols)  # This already excludes the last column
y_positions = np.arange(0, rows)
x, y = np.meshgrid(x_positions, y_positions)
lst_df = pd.DataFrame({'x': x.flatten(), 'y': y.flatten(), 'lst': lst_array.flatten()})

# SIMULATED VEGETATION Raster
rows, cols = simulated_vegetation_array.shape
x_positions = np.arange(0, cols)  # Full width
y_positions = np.arange(0, rows)
x, y = np.meshgrid(x_positions, y_positions)
veg_df = pd.DataFrame({'x': x.flatten(), 'y': y.flatten(), 'new_veg': simulated_vegetation_array.flatten()})

# Merge LST and Landcover
lst_df = lst_df.merge(landcover_df, on=['x', 'y'], how='inner')

# Merge with Vegetation Data (ensure correct matching)
lst_df = lst_df.merge(veg_df, on=['x', 'y'], how='inner')

In [68]:
lst_df.loc[lst_df['new_veg'] == 1]

Unnamed: 0,x,y,lst,landcover,new_veg
1532382,1391,413,316.294983,1,1
1532383,1392,413,316.294983,1,1
1532384,1393,413,316.294983,1,1
1536084,1386,414,316.482971,1,1
1536085,1387,414,316.482971,1,1
...,...,...,...,...,...
11290411,2596,3045,314.620148,2,1
11290412,2597,3045,314.620148,2,1
11290413,2598,3045,314.620148,2,1
11294118,2596,3046,314.620148,2,1


In [69]:
lst_folder = f"{data_folder}"

lst_path = f"{lst_folder}/2022_NDVI_comune_milano.tif"
with rasterio.open(lst_path, driver="GTiff") as ndvi_raster:
    ndvi_array = ndvi_raster.read(1)
    print(ndvi_raster.profile)
    rows, cols = landcover_array.shape
    x_positions = np.arange(0, cols)
    y_positions = np.arange(0, rows)
    x, y = np.meshgrid(x_positions, y_positions)
    x_flat = x.flatten()
    y_flat = y.flatten()
    values_flat = ndvi_array.flatten()
    
    lst_df['ndvi'] =  values_flat

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': -9999.0, 'width': 3707, 'height': 3314, 'count': 1, 'crs': CRS.from_epsg(32632), 'transform': Affine(5.0, 0.0, 503180.0,
       0.0, -5.0, 5042505.0), 'tiled': False, 'interleave': 'band'}


In [70]:
# Remove null and invalid values
lst_df = lst_df.loc[
    (lst_df['landcover'] != 9999) & (lst_df['lst'] > 273)
]
#Set land cover values to keep as 0 in new_veg
lst_df.loc[lst_df['new_veg'] != 1, 'new_veg'] = 0
lst_df

Unnamed: 0,x,y,lst,landcover,new_veg,ndvi
1973,1973,0,316.944427,2,0,0.018478
2086,2086,0,315.047424,4,0,0.222084
2087,2087,0,315.047424,4,0,0.222084
2088,2088,0,315.047424,4,0,0.222084
2089,2089,0,315.047424,4,0,0.222084
...,...,...,...,...,...,...
12279969,2385,3312,306.956970,5,0,0.375333
12279970,2386,3312,306.283600,5,0,0.378146
12283672,2381,3313,306.662994,5,0,0.360845
12283673,2382,3313,306.662994,5,0,0.360845


In [71]:
reference_lst = lst_df.loc[
    (lst_df['landcover'] == URBAN_VEGETATION)
]['lst'].mean()
print(reference_lst)
reference_ndvi = lst_df.loc[
    (lst_df['landcover'] == URBAN_VEGETATION)
]['ndvi'].mean()
print(reference_ndvi)

315.9066
0.22731388


In [72]:
lst_df.loc[lst_df['new_veg'] == 1, 'lst'] = reference_lst
lst_df.loc[lst_df['new_veg'] == 1, 'ndvi'] = reference_ndvi
lst_df.loc[lst_df['new_veg'] == 1, 'landcover'] = URBAN_VEGETATION

In [73]:
samples_base_path = f'training_samples'    
predict_path = f'{samples_base_path}/{city}_predict_simulated_vegetation_30_random_parks_15000_20000.csv'
print(f'Saving predict in {predict_path}')
predict_df = lst_df.reset_index(drop=True)
predict_df.to_csv(predict_path)
    

Saving predict in training_samples/MILANO_predict_simulated_vegetation_30_random_parks_15000_20000.csv


In [74]:
predict_df

Unnamed: 0,x,y,lst,landcover,new_veg,ndvi
0,1973,0,316.944427,2,0,0.018478
1,2086,0,315.047424,4,0,0.222084
2,2087,0,315.047424,4,0,0.222084
3,2088,0,315.047424,4,0,0.222084
4,2089,0,315.047424,4,0,0.222084
...,...,...,...,...,...,...
7262387,2385,3312,306.956970,5,0,0.375333
7262388,2386,3312,306.283600,5,0,0.378146
7262389,2381,3313,306.662994,5,0,0.360845
7262390,2382,3313,306.662994,5,0,0.360845
