In [1]:
import sys
sys.path.append("../..")

#Defining libraries
import os
import pandas as pd
import numpy as np
import datacube
from copy import deepcopy
import statsmodels.api as sm

import rasterio
from rasterio.warp import reproject, Resampling

from modules import processing_module as processing

2025-02-17 10:37:55.703493: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-17 10:37:55.704836: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-17 10:37:55.728598: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-17 10:37:55.729615: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
'''
1-11-> Residential urban areas 
2-121,13->Industrial and abbandoned urban areas
3-122,123,124 Transportation infrastructure (streets, highways, airports, and ports)
4-14->Urban green areas
5-2->Agricultural areas
6-3->Forest
7-4/5->Hydro and humid bodies
'''
#Convert from copernicus code 2018 to an internal code
URBAN = 1
INDUSTRIAL = 2
TRANSPORTATION = 3
URBAN_VEGETATION = 4
RURAL = 5
FOREST = 6
WATER = 7

LC_NO_DATA = 9999
NO_DATA = -9999

    
# Function to check if the file is a tiff and must be read.
def check_wrong_files(f):
    if f == 'clip': return True #avoid entering the "clip" folder
    if f in ['2017']: return True #exclude 2017 data
    if 'ipynb' in f: return True #avoid entering the "ipynb_checkpoint" file
    if 'tar' in f: return True #avoid entering "tar" files
    if 'aux' in f: return True #avoid entering "aux" files
    if 'csv' in f: return True #avoid entering "csv" files
    return False

def match_landsat_to_landcover(landsat):
    year = int(landsat[17:21])
    if year in [2015,2016]:
        return str(2015)
    elif year in [2017,2018,2019]:
        return str(2018)
    elif year in [2020,2021,2022]:
        return str(2021)

In [4]:
# City parameters and global variables
city_info = {
    "resolution": 5,
    "epsg": 32632,
    "capitalized": "Milan"
}

city = 'MILANO'
current_city_info = city_info
city_epsg = current_city_info['epsg']
data_folder = "data"
landcover_path = f'{data_folder}/landcover/DUSAF_2021_milano_mapped.tif'

landsat_raster_folder = "/home/user/ODC_harmonia/Landsat/Milan/data"
sat_images_path = f"{landsat_raster_folder}/clip"
file_list = os.listdir(f"{sat_images_path}")

#landsat_raster_file_list = os.listdir(f"{landsat_raster_folder}")



## Legacy simulation base from all Landsat scenes

In [7]:
predict_n = 0
predict_lst = None
predict_ndvi = None

for f in file_list:
    if check_wrong_files(f): continue

    file_date_string = f.split('_')[3]  # example: LC08_L2SP_194028_20160825_20200906_02_T1_LST
    year = match_landsat_to_landcover(f)
    if year == '2021':
        print(f'Processing {f}')
    
        with rasterio.open(landcover_path, driver="GTiff") as landcover_raster:
            landcover_array = landcover_raster.read(1)
            landcover_transform = landcover_raster.transform
            landcover_crs = landcover_raster.crs
            print('Read land cover')

        if not isinstance(predict_lst, np.ndarray) and not isinstance(predict_ndvi, np.ndarray):
            predict_lst = np.zeros_like(landcover_array, dtype=np.float32)
            predict_ndvi = np.zeros_like(landcover_array, dtype=np.float32)

        # Add the NDVI column
        with rasterio.open(f"{sat_images_path}/{f}/{f}_NDVI.TIF", driver="GTiff") as ndvi_raster:
            print('Read NDVI')
            ndvi_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(ndvi_raster, 1),
                destination=ndvi_resampled,
                src_transform=ndvi_raster.transform,
                src_crs=ndvi_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )

        # Add the LST column
        with rasterio.open(f"{sat_images_path}/{f}/{f}_LST.TIF", driver="GTiff") as lst_raster:
            print('Read LST')
            lst_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(lst_raster, 1),
                destination=lst_resampled,
                src_transform=lst_raster.transform,
                src_crs=lst_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )

        if int(year) >= 2020:
            predict_n += 1
            predict_lst = np.where(landcover_array != 9999, (predict_lst + lst_resampled), 9999)
            predict_ndvi = np.where(landcover_array != 9999, (predict_ndvi + ndvi_resampled), 9999)

Processing LC08_L2SP_194028_20220725_20220802_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20220709_20220721_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20210706_20210713_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20200820_20200905_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20220810_20220818_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20210722_20210729_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20200719_20200911_02_T1
Read land cover
Read NDVI
Read LST


In [10]:
# save the lst and ndvi predict
if predict_n == 0: predict_n = 0.0000001
predict_lst = np.where(landcover_array != 9999, (predict_lst / predict_n), 9999)
predict_ndvi = np.where(landcover_array != 9999, (predict_ndvi / predict_n), 9999)

predict_df = pd.DataFrame({'x': x_flat, 'y': y_flat})
predict_df['landcover'] = pd.Series(landcover_array.flatten()).astype('int32')
predict_df['x'] = predict_df['x'].astype('uint32')
predict_df['y'] = predict_df['y'].astype('uint32')
predict_df['lst'] = pd.Series(predict_lst.flatten()).astype('float32')
predict_df['ndvi'] = pd.Series(predict_ndvi.flatten()).astype('float32')

predict_df = predict_df.loc[
    (predict_df['landcover'] != 9999) & (predict_df['lst'] != 0) & (predict_df['ndvi'] != 0)
]
predict_df


Unnamed: 0,x,y,landcover,lst,ndvi
1968,1968,0,2,319.631439,0.043480
1969,1969,0,2,319.631439,0.043480
1970,1970,0,2,319.631439,0.043480
1971,1971,0,2,319.631439,0.043480
1972,1972,0,2,319.631439,0.043480
...,...,...,...,...,...
12283281,2385,3312,5,307.238220,0.340860
12283282,2386,3312,5,307.238220,0.340860
12286985,2381,3313,5,306.947662,0.383189
12286986,2382,3313,5,306.947662,0.383189


In [11]:
sufix = '_simulation_base'
predict_path = f'{samples_base_path}/{city}_predict{sufix}.csv'
print(f'Saving predict in {predict_path}')
predict_df = predict_df.reset_index(drop=True)

predict_df.loc[
    (predict_df['landcover'] != 9999)]
predict_df.to_csv(predict_path)

Saving predict in training_samples/MILANO_predict_simulation_base.csv


## Build simulation base from summer mosaics

The simulation base is created for the city of Milan (comune), as the vegetation simulations will be carried out only at this level.

In [6]:
lst_folder = f"{landsat_raster_folder}/summer_avg"
lst_file_list = os.listdir(f"{lst_folder}")

In [9]:
predict_n= None

with rasterio.open(landcover_path, driver="GTiff") as landcover_raster:
    landcover_array = landcover_raster.read(1)
    landcover_transform = landcover_raster.transform
    landcover_crs = landcover_raster.crs
    print('Read land cover')

    predict_n = np.zeros_like(landcover_array, dtype=int)
    predict_lst = np.zeros_like(landcover_array, dtype=float)
    predict_ndvi = np.zeros_like(landcover_array, dtype=float)
    predict_ndbi = np.zeros_like(landcover_array, dtype=float)
    predict_albedo = np.zeros_like(landcover_array, dtype=float)
    predict_uhii = np.zeros_like(landcover_array, dtype=float)
    

for f in lst_file_list:
    if check_wrong_files(f):
        continue

    print(f'Processing {f}')
    year = int(f)
    
    if year >= 2020:
        # Load Landsat-derived rasters
        
        with rasterio.open(f"{lst_folder}/{f}/uhi_int_{year}.tif", driver="GTiff") as uhii_raster:
            print('read UHII')
            #uhii_array = uhii_raster.read(1)
            uhii_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(uhii_raster, 1),
                destination=uhii_resampled,
                src_transform=uhii_raster.transform,
                src_crs=uhii_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )
            
        with rasterio.open(f"{lst_folder}/{f}/NDVI_{year}.tif", driver="GTiff") as ndvi_raster:
            #ndvi_array = ndvi_raster.read(1)
            print('Read NDVI')
            ndvi_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(ndvi_raster, 1),
                destination=ndvi_resampled,
                src_transform=ndvi_raster.transform,
                src_crs=ndvi_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )

        with rasterio.open(f"{lst_folder}/{f}/NDBI_{year}.tif", driver="GTiff") as ndbi_raster:
            #ndbi_array = ndbi_raster.read(1)
            ndbi_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(ndbi_raster, 1),
                destination=ndbi_resampled,
                src_transform=ndbi_raster.transform,
                src_crs=ndbi_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )

        with rasterio.open(f"{lst_folder}/{f}/albedo_{year}.tif", driver="GTiff") as albedo_raster:
            #albedo_array = albedo_raster.read(1)

            albedo_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(albedo_raster, 1),
                destination=albedo_resampled,
                src_transform=albedo_raster.transform,
                src_crs=albedo_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )

        with rasterio.open(f"{lst_folder}/{f}/LST_{year}.tif", driver="GTiff") as lst_raster:
            #lst_array = lst_raster.read(1)

            lst_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(lst_raster, 1),
                destination=lst_resampled,
                src_transform=lst_raster.transform,
                src_crs=lst_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )

        # Exclude invalid pixels from predictions
        valid_pixels = (landcover_array != LC_NO_DATA) & (ndvi_resampled != NO_DATA) & (ndbi_resampled != NO_DATA) & (ndbi_resampled != NO_DATA) & (lst_resampled != NO_DATA) & (uhii_resampled != NO_DATA)

        #Add cumulative pixels if valid
        predict_n += valid_pixels  # Track valid pixel count
        predict_lst[valid_pixels] += lst_resampled[valid_pixels]
        predict_ndvi[valid_pixels] += ndvi_resampled[valid_pixels]
        predict_ndbi[valid_pixels] += ndbi_resampled[valid_pixels]
        predict_albedo[valid_pixels] += albedo_resampled[valid_pixels]
        predict_uhii[valid_pixels] += uhii_resampled[valid_pixels]
        


Read land cover
Processing 2016
Processing 2021
read UHII
Read NDVI
Processing 2022
read UHII
Read NDVI
Processing 2015
Processing 2019
Processing 2020
read UHII
Read NDVI
Processing 2018


In [10]:
# Compute final prediction values, avoiding division by zero
valid_mask = predict_n > 0
predict_lst[valid_mask] /= predict_n[valid_mask]
predict_ndvi[valid_mask] /= predict_n[valid_mask]
predict_ndbi[valid_mask] /= predict_n[valid_mask]
predict_albedo[valid_mask] /= predict_n[valid_mask]
predict_uhii[valid_mask] /= predict_n[valid_mask]

# Convert to DataFrame
predict_df = pd.DataFrame({
    'x': np.tile(np.arange(landcover_array.shape[1]), landcover_array.shape[0]),
    'y': np.repeat(np.arange(landcover_array.shape[0]), landcover_array.shape[1]),
    'landcover': landcover_array.flatten().astype('int32'),
    'lst': predict_lst.flatten().astype('float32'),
    'ndvi': predict_ndvi.flatten().astype('float32'),
    'ndbi': predict_ndbi.flatten().astype('float32'),
    'albedo': predict_albedo.flatten().astype('float32'),
    'uhii': predict_uhii.flatten().astype('float32')
})

In [11]:
predict_df = predict_df.loc[
    (predict_df['landcover'] != LC_NO_DATA) & (predict_df['lst'] != 0) 
]
predict_df

Unnamed: 0,x,y,landcover,lst,ndvi,ndbi,albedo,uhii
1968,1968,0,2,319.141266,0.080655,0.082474,0.213476,8.545837
1969,1969,0,2,319.141266,0.080655,0.082474,0.213476,8.545837
1970,1970,0,2,319.141266,0.080655,0.082474,0.213476,8.545837
1971,1971,0,2,319.141266,0.080655,0.082474,0.213476,8.545837
1972,1972,0,2,319.141266,0.080655,0.082474,0.213476,8.545837
...,...,...,...,...,...,...,...,...
12283281,2385,3312,5,306.759674,0.743142,-0.341891,0.127842,-3.835744
12283282,2386,3312,5,306.759674,0.743142,-0.341891,0.127842,-3.835744
12286985,2381,3313,5,306.472748,0.773112,-0.405859,0.149351,-4.122671
12286986,2382,3313,5,306.472748,0.773112,-0.405859,0.149351,-4.122671


In [14]:
predict_df.describe()

Unnamed: 0,x,y,landcover,lst,ndvi,ndbi,albedo,uhii
count,7201808.0,7201808.0,7201808.0,7201808.0,7201808.0,7201808.0,7201808.0,7201808.0
mean,2073.025,1496.416,2.701101,315.8794,0.4258152,-0.09488178,0.1391834,5.283631
std,839.8601,744.1084,1.549181,3.443218,0.1973479,0.1427672,0.03267613,3.443218
min,0.0,0.0,1.0,302.1594,-0.2089615,-0.5308375,0.007120991,-8.436025
25%,1460.0,885.0,1.0,314.4652,0.2592635,-0.1771116,0.1157809,3.869792
50%,2145.0,1482.0,2.0,316.4763,0.4093181,-0.07365351,0.1373317,5.88091
75%,2737.0,2050.0,4.0,317.9955,0.5710024,0.007526055,0.1599618,7.400034
max,3707.0,3313.0,6.0,329.1477,5.756988,0.3684908,0.8368582,18.55228


In [15]:
sufix = '_simulation_base_summer_mosaic'
samples_base_path = 'training_samples'
predict_path = f'{samples_base_path}/{city}_predict{sufix}.csv'
print(f'Saving predict in {predict_path}')
predict_df = predict_df.reset_index(drop=True)

predict_df.to_csv(predict_path)

Saving predict in training_samples/MILANO_predict_simulation_base_summer_mosaic.csv
