In [1]:
import sys
sys.path.append("../..")

#Defining libraries
import os
import math
from datetime import date, timedelta
import pandas as pd
import xarray as xr
import plotly.graph_objects as go
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import box
from scipy.interpolate import griddata, interpn
import datacube
from copy import deepcopy
import statsmodels.api as sm

import rasterio
from rasterio.plot import show
from rasterio.mask import mask
from rasterio.windows import Window
from rasterio.warp import reproject, Resampling
#from rasterio.enums import Resampling
from rasterio.transform import from_origin
from rasterio.windows import Window

import matplotlib.pyplot as plt

from modules import processing_module as processing

2025-02-10 18:15:30.204838: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-10 18:15:30.206095: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-10 18:15:30.228928: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-10 18:15:30.229374: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
'''
1-11-> Residential urban areas 
2-121,13->Industrial and abbandoned urban areas
3-122,123,124 Transportation infrastructure (streets, highways, airports, and ports)
4-14->Urban green areas
5-2->Agricultural areas
6-3->Forest
7-4/5->Hydro and humid bodies
'''
#Convert from copernicus code 2018 to an internal code
URBAN = 1
INDUSTRIAL = 2
TRANSPORTATION = 3
URBAN_VEGETATION = 4
RURAL = 5
FOREST = 6
WATER = 7

    
# Function to check if the file is a tiff and must be read.
def check_wrong_files(file_name):
    if f == 'clip': return True #avoid entering the "clip" folder
    if 'ipynb' in f: return True #avoid entering the "ipynb_checkpoint" file
    if 'tar' in f: return True #avoid entering "tar" files
    if 'aux' in f: return True #avoid entering "aux" files
    return False

def match_landsat_to_landcover(landsat):
    year = int(landsat[17:21])
    if year in [2015,2016]:
        return str(2015)
    elif year in [2017,2018,2019]:
        return str(2018)
    elif year in [2020,2021,2022]:
        return str(2021)

In [3]:
# City parameters and global variables
city_info = {
    "resolution": 5,
    "epsg": 32632,
    "capitalized": "Milan"
}

city = 'MILANO'
current_city_info = city_info
city_epsg = current_city_info['epsg']
data_folder = "data"
landcover_path = f'{data_folder}/landcover/DUSAF_2021_milano_mapped.tif'

landsat_raster_folder = "/home/user/ODC_harmonia/Landsat/Milan/data"
sat_images_path = f"{landsat_raster_folder}/clip"
file_list = os.listdir(f"{sat_images_path}")

#landsat_raster_file_list = os.listdir(f"{landsat_raster_folder}")



In [4]:
predict_lst = None
predict_ndvi = None

In [6]:

predict_n = 0
predict_lst = None
predict_ndvi = None

for f in file_list:
    if check_wrong_files(f): continue

    file_date_string = f.split('_')[3] #example: LC08_L2SP_194028_20160825_20200906_02_T1_LST
    year = match_landsat_to_landcover(f)
    if year == '2021':
        print(f'Processing {f}')
    
        with rasterio.open(landcover_path, driver="GTiff") as landcover_raster:
            landcover_array = landcover_raster.read(1)
            #print(landcover_raster.profile)
            print('Read land cover')
            rows, cols = landcover_array.shape
            x_positions = np.arange(0, cols)
            y_positions = np.arange(0, rows)
            x, y = np.meshgrid(x_positions, y_positions)
            x_flat = x.flatten()
            y_flat = y.flatten()
            values_flat = landcover_array.flatten()

            # Create a DataFrame for the Landcover 
            landcover_df = pd.DataFrame({'x': x_flat, 'y': y_flat, 'landcover': values_flat})
            landcover_df['landcover'] = landcover_df['landcover']
            
        if not isinstance(predict_lst,np.ndarray) and not isinstance(predict_ndvi,np.ndarray):
            predict_lst = np.zeros_like(landcover_array)
            predict_ndvi = np.zeros_like(landcover_array)

        #columns in the end: x,y,landcover,ndvi,raster
        train_df = landcover_df.copy()

        #add the uhi column
        with rasterio.open(f"{sat_images_path}/{f}/{f}_uhi.tif", driver="GTiff") as uhi_raster:
            print('read UHI')
            uhi_array = uhi_raster.read(1) #UHI band
            uhi_flat = uhi_array.flatten()
            train_df['uhi'] = pd.Series(uhi_flat).astype('int16')

        #add the ndvi column
        with rasterio.open(f"{sat_images_path}/{f}/{f}_NDVI.TIF", driver="GTiff") as ndvi_raster:
            
            # Prepare an empty array with the same shape as land cover
            ndvi_resampled = np.empty_like(landcover_array, dtype=np.float32)

            # Perform reprojection and resampling
            reproject(
                source=rasterio.band(lst_raster, 1),
                destination=lst_resampled,
                src_transform=lst_raster.transform,
                src_crs=lst_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear  # Bilinear for continuous data like temperature
            )
            ndvi_array = lst_resampled
            
            print('read NDVI')
            ndvi_array = ndvi_raster.read(1) #UHI band
            ndvi_flat = ndvi_array.flatten()
            train_df['ndvi'] = pd.Series(ndvi_flat).astype('float32')

        #add the LST column
        with rasterio.open(f"{sat_images_path}/{f}/{f}_LST.TIF", driver="GTiff") as lst_raster:
            print('read LST')
            
            # Prepare an empty array with the same shape as land cover
            lst_resampled = np.empty_like(landcover_array, dtype=np.float32)

            # Perform reprojection and resampling
            reproject(
                source=rasterio.band(lst_raster, 1),
                destination=lst_resampled,
                src_transform=lst_raster.transform,
                src_crs=lst_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear  # Bilinear for continuous data like temperature
            )
            lst_array = lst_resampled
            
            #lst_array = lst_raster.read(1) #UHI band
            lst_flat = lst_array.flatten()
            train_df['lst'] = pd.Series(lst_flat).astype('float32')

        if int(year) >= 2020:
            predict_n += 1
            predict_lst = np.where(landcover_array != 9999, (predict_lst + lst_array), 9999)
            predict_ndvi = np.where(landcover_array != 9999, (predict_ndvi + ndvi_array), 9999)


    

Processing LC08_L2SP_194028_20220725_20220802_02_T1
Read land cover
read UHI
read NDVI
read LST


ValueError: operands could not be broadcast together with shapes (3314,3708) (10691,13202) 

In [7]:
predict_n = 0
predict_lst = None
predict_ndvi = None

for f in file_list:
    if check_wrong_files(f): continue

    file_date_string = f.split('_')[3]  # example: LC08_L2SP_194028_20160825_20200906_02_T1_LST
    year = match_landsat_to_landcover(f)
    if year == '2021':
        print(f'Processing {f}')
    
        with rasterio.open(landcover_path, driver="GTiff") as landcover_raster:
            landcover_array = landcover_raster.read(1)
            landcover_transform = landcover_raster.transform
            landcover_crs = landcover_raster.crs
            print('Read land cover')

        if not isinstance(predict_lst, np.ndarray) and not isinstance(predict_ndvi, np.ndarray):
            predict_lst = np.zeros_like(landcover_array, dtype=np.float32)
            predict_ndvi = np.zeros_like(landcover_array, dtype=np.float32)

        # Add the NDVI column
        with rasterio.open(f"{sat_images_path}/{f}/{f}_NDVI.TIF", driver="GTiff") as ndvi_raster:
            print('Read NDVI')
            ndvi_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(ndvi_raster, 1),
                destination=ndvi_resampled,
                src_transform=ndvi_raster.transform,
                src_crs=ndvi_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )

        # Add the LST column
        with rasterio.open(f"{sat_images_path}/{f}/{f}_LST.TIF", driver="GTiff") as lst_raster:
            print('Read LST')
            lst_resampled = np.empty_like(landcover_array, dtype=np.float32)

            reproject(
                source=rasterio.band(lst_raster, 1),
                destination=lst_resampled,
                src_transform=lst_raster.transform,
                src_crs=lst_raster.crs,
                dst_transform=landcover_transform,
                dst_crs=landcover_crs,
                resampling=Resampling.bilinear
            )

        if int(year) >= 2020:
            predict_n += 1
            predict_lst = np.where(landcover_array != 9999, (predict_lst + lst_resampled), 9999)
            predict_ndvi = np.where(landcover_array != 9999, (predict_ndvi + ndvi_resampled), 9999)

Processing LC08_L2SP_194028_20220725_20220802_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20220709_20220721_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20210706_20210713_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20200820_20200905_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20220810_20220818_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20210722_20210729_02_T1
Read land cover
Read NDVI
Read LST
Processing LC08_L2SP_194028_20200719_20200911_02_T1
Read land cover
Read NDVI
Read LST


In [9]:
predict_n

7

In [10]:
# save the lst and ndvi predict
if predict_n == 0: predict_n = 0.0000001
predict_lst = np.where(landcover_array != 9999, (predict_lst / predict_n), 9999)
predict_ndvi = np.where(landcover_array != 9999, (predict_ndvi / predict_n), 9999)

predict_df = pd.DataFrame({'x': x_flat, 'y': y_flat})
predict_df['landcover'] = pd.Series(landcover_array.flatten()).astype('int32')
predict_df['x'] = predict_df['x'].astype('uint32')
predict_df['y'] = predict_df['y'].astype('uint32')
predict_df['lst'] = pd.Series(predict_lst.flatten()).astype('float32')
predict_df['ndvi'] = pd.Series(predict_ndvi.flatten()).astype('float32')

predict_df = predict_df.loc[
    (predict_df['landcover'] != 9999) & (predict_df['lst'] != 0) & (predict_df['ndvi'] != 0)
]
predict_df


Unnamed: 0,x,y,landcover,lst,ndvi
1968,1968,0,2,319.631439,0.043480
1969,1969,0,2,319.631439,0.043480
1970,1970,0,2,319.631439,0.043480
1971,1971,0,2,319.631439,0.043480
1972,1972,0,2,319.631439,0.043480
...,...,...,...,...,...
12283281,2385,3312,5,307.238220,0.340860
12283282,2386,3312,5,307.238220,0.340860
12286985,2381,3313,5,306.947662,0.383189
12286986,2382,3313,5,306.947662,0.383189


In [11]:
sufix = '_simulation_base'
predict_path = f'{samples_base_path}/{city}_predict{sufix}.csv'
print(f'Saving predict in {predict_path}')
predict_df = predict_df.reset_index(drop=True)

predict_df.loc[
    (predict_df['landcover'] != 9999)]
predict_df.to_csv(predict_path)

Saving predict in training_samples/MILANO_predict_simulation_base.csv
