In [1]:
# Suppress Warnings
import warnings
warnings.filterwarnings('ignore')

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Data Science
import numpy as np
import pandas as pd

# Multi-dimensional arrays and datasets
import xarray as xr

# Geospatial raster data handling
import rioxarray as rxr

# Geospatial data analysis
import geopandas as gpd

# Geospatial operations
import rasterio
from rasterio import windows  
from rasterio import features  
from rasterio import warp
from rasterio.warp import transform_bounds 
from rasterio.windows import from_bounds 
from shapely.geometry import Point

# Image Processing
from PIL import Image

# Coordinate transformations
from pyproj import Proj, Transformer, CRS

# Feature Engineering
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Machine Learning
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Planetary Computer Tools
import pystac_client
import planetary_computer as pc
from pystac.extensions.eo import EOExtension as eo

# Others
import os
from tqdm import tqdm

In [5]:
# Read the training observations from the CSV file into `ground_df`,
# then preview the first five rows as a quick sanity check
ground_df = pd.read_csv(filepath_or_buffer="../baseline/Training_data_uhi_index.csv")
ground_df.head(n=5)

Unnamed: 0,Longitude,Latitude,datetime,UHI Index
0,-73.909167,40.813107,24-07-2021 15:53,1.030289
1,-73.909187,40.813045,24-07-2021 15:53,1.030289
2,-73.909215,40.812978,24-07-2021 15:53,1.023798
3,-73.909242,40.812908,24-07-2021 15:53,1.023798
4,-73.909257,40.812845,24-07-2021 15:53,1.021634


In [6]:
# Count the distinct `datetime` values recorded at each (Longitude, Latitude)
# pair, listing the pairs with the most distinct timestamps first
(
    ground_df
    .groupby(['Longitude', 'Latitude'])
    .agg(datetime=('datetime', 'nunique'))
    .sort_values(by='datetime', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,datetime
Longitude,Latitude,Unnamed: 2_level_1
-73.994457,40.771468,1
-73.916647,40.818755,1
-73.916707,40.816543,1
-73.916702,40.815750,1
-73.916692,40.811897,1
...,...,...
-73.951927,40.787617,1
-73.951910,40.769373,1
-73.951910,40.803873,1
-73.951905,40.811565,1


In [7]:
# Summary statistics of the sampled coordinates. The min/max rows give the
# extent of the ground data -- presumably for comparison with the `lower_left`
# and `upper_right` variables of the "Sentinel2_GeoTIFF" notebook (TODO confirm).
ground_df.loc[:, ['Longitude', 'Latitude']].describe()

Unnamed: 0,Longitude,Latitude
count,11229.0,11229.0
mean,-73.933927,40.8088
std,0.028253,0.023171
min,-73.994457,40.758792
25%,-73.955703,40.790905
50%,-73.932968,40.810688
75%,-73.909647,40.824515
max,-73.879458,40.859497


In [8]:
# Reads all eleven bands stored in the GeoTIFF file and plots each one in its
# own panel. Every band gets its own dictionary entry and its own axes, so no
# band is overwritten and no image or colorbar is drawn on top of another.

# Path to the GeoTIFF file
tiff_path = "../baseline/S2_sample.tiff"

# Band name stored at each 1-based raster index of the GeoTIFF.
# NOTE(review): this order is taken from the band comments that accompanied
# the individual reads -- confirm it matches the order in which the bands
# were written to the file.
band_names = ["B01", "B04", "B06", "B08", "B02", "B03", "B05", "B07", "B8A", "B11", "B12"]

# Read every band exactly once, keyed by its name
with rasterio.open(tiff_path) as src1:
    bands = {name: src1.read(index) for index, name in enumerate(band_names, start=1)}

# `band1`..`band4` are kept as aliases (B02, B8A, B11, B12) in case a later
# cell still refers to these names; prefer `bands[name]` in new code.
band1, band2, band3, band4 = (bands[name] for name in ("B02", "B8A", "B11", "B12"))

# Lay the bands out on a grid with one panel per band
n_cols = 4
n_rows = -(-len(bands) // n_cols)  # ceiling division without importing math
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows))

# Flatten the axes for easier indexing
axes = axes.flatten()

# Plot each band with its own title and colorbar
for ax, (name, band) in zip(axes, bands.items()):
    im = ax.imshow(band, cmap='viridis')
    ax.set_title(f'Band [{name}]')
    fig.colorbar(im, ax=ax)

# Hide the panels left over when the grid has more cells than bands
for ax in axes[len(bands):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

RasterioIOError: S2_sample.tiff: No such file or directory