In [1]:
import numpy as np
import geopandas as gpd
import rasterio
import pandas as pd
from pyproj import Transformer
from shapely.geometry import Point
import hvplot.pandas
from shapely.geometry import Polygon, box
import panel as pn
from bokeh.palettes import Inferno256
import logging
import os
import zipfile
from datetime import datetime
import logging
import shutil
import panel as pn

#### Preprocessing data


##### Filtering by specific postal code for spatial map plotting (Tampines, Treatment Blocks)

In [1]:
# Load the Singapore GeoJSON; later cells pull building geometries out of geo_data.
geojson_path = r"C:\LocalOneDrive\Documents\Desktop\MTI\UHI-Project\MSE-ES-UHI\Data\SG_geojson\SG.geojson"
geo_data = gpd.read_file(geojson_path)

In [2]:
blocks_of_interest = [
    '899A', '899', '898A', '898', '890', '890A', '890B', '890C', '890D', '897', '897A',
    '895A', '895', '894A', '896', '896A', '896', '894', '893A', '893', '892A', '892', '891A', '891B', '891',
    '889', '889A', '887', '887A', '888',
    '827', '827A', '827B', '825', '824', '823', '823A', '828', '821', '822', '829',
    '830', '832', '834', '836', '838', '840',
    '841', '835', '839', '837', '833', '831',
    '881', '882', '883', '884', '885', '885A', '886', '886A', '882A',
    '880', '880A', '879', '878', '877', '875', '876',
    '874A', '874', '873', '872', '871A', '871', '870', '865', '866', '869', '869A', '869B', '868', '867', '867A', '868D', '868C', '868B', '868A',
    '863B', '864', '864B', '864A', '863', '863A', '862A', '862', '857A', '857B', '857',
    '860B', '861', '861A', '859A', '859', '860', '860A', '858', '858A', '858B', '859B',
    '849', '855', '853', '851', '847', '845', '843', '842', '846', '844', '846', '848', '850', '852', '854', '856'
]

# blocks removed = '828A', '826', '826A', '888A', 'Junyuan Secondary School'

# Maps 'polygon_<block>' -> geometry of the first matching feature for that block.
polygons = {}
polygon_index = 0  # Number of distinct blocks for which a polygon was stored

# The street filter is the same for every block, so evaluate it once up front
# instead of rescanning the whole GeoDataFrame on each loop iteration.
tampines_features = geo_data[geo_data['addr_street'].str.contains("Tampines", na=False)]

# The list above repeats '896' and '846'. dict.fromkeys drops the repeats while
# keeping the original order, so each block is looked up, counted and printed
# once and polygon_index ends up equal to the number of stored block polygons.
for block in dict.fromkeys(blocks_of_interest):
    matching_features = tampines_features[tampines_features['addr_housenumber'] == block]

    if matching_features.empty:
        print(f"No matching features found for block {block}.")
        continue

    # Only the first matching feature is kept when several share a house number.
    polygon = matching_features.iloc[0]['geometry']
    polygons[f'polygon_{block}'] = polygon
    polygon_index += 1
    print(f"Polygon {polygon_index} for block {block}: {polygon}")

Polygon 1 for block 899A: POLYGON ((103.9347843 1.3463483, 103.9348304 1.3463296, 103.9348191 1.3463016, 103.9349043 1.346267, 103.9348942 1.346242, 103.9349178 1.3462324, 103.9349279 1.3462574, 103.9354963 1.3460266, 103.9354849 1.3459984, 103.9355116 1.3459876, 103.9355231 1.3460158, 103.9356069 1.3459817, 103.9356151 1.3460019, 103.9356605 1.3459834, 103.9357072 1.3460983, 103.935589 1.3461463, 103.9355806 1.3461257, 103.9355241 1.3461487, 103.9355325 1.3461692, 103.9353054 1.3462614, 103.935297 1.3462408, 103.9352131 1.3462749, 103.9352214 1.3462955, 103.9349969 1.3463867, 103.9349895 1.3463684, 103.934937 1.3463897, 103.9349444 1.3464079, 103.9348278 1.3464553, 103.9347843 1.3463483))
Polygon 2 for block 899: POLYGON ((103.9350812 1.346578, 103.9353029 1.346489, 103.9353114 1.3465101, 103.9353936 1.3464771, 103.9353851 1.3464559, 103.935609 1.346366, 103.9356179 1.3463884, 103.9356744 1.3463657, 103.9356655 1.3463433, 103.9357839 1.3462958, 103.935827 1.346403, 103.9357829 1.34642

In [3]:
# Sanity check: number of entries held in the polygons dict at this point.
print(len(polygons))

123


In [4]:
# Find features whose name contains "Tampines West Community Club" and register the footprint.
tamp_west_community_club_features = geo_data.loc[
    geo_data['name'].str.contains("Tampines West Community Club", na=False)
]

if tamp_west_community_club_features.empty:
    print("No matching features found for Tampines West Community Club.")
else:
    # Several features may match; only the first one is used.
    community_club_polygon = tamp_west_community_club_features['geometry'].iloc[0]
    polygons['Tampines West Community Club'] = community_club_polygon
    print(f"Polygon for Tampines West Community Club added: {community_club_polygon}")

Polygon for Tampines West Community Club added: POLYGON ((103.9354216 1.3487426, 103.9354609 1.3487266, 103.9354474 1.3486936, 103.9359365 1.3484941, 103.9359531 1.3485347, 103.9359833 1.3485224, 103.9360003 1.348564, 103.9359896 1.3485683, 103.9360011 1.3485966, 103.9360372 1.3485819, 103.9360993 1.3487343, 103.9359589 1.3487922, 103.9359281 1.3488763, 103.9357717 1.348819, 103.9357551 1.3488643, 103.9357833 1.3489375, 103.9355413 1.3490361, 103.9354216 1.3487426))


In [5]:
# Find features whose name contains "Tampines Central Community Club" and register the footprint.
tamp_central_community_club_features = geo_data.loc[
    geo_data['name'].str.contains("Tampines Central Community Club", na=False)
]

if tamp_central_community_club_features.empty:
    print("No matching features found for Tampines Central Community Club.")
else:
    # Several features may match; only the first one is used.
    community_club_polygon = tamp_central_community_club_features['geometry'].iloc[0]
    polygons['Tampines Central Community Club'] = community_club_polygon
    print(f"Polygon for Tampines Central Community Club added: {community_club_polygon}")

Polygon for Tampines Central Community Club added: POLYGON ((103.9341537 1.3553222, 103.9347155 1.3550296, 103.9347303 1.355043, 103.9347263 1.3551208, 103.9347444 1.3551208, 103.9347457 1.3553554, 103.9347015 1.3554023, 103.9347289 1.3554446, 103.9346974 1.3554714, 103.9346968 1.3554975, 103.9346612 1.3555304, 103.934686 1.3555639, 103.9346398 1.3555994, 103.9346103 1.3555666, 103.9345808 1.3555867, 103.9345668 1.3555636, 103.934285 1.3555967, 103.9342744 1.3555394, 103.9341537 1.3553222), (103.934279 1.3553661, 103.9343441 1.3555002, 103.9345157 1.3554734, 103.9345338 1.3554459, 103.9344466 1.355281, 103.934279 1.3553661), (103.9345365 1.3552233, 103.9346532 1.3553554, 103.9346579 1.3551529, 103.9345365 1.3552233))


In [6]:
# Find features named exactly "Lighthouse Evangelism" and register the footprint.
lighthouse_evangelism_features = geo_data.loc[geo_data['name'] == "Lighthouse Evangelism"]

if lighthouse_evangelism_features.empty:
    print("No matching features found for Lighthouse Evangelism.")
else:
    # Several features may match; only the first one is used.
    # (The variable name is shared with the community-club cells.)
    community_club_polygon = lighthouse_evangelism_features['geometry'].iloc[0]
    polygons['Lighthouse Evangelism'] = community_club_polygon
    print(f"Polygon for Lighthouse Evangelism added: {community_club_polygon}")

Polygon for Lighthouse Evangelism added: POLYGON ((103.9351504 1.3496679, 103.9354429 1.3495486, 103.9354726 1.3496215, 103.9355208 1.3496018, 103.9355676 1.3497166, 103.9355144 1.3497383, 103.9355391 1.3497991, 103.9352493 1.3499172, 103.9352398 1.3498939, 103.935259 1.3498861, 103.9352368 1.3498318, 103.9352039 1.349751, 103.9351789 1.3496898, 103.9351622 1.3496967, 103.9351504 1.3496679))


In [7]:
# Find features whose name contains "Singapore Soka Association Headquarters" and register the footprint.
ssa_headquarters_features = geo_data.loc[
    geo_data['name'].str.contains("Singapore Soka Association Headquarters", na=False)
]

if ssa_headquarters_features.empty:
    print("No matching features found for Singapore Soka Association Headquarters.")
else:
    # Several features may match; only the first one is used.
    # (The variable name is shared with the community-club cells.)
    community_club_polygon = ssa_headquarters_features['geometry'].iloc[0]
    polygons['Singapore Soka Association Headquarters'] = community_club_polygon
    print(f"Polygon for Singapore Soka Association Headquarters added: {community_club_polygon}")

Polygon for Singapore Soka Association Headquarters added: POLYGON ((103.9354882 1.3480382, 103.9355433 1.347902, 103.9355236 1.347894, 103.9355526 1.3478225, 103.9355741 1.3478312, 103.9355812 1.3478137, 103.9356812 1.3478542, 103.9356723 1.3478762, 103.9356925 1.3478844, 103.9356748 1.3479281, 103.9356603 1.3479222, 103.9356515 1.3479439, 103.9356817 1.3479562, 103.935668 1.34799, 103.9356762 1.3479933, 103.9356935 1.347986, 103.9356955 1.3479908, 103.9357105 1.3479846, 103.9358481 1.3483156, 103.9356535 1.3483965, 103.9355142 1.3480615, 103.9355296 1.348055, 103.9354882 1.3480382))


In [8]:
# Sanity check: polygons dict size after the named-building lookups above.
print(len(polygons))

127


In [9]:
# Getting postal code
postalcode_geojson_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\boundaries.geojson"

# Load GeoJSON data into a GeoDataFrame
postalcode_gdf = gpd.read_file(postalcode_geojson_path)

# print(postalcode_gdf.columns)

def extract_postal_codes(geojson_path, road_name_keyword, blocks_of_interest):
    """Return the unique postal codes of the requested blocks on matching roads.

    The lookup runs against the module-level ``postalcode_gdf``.

    Parameters
    ----------
    geojson_path : str
        Not read by this function. It is kept so existing positional calls keep
        working; callers should pass the path ``postalcode_gdf`` was loaded from
        so the call site stays truthful.
    road_name_keyword : str
        Case-insensitive pattern matched against the ``ROAD_NAME`` column.
    blocks_of_interest : iterable of str
        Block numbers matched exactly against the ``HOUSE_BLK_NO`` column.

    Returns
    -------
    array-like
        Unique non-null ``POSTAL_CODE`` values in order of first appearance.
    """
    on_matching_road = postalcode_gdf['ROAD_NAME'].str.contains(road_name_keyword, case=False, na=False)
    in_requested_blocks = postalcode_gdf['HOUSE_BLK_NO'].isin(blocks_of_interest)

    return postalcode_gdf.loc[on_matching_road & in_requested_blocks, 'POSTAL_CODE'].dropna().unique()

# Extract postal codes. The first argument used to be the SG.geojson path, which has
# nothing to do with the postal-code table; pass the file that was actually loaded.
postal_codes = extract_postal_codes(postalcode_geojson_path, "Tampines", blocks_of_interest)

# Print the results
print(f"{len(postal_codes)} postal codes in Tampines for specified blocks:", postal_codes)

124 postal codes in Tampines for specified blocks: ['521823' '522891' '520894' '523890' '522890' '520890' '520896' '521898'
 '521887' '523868' '521869' '520897' '521895' '522869' '521897' '521894'
 '520889' '524868' '521892' '520822' '520825' '521827' '521896' '520891'
 '521874' '520886' '521886' '520862' '520824' '521889' '521890' '520821'
 '520863' '520870' '521864' '520895' '520898' '520823' '521899' '520885'
 '520867' '521862' '521868' '520877' '520871' '520872' '521882' '520869'
 '520866' '520899' '521867' '520865' '521885' '520864' '520882' '522863'
 '524890' '520884' '521893' '521880' '520878' '520876' '520868' '521891'
 '520892' '520879' '520893' '520880' '520888' '520881' '521871' '520875'
 '520828' '520829' '520883' '520873' '520874' '522868' '520842' '520844'
 '520846' '520848' '520852' '520854' '520845' '522827' '520887' '520847'
 '520827' '521863' '520833' '520830' '520831' '520832' '520834' '520836'
 '520838' '522858' '521858' '521861' '520840' '520858' '520855' '520853'


In [10]:
# Postal-code rows for the four named buildings, matched on BUILDING_NAME.
tampines_west_cc = postalcode_gdf[postalcode_gdf['BUILDING_NAME'].str.contains("Tampines West Community Club", case = False, na = False)]
tampines_central_cc = postalcode_gdf[postalcode_gdf['BUILDING_NAME'].str.contains("Tampines Central Community Complex", case = False, na = False)]
lighthouse_evangelism = postalcode_gdf[postalcode_gdf['BUILDING_NAME'] == 'LIGHTHOUSE EVANGELISM']
ssa_headquarters = postalcode_gdf[postalcode_gdf['BUILDING_NAME'] == 'SSA HEADQUARTERS']

tampines_west_cc_postal_codes = tampines_west_cc['POSTAL_CODE'].unique()
tampines_central_cc_postal_codes = tampines_central_cc['POSTAL_CODE'].unique()
lighthouse_evangelism_postal_codes = lighthouse_evangelism['POSTAL_CODE'].unique()
ssa_headquarters_postal_codes = ssa_headquarters['POSTAL_CODE'].unique()

# This cell feeds postal_codes back into itself, so running it twice used to append
# the same building codes again. pd.unique drops repeats and keeps first-seen order,
# which makes the cell safe to re-run.
postal_codes = pd.unique(
    np.concatenate((
        postal_codes,
        tampines_west_cc_postal_codes,
        tampines_central_cc_postal_codes,
        lighthouse_evangelism_postal_codes,
        ssa_headquarters_postal_codes,
    ))
)

print(f"{len(postal_codes)} postal codes generated in total, ", postal_codes)

128 postal codes generated in total,  ['521823' '522891' '520894' '523890' '522890' '520890' '520896' '521898'
 '521887' '523868' '521869' '520897' '521895' '522869' '521897' '521894'
 '520889' '524868' '521892' '520822' '520825' '521827' '521896' '520891'
 '521874' '520886' '521886' '520862' '520824' '521889' '521890' '520821'
 '520863' '520870' '521864' '520895' '520898' '520823' '521899' '520885'
 '520867' '521862' '521868' '520877' '520871' '520872' '521882' '520869'
 '520866' '520899' '521867' '520865' '521885' '520864' '520882' '522863'
 '524890' '520884' '521893' '521880' '520878' '520876' '520868' '521891'
 '520892' '520879' '520893' '520880' '520888' '520881' '521871' '520875'
 '520828' '520829' '520883' '520873' '520874' '522868' '520842' '520844'
 '520846' '520848' '520852' '520854' '520845' '522827' '520887' '520847'
 '520827' '521863' '520833' '520830' '520831' '520832' '520834' '520836'
 '520838' '522858' '521858' '521861' '520840' '520858' '520855' '520853'
 '520851' '52

In [11]:
# Maps postal code -> (longitude, latitude); (None, None) when the code has no row.
coordinates_dict = {}

def get_coordinates_by_postal_code(postal_code):
    """Return (longitude, latitude) of the first ``postalcode_gdf`` row for a postal code.

    Returns ``(None, None)`` when no row has that ``POSTAL_CODE``. The x/y accessors
    are used on the geometry column, so the rows are assumed to be point geometries
    (TODO confirm against the boundaries file).
    """
    postal_data = postalcode_gdf[postalcode_gdf['POSTAL_CODE'] == postal_code]
    if postal_data.empty:
        return None, None

    # Extract the coordinates of the first matching entry
    longitude = postal_data.geometry.x.values[0]
    latitude = postal_data.geometry.y.values[0]
    return longitude, latitude

# Fetch coordinates for each postal code and store them
for postal_code in postal_codes:
    coordinates_dict[postal_code] = get_coordinates_by_postal_code(postal_code)

# Split the lookups into usable coordinates and postal codes that had no match,
# so that missing codes are reported instead of being skipped silently.
longitudes = []
latitudes = []
missing_postal_codes = []

for postal_code, (lon, lat) in coordinates_dict.items():
    if lon is None or lat is None:
        missing_postal_codes.append(postal_code)
        continue
    longitudes.append(lon)
    latitudes.append(lat)
    print(f'Coordinates for postal code {postal_code}: Longitude {lon}, Latitude {lat}')

if missing_postal_codes:
    print(f'No coordinates found for {len(missing_postal_codes)} postal code(s): {missing_postal_codes}')

if longitudes and latitudes:
    # Plain arithmetic mean of the point coordinates; used later as the centre of
    # the region of interest.
    avg_longitude = np.mean(longitudes)
    avg_latitude = np.mean(latitudes)
    print('\nCentral coordinates:')
    print(f'Longitude: {avg_longitude}, Latitude: {avg_latitude}')
else:
    # avg_longitude / avg_latitude are NOT defined on this path, so later cells
    # that rely on them will fail until the postal-code lookup is fixed.
    print('No coordinates found for any postal code; central point not computed.')

Coordinates for postal code 521823: Longitude 103.93355401503796, Latitude 1.3486572503763357
Coordinates for postal code 522891: Longitude 103.93058564623661, Latitude 1.3491614635609042
Coordinates for postal code 520894: Longitude 103.93181732184792, Latitude 1.3488144141057294
Coordinates for postal code 523890: Longitude 103.93376736825857, Latitude 1.3473135786926551
Coordinates for postal code 522890: Longitude 103.93340921678195, Latitude 1.346905557566624
Coordinates for postal code 520890: Longitude 103.93408640738006, Latitude 1.3470892580783935
Coordinates for postal code 520896: Longitude 103.93245173124072, Latitude 1.348303277441953
Coordinates for postal code 521898: Longitude 103.93488728387119, Latitude 1.346813182278691
Coordinates for postal code 521887: Longitude 103.93147422619111, Latitude 1.3501419480825714
Coordinates for postal code 523868: Longitude 103.9333343877725, Latitude 1.3558578015632896
Coordinates for postal code 521869: Longitude 103.93231742743029

In [12]:
# Converting x and y to coordinates for latitude/longitude
def preprocessing(file_path, center_lonlat=None, radius_m=2000):
    """Turn one GeoTIFF into a GeoDataFrame of pixels around a centre point.

    Every band of the raster becomes one column, named after the band description.
    Pixel centres are converted to longitude/latitude, ST_B6 is converted to degrees
    Celsius, rows below 20 degrees Celsius (or with a masked ST_B6 value) are
    dropped, the listed bands get a ``<band>_Scaled`` column, and only pixels
    within ``radius_m`` of the centre are returned.

    Parameters
    ----------
    file_path : str
        Path of the GeoTIFF to open with rasterio.
    center_lonlat : tuple of (float, float), optional
        (longitude, latitude) of the centre in EPSG:4326. When omitted, the
        notebook-level ``avg_longitude`` / ``avg_latitude`` are used, which is the
        original behaviour.
    radius_m : float, optional
        Buffer radius in EPSG:3857 units. Defaults to 2000, the original value.

    Returns
    -------
    geopandas.GeoDataFrame
        Point geometries in EPSG:4326 carrying the raw and scaled band columns.

    Raises
    ------
    KeyError
        If the raster lacks one of the bands that are scaled below.
    """
    if center_lonlat is None:
        # Fall back to the centre computed from the postal codes earlier on.
        center_lonlat = (avg_longitude, avg_latitude)

    surface_reflectance_bands = ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7']
    additional_scales = {
        'SR_ATMOS_OPACITY': 0.001,
        'ST_ATRAN': 0.0001, 'ST_CDIST': 0.01, 'ST_DRAD': 0.001,
        'ST_EMIS': 0.0001, 'ST_EMSD': 0.0001, 'ST_QA': 0.01,
        'ST_TRAD': 0.001, 'ST_URAD': 0.001
    }

    # Read the raster once (it used to be read twice: once for its shape, once masked).
    with rasterio.open(file_path) as src:
        bands = src.read(masked=True)  # indexed as (band, row, col)
        transform = src.transform
        src_crs = src.crs
        band_names = src.descriptions

    n_rows, n_cols = bands.shape[1], bands.shape[2]

    # Row/column index of every pixel, flattened row-major so the order matches
    # band.flatten() below.
    cols, rows = np.meshgrid(np.arange(n_cols), np.arange(n_rows))
    xs, ys = rasterio.transform.xy(transform, rows.ravel(), cols.ravel(), offset='center')

    # Pixel centres in the raster CRS -> longitude/latitude.
    transformer = Transformer.from_crs(src_crs, 'EPSG:4326', always_xy=True)
    lon, lat = transformer.transform(np.asarray(xs), np.asarray(ys))

    df = pd.DataFrame({'Longitude': lon, 'Latitude': lat})
    for band_name, band in zip(band_names, bands):
        df[band_name] = band.flatten()

    required_bands = ['ST_B6', *surface_reflectance_bands, *additional_scales]
    missing_bands = [name for name in required_bands if name not in df.columns]
    if missing_bands:
        raise KeyError(f"{file_path} is missing expected band(s): {missing_bands}")

    # Scale and offset the thermal band, then convert Kelvin to degrees Celsius.
    df['ST_B6_Celsius'] = df['ST_B6'] * 0.00341802 + 149 - 273.15

    # Drop rows below 20 degrees Celsius. Masked pixels become NaN and fail the
    # comparison, so they are dropped too. .copy() makes the result an independent
    # frame, so the columns added below do not touch a slice of df.
    df_filtered = df[df['ST_B6_Celsius'] >= 20].copy()

    for band in surface_reflectance_bands:
        df_filtered[f"{band}_Scaled"] = df_filtered[band] * 2.75e-05 - 0.2

    for band, scale in additional_scales.items():
        df_filtered[f"{band}_Scaled"] = df_filtered[band] * scale

    gdf = gpd.GeoDataFrame(
        df_filtered,
        geometry=gpd.points_from_xy(df_filtered.Longitude, df_filtered.Latitude),
        crs='EPSG:4326',
    )

    print("Total number of valid pixels: " + str(len(gdf)))
    print(df[['Latitude', 'Longitude']].head())

    # Work in EPSG:3857 so the buffer radius is expressed in that CRS's metre
    # units rather than in degrees.
    gdf = gdf.to_crs('EPSG:3857')

    to_web_mercator = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
    center_x, center_y = to_web_mercator.transform(center_lonlat[0], center_lonlat[1])

    # Circular region of interest around the centre point.
    buffer = Point(center_x, center_y).buffer(radius_m)

    # Filter points within the buffer
    filtered_gdf = gdf[gdf.geometry.within(buffer)]

    print(f"\nNumber of points within {radius_m}m radius: {len(filtered_gdf)}")

    return filtered_gdf.to_crs('EPSG:4326')

##### Filtering 30m x 30m pixels based on region of interest

##### EPSG:3857 expresses coordinates in metres, so each pixel centre can be expanded into a 30 m x 30 m square directly in that CRS

In [13]:
# Suppress warnings
logging.getLogger('bokeh').setLevel(logging.ERROR)
pd.options.mode.chained_assignment = None  # default='warn'

def plot_spatial_map(filtered_gdf):
    """Build the overlay map of pixels that touch any polygon of interest.

    Each point in ``filtered_gdf`` is treated as the centre of a 30 m x 30 m pixel.
    Pixels whose square intersects at least one geometry in the notebook-level
    ``polygons`` dict are kept and coloured by ``ST_B6_Celsius``.

    Parameters
    ----------
    filtered_gdf : geopandas.GeoDataFrame
        Point geometries with ``Longitude``, ``Latitude`` and ``ST_B6_Celsius``
        columns. The argument itself is not modified.

    Returns
    -------
    The overlay of the polygon outlines, the pixel-centre points and the pixel
    squares.

    Side effects
    ------------
    Stores the intersecting pixels in the global ``within_polygon_gdf`` and prints
    how many there are.
    """
    global within_polygon_gdf

    # Create pixels as 30m x 30m boxes around each point, assuming each point is at
    # the centre of its pixel. EPSG:3857 is in metres, so half a pixel is 15 units.
    half_width = 15
    pixel_gdf = filtered_gdf.to_crs('epsg:3857')
    pixel_gdf['geometry'] = pixel_gdf['geometry'].apply(
        lambda pt: box(pt.x - half_width, pt.y - half_width, pt.x + half_width, pt.y + half_width))

    # Polygons are stored in EPSG:4326; keep that frame for the geo=True outline plot
    # and use a reprojected copy for the intersection test.
    polygon_gdf = gpd.GeoDataFrame({'geometry': list(polygons.values())}, crs='epsg:4326')
    footprints_3857 = list(polygon_gdf.to_crs('epsg:3857')['geometry'])

    # Single intersection pass, with both sides in EPSG:3857. An earlier extra pass
    # compared the EPSG:3857 boxes with the EPSG:4326 polygons and its result was
    # overwritten, so it only cost time.
    pixel_gdf['intersects'] = pixel_gdf['geometry'].apply(
        lambda pixel: any(pixel.intersects(footprint) for footprint in footprints_3857))
    within_polygon_gdf = pixel_gdf[pixel_gdf['intersects']].copy()

    print("Number of pixels in region of interest: " + str(len(within_polygon_gdf)))

    # Full 256-colour Inferno palette (the previous helper sampled all 256 entries,
    # i.e. returned the palette unchanged).
    custom_palette = list(Inferno256)

    # Create the heatmap using the centroid points of intersected pixels
    heatmap = within_polygon_gdf.hvplot.points('Longitude', 'Latitude', geo=True, c='ST_B6_Celsius', cmap=custom_palette, size=5, tiles='OSM', frame_width=700, frame_height=500, colorbar=True, clim=(20, 40))

    # Plot square polygons with the same color mapping as the points
    squares_plot = within_polygon_gdf.hvplot.polygons('geometry', c='ST_B6_Celsius', cmap=custom_palette, alpha=0.5, colorbar=True, clim=(20, 40))

    # Plot the polygon with visible settings
    polygon_plot = polygon_gdf.hvplot(geo=True, color='red', line_width=3, alpha=0.7)

    # Overlay the polygon onto the heatmap
    overlay_map = polygon_plot * heatmap * squares_plot

    return overlay_map

#### Plotting LST over time

##### Combining GDFs

In [14]:
# Year of Landsat 7 scenes to load; the archive is expected at <Data>\Landsat7\<year>.zip.
# NOTE(review): this comment used to say "Required data is from 2022 - 2024", but the
# run below targets 2020 and the combine cell merges 2020 - 2022. Confirm the range.
year = "2020"

# Suppress warnings
logging.getLogger('bokeh').setLevel(logging.ERROR)
pd.options.mode.chained_assignment = None  # default='warn'

# Specify the zip file and temporary directory for extraction
zip_file_path = f"C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\Landsat7\\{year}.zip"
temp_dir = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\temp_extract"
year_dir = os.path.join(temp_dir, year)

# Create a temporary directory if it doesn't exist
os.makedirs(temp_dir, exist_ok=True)

# Initialize an empty list to hold all the GeoDataFrames
gdfs = []

try:
    # Extract the .tif files from the zip
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(temp_dir)

    # sorted() gives a deterministic processing order regardless of the file system.
    for filename in sorted(os.listdir(year_dir)):
        if not filename.endswith(".tif"):
            continue

        print("Currently processing: " + filename)
        file_path = os.path.join(year_dir, filename)

        # Filenames look like "L7_UTC_YYYYMMDD_hhmmss.tif"; the third
        # underscore-separated token is the acquisition date.
        time_str = filename.split('_')[2]
        time_obj = datetime.strptime(time_str, "%Y%m%d")

        # Load and preprocess the GeoDataFrame
        gdf = preprocessing(file_path)
        gdf['time'] = time_obj  # Append the datetime object as a new column

        gdfs.append(gdf)
finally:
    # Remove the extracted rasters even when a file fails to process, so a failed
    # run does not leave stale scenes behind for the next one.
    shutil.rmtree(temp_dir)

if not gdfs:
    raise ValueError(f"No .tif files were found in {year_dir} after extracting {zip_file_path}")

# Combine all GeoDataFrames into one
combined_gdf = pd.concat(gdfs)

# Use the combined GeoDataFrame as needed
print(combined_gdf)

Currently processing: L7_UTC_20200403_025238.tif
Total number of valid pixels: 10985
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830

Number of points within 2000m radius: 0
Currently processing: L7_UTC_20200419_025142.tif
Total number of valid pixels: 1257048
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830

Number of points within 2000m radius: 6309
Currently processing: L7_UTC_20200505_025045.tif
Total number of valid pixels: 1295831
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830

Number of points within 2000m radius: 6751
Currently processing: L7_UTC_20200521_024946.tif
Total number of valid pixels: 1103794
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.5902

In [47]:
# import geopandas as gpd
# import hvplot.pandas
# import holoviews as hv
# from bokeh.palettes import Turbo256  # Import a predefined Bokeh palette

# # Assuming 'gdf' is your preloaded GeoDataFrame
# combined_gdf = combined_gdf.to_crs(epsg=3857)  # Convert to Web Mercator for better mapping support

# # Define a function to select a subset of the color palette
# def select_colors(palette, n):
#     return [palette[int(i)] for i in np.linspace(0, len(palette)-1, n)]

# # Create a custom color scale using a continuous palette
# custom_palette = select_colors(Turbo256, 256)  # More colors for smoother transitions

# # Create the heatmap
# heatmap = combined_gdf.hvplot.points('Longitude', 'Latitude', geo=True, c='ST_B10_Celsius',
#                             cmap=custom_palette, size=5,  # Smaller size for finer detail
#                             tiles='OSM', frame_width=700, frame_height=500,
#                             colorbar=True, clim=(20, 40))

# #file_path = "C:/LocalOneDrive/Documents/Desktop/MTI/UHI-Project/MSE-ES-UHI/MSE-ES-UHI/2_landsat/Heatmaps"

# # Set up Panel to display the plot
# heatmap_panel = hv.save(heatmap, '270524_hvPlot_Land_Surface_Temperature_Map_gradient.html', backend='bokeh')

# # Display the plot in the notebook
# heatmap_panel

##### Spatial plot over time

In [110]:
# Create an interactive plot with filtering based on the GeoDataFrame
def create_interactive_plot(combined_gdf):
    """Return a Panel layout with a slider that switches the map between dates.

    Parameters
    ----------
    combined_gdf : geopandas.GeoDataFrame
        Pixels of all scenes with a datetime ``time`` column.

    Returns
    -------
    panel.Column
        A title, an integer slider (position 1 = earliest date) and the map that
        ``plot_spatial_map`` produces for the selected date.

    Raises
    ------
    ValueError
        If ``combined_gdf`` contains no dates, since the slider would have no range.
    """
    # One 'YYYY-MM-DD' key per row, computed once and reused by every callback.
    date_keys = combined_gdf['time'].dt.strftime('%Y-%m-%d')
    unique_dates = date_keys.sort_values().unique()
    if len(unique_dates) == 0:
        raise ValueError("combined_gdf has no dates to display")

    # Slider positions are 1-based.
    date_index_map = dict(enumerate(unique_dates, start=1))

    # Setup an integer slider to select time periods
    time_slider = pn.widgets.IntSlider(name='Select Time', start=1, end=len(unique_dates), value=1, step=1)

    @pn.depends(time_slider.param.value_throttled)
    def dynamic_map(value):
        # value_throttled can still be None before the slider has been released
        # for the first time; fall back to the slider's current value then.
        if value is None:
            value = time_slider.value

        selected_date = date_index_map[value]

        # Filter data for the selected time
        filtered_data = combined_gdf[date_keys == selected_date]
        print(f"Displaying plot for {selected_date}")

        # Call plot_spatial_map for the selected time period
        return plot_spatial_map(filtered_data)

    layout = pn.Column(
        "<br>\nInteractive Land Surface Temperature Map",
        time_slider,
        dynamic_map
    )

    return layout

layout = create_interactive_plot(combined_gdf)
# layout.servable()
pn.serve(layout, show=False, start=True)

Displaying plot for 2022-02-28
Number of pixels in region of interest: 623
Launching server at http://localhost:50327


<panel.io.server.Server at 0x1f852e24160>

#### Exporting data to .csv

In [15]:
def filter_and_save_data(year_gdf, polygons, output_file):
    """Label every pixel with the first polygon it touches and export the matches to CSV.

    Parameters
    ----------
    year_gdf : geopandas.GeoDataFrame
        Point geometries (pixel centres) with a datetime ``time`` column.
    polygons : dict
        Maps a label (written to the ``block_num`` column) to a geometry in EPSG:4326.
    output_file : str
        Destination CSV path.

    Notes
    -----
    Rows are written date by date in ascending date order, keeping the input order
    within each date. Each point is expanded to a 30 m x 30 m square in EPSG:3857
    before the intersection test. A pixel touching several polygons gets the label
    of the first one in ``polygons`` order. The geometry column is not exported.
    """
    # Convert polygons dictionary to a GeoDataFrame, setting CRS to EPSG:4326 and converting to EPSG:3857
    polygon_gdf = gpd.GeoDataFrame(
        {'block_num': list(polygons.keys()), 'geometry': list(polygons.values())},
        crs='epsg:4326',
    ).to_crs('epsg:3857')

    # Plain (label, geometry) pairs: far cheaper to loop over than iterrows(),
    # which used to be called once per pixel.
    labelled_footprints = list(zip(polygon_gdf['block_num'], polygon_gdf['geometry']))

    def first_matching_block(pixel):
        """Return the label of the first footprint the pixel intersects, else None."""
        for block_num, footprint in labelled_footprints:
            if pixel.intersects(footprint):
                return block_num
        return None

    half_width = 15  # half of the 30 m pixel edge, in EPSG:3857 units

    # Date key per row, computed once instead of once per date.
    date_keys = year_gdf['time'].dt.strftime('%Y-%m-%d')

    # Collect the per-date results and concatenate once at the end, rather than
    # growing a frame inside the loop.
    per_date_frames = []
    for date in date_keys.sort_values().unique():
        # Convert CRS to EPSG:3857 and create 30m x 30m boxes around each point
        date_data = year_gdf[date_keys == date].to_crs('epsg:3857')
        date_data['geometry'] = date_data['geometry'].apply(
            lambda pt: box(pt.x - half_width, pt.y - half_width, pt.x + half_width, pt.y + half_width))

        date_data['block_num'] = [first_matching_block(pixel) for pixel in date_data['geometry']]

        # Keep only pixels that intersect some polygon
        per_date_frames.append(date_data[date_data['block_num'].notnull()])

    if per_date_frames:
        all_filtered_data = pd.concat(per_date_frames, ignore_index=True)
        # Drop the 'geometry' column as it cannot be saved directly in CSV format
        all_filtered_data = all_filtered_data.drop(columns=['geometry'])
    else:
        # No dates at all: write an empty file instead of failing on the drop.
        all_filtered_data = pd.DataFrame()

    # Save the aggregated filtered data to a CSV file
    all_filtered_data.to_csv(output_file, index=False)
    print(f"Data successfully exported to {output_file}")

combined_gdf['time'] = pd.to_datetime(combined_gdf['time'])  # Ensure 'time' is a datetime object

# Name the export after the year that was loaded into combined_gdf. The filename used
# to be hard-coded to 2020, so changing `year` would silently overwrite the 2020 export.
output_path = f'C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\FilteredData\\Tampines\\Landsat7\\Tampines_Treatment_Filtered_{year}.csv'
filter_and_save_data(combined_gdf, polygons, output_path)

Data successfully exported to C:\LocalOneDrive\Documents\Desktop\MTI\UHI-Project\MSE-ES-UHI\Data\FilteredData\Tampines\Landsat7\Tampines_Treatment_Filtered_2020.csv


##### Code to combine the yearly .csv files (2020 - 2022)

In [16]:
# Folder that holds the per-year exports and receives the combined file.
base_path = r"C:\LocalOneDrive\Documents\Desktop\MTI\UHI-Project\MSE-ES-UHI\Data\FilteredData\Tampines\Landsat7"

# Per-year exports to merge, in the order they are stacked.
files = [
    r"Tampines_Treatment_Filtered_2020.csv",
    r"Tampines_Treatment_Filtered_2021.csv",
    r"Tampines_Treatment_Filtered_2022.csv"
    # r"Tampines_Treatment_Filtered_2023.csv",
    # r"Tampines_Treatment_Filtered_2024.csv"
]

# Load each export, then stack them with a fresh 0..n-1 index.
df_list = []
for file_name in files:
    df_list.append(pd.read_csv(f"{base_path}\\{file_name}"))
combined_df = pd.concat(df_list, ignore_index=True)

# Write the stacked table next to its inputs.
combined_df.to_csv(f"{base_path}\\Tampines_Treatment_Filtered_2020_to_2022.csv", index=False)

print("Files were successfully concatenated and saved.")

Files were successfully concatenated and saved.
