#### Preprocessing data


##### Filtering by specific postal code for spatial map plotting (Hawker Centres)

In [23]:
import pandas as pd
import tabula.io as tabula

file_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\list-of-hawkercentres_28-sep-2023.pdf"
output_csv_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\concatenated_hawker_centres.csv"

# Read the PDF and extract all pages
hawker_centres_df_list = tabula.read_pdf(file_path, pages='all', multiple_tables=True)

# Process each DataFrame to set the first row as the header
for index, df in enumerate(hawker_centres_df_list):
    df.columns = df.iloc[0]  # Set the first row as column names
    df = df[1:].reset_index(drop=True)  # Drop the first row and reset index
    globals()[f'df_{index}'] = df  # Save each DataFrame to a global variable named df_index
    print(f"DataFrame {index} header set and stored as df_{index}")
    print(df)  # Display the updated DataFrame

df_1.columns = df_0.columns
df_2.columns = df_0.columns

# Concatenate df_0, df_1, and df_2
concatenated_df = pd.concat([df_0, df_1, df_2], ignore_index=True)

# Display the concatenated dataframe
# print(concatenated_df)

concatenated_df.to_csv(output_csv_path, index=False)
print(f"Concatenated DataFrame saved to {output_csv_path}") #csv has been edited for ease of data manipulation

DataFrame 0 header set and stored as df_0
0  S/No           Name of Hawker Centre  \
0     1                Adam Food Centre   
1     2         Amoy Street Food Centre   
2     3               Bedok Food Centre   
3     4             Beo Crescent Market   
4     5              Berseh Food Centre   
..  ...                             ...   
58   59  Blk 726 Clementi West Street 2   
59   60              Blk 7 Empress Road   
60   61           Blk 4A Eunos Crescent   
61   62            Blk 69 Geylang Bahru   
62   63            Blk 20 Ghim Moh Road   

0                                             Address  \
0                             2, Adam Road, S(289876)   
1   National Development Building, Annex B, Telok ...   
2                            1, Bedok Road, S(469572)   
3                        38A, Beo Crescent, S(169982)   
4                         166, Jalan Besar, S(208877)   
..                                                ...   
58         Blk 726, Clementi West Street 2

In [2]:
import pandas as pd

edited_csv_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\hawkers_list_edited.csv"

hawker_concatenated_df = pd.read_csv(edited_csv_path)

print(hawker_concatenated_df)

     S/No              Name of Hawker Centre  \
0       1                   Adam Food Centre   
1       2            Amoy Street Food Centre   
2       3                  Bedok Food Centre   
3       4                Beo Crescent Market   
4       5                 Berseh Food Centre   
..    ...                                ...   
122   123    Pasir Ris Central Hawker Centre   
123   124  Fernvale Hawker Centre and Market   
124   125          One Punggol Hawker Centre   
125   126                Senja Hawker Centre   
126   127       Bukit Canberra Hawker Centre   

                                               Address  \
0                              2, Adam Road, S(289876)   
1    National Development Building, Telok Ayer Stre...   
2                             1, Bedok Road, S(469572)   
3                         38A, Beo Crescent, S(169982)   
4                          166, Jalan Besar, S(208877)   
..                                                 ...   
122              

In [3]:
import pandas as pd
import re

# Function to extract postal codes and addresses, and map them to hawker centre names
def extract_postal_codes_and_addresses(df):
    data = []
    index_count = 1

    for index, row in df.iterrows():
        # Skip rows with missing information
        if pd.isna(row['Name of Hawker Centre']) or pd.isna(row['Address']):
            print(f"Skipped row {index} due to missing essential information.")
            continue

        # Extract the name of the hawker centre
        name = row['Name of Hawker Centre']

        # Extract address and postal code
        address_field = str(row['Address'])
        # Replace 'Ave' not followed by 'nue' with 'Avenue'
        address_field = re.sub(r'\bAve\b(?!\nue)', 'Avenue', address_field)
        postal_codes = re.findall(r'\b(\d{6})\b', address_field)
        address_without_postal = re.sub(r',?\s*S\(\d{6}\)', '', address_field).strip()

        for postal_code in postal_codes:
            data.append({
                "Hawker Centre": name,
                "Postal Code": postal_code,
                "Address": address_without_postal
            })
            print(f"{index_count}. Hawker Centre: {name}, Postal Code: {postal_code}, Address: {address_without_postal}")
            index_count += 1

    # Convert list of dictionaries to DataFrame
    return pd.DataFrame(data)

# Example usage
# Assuming concatenated_df is defined and valid
hawker_df_processed = extract_postal_codes_and_addresses(hawker_concatenated_df)

# # Print the DataFrame
# print(len(hawker_df))

1. Hawker Centre: Adam Food Centre, Postal Code: 289876, Address: 2, Adam Road
2. Hawker Centre: Amoy Street Food Centre, Postal Code: 069111, Address: National Development Building, Telok Ayer Street
3. Hawker Centre: Bedok Food Centre, Postal Code: 469572, Address: 1, Bedok Road
4. Hawker Centre: Beo Crescent Market, Postal Code: 169982, Address: 38A, Beo Crescent
5. Hawker Centre: Berseh Food Centre, Postal Code: 208877, Address: 166, Jalan Besar
6. Hawker Centre: Bukit Timah Market, Postal Code: 588215, Address: 51, Upper Bukit Timah Road
7. Hawker Centre: Chomp Chomp Food Centre, Postal Code: 557269, Address: 20, Kensington Park Road
8. Hawker Centre: Commonwealth Crescent Market, Postal Code: 149644, Address: 31, Commonwealth Crescent
9. Hawker Centre: Dunman Food Centre, Postal Code: 424768, Address: 271, Onan Road
10. Hawker Centre: East Coast Lagoon Food Village, Postal Code: 468960, Address: 1220, East Coast Parkway
11. Hawker Centre: Geylang Serai Market, Postal Code: 402001

In [4]:
print(hawker_df_processed)

                         Hawker Centre Postal Code  \
0                     Adam Food Centre      289876   
1              Amoy Street Food Centre      069111   
2                    Bedok Food Centre      469572   
3                  Beo Crescent Market      169982   
4                   Berseh Food Centre      208877   
..                                 ...         ...   
122    Pasir Ris Central Hawker Centre      519641   
123  Fernvale Hawker Centre and Market      797650   
124          One Punggol Hawker Centre      828629   
125                Senja Hawker Centre      677632   
126       Bukit Canberra Hawker Centre      756973   

                                              Address  
0                                        2, Adam Road  
1    National Development Building, Telok Ayer Street  
2                                       1, Bedok Road  
3                                   38A, Beo Crescent  
4                                    166, Jalan Besar  
..             

#### Obtaining polygons for hawker centres by closest match of address

In [5]:
# Getting coordinates of polygons
import geopandas as gpd
import json

geojson_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\SG_geojson\\SG.geojson"
geo_data = gpd.read_file(geojson_path)

buildings_json_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\buildings.json"
buildings_df = pd.read_json(buildings_json_path)

In [None]:
import geopandas as gpd

# Convert the geometry column to WKT format
geo_data['geometry_wkt'] = geo_data['geometry'].apply(lambda x: x.wkt)

# Save the GeoDataFrame to a CSV file
csv_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\SG_geojson\\SG_geo_data.csv"
geo_data.to_csv(csv_path, index=False)

print(f"Data saved to {csv_path}")

In [7]:
# Creating 'address_full' by concatenating 'addr_housenumber' and 'addr_street'
geo_data['address_full'] = geo_data.apply(lambda row: f"{row['addr_housenumber']}, {row['addr_street']}".strip(), axis=1)
print(geo_data['address_full'])

0                        None, None
1                       1, Libra Dr
2                    80, Jupiter Rd
3         14D, Serangoon Garden Way
4                        None, None
                    ...            
357578                   None, None
357579                   None, None
357580                   None, None
357581                   None, None
357582            36, Kelantan Road
Name: address_full, Length: 357583, dtype: object


In [8]:
from fuzzywuzzy import process

matched_data = []
unmatched_data = []

def find_best_match(hawker_df, geo_data, threshold=95):
    index_count = 1
    for index, hawker in hawker_df.iterrows():
        hawker_name = hawker['Hawker Centre']
        address = hawker['Address']
        postal = hawker['Postal Code']

        # Check for exact name matches first
        exact_name_matches = geo_data[geo_data['name'].str.contains(hawker_name, case=False, na=False)]
        if not exact_name_matches.empty:
            for _, row in exact_name_matches.iterrows():
                matched_data.append({
                    'Index': index_count,
                    'Hawker Centre': hawker_name,
                    'Postal': postal,
                    'Matched Name': row['name'],
                    'Matched Address': row['address_full'],
                    'Score': 100,
                    'Polygon': row['geometry']
                })
                print(f"Exact Match {index_count}: {hawker_name} -> {row['name']} (Score: 100)")
                index_count += 1
            continue

        # Fuzzy match for names if no exact matches found
        name_matches = process.extractOne(hawker_name, geo_data['name'].dropna().unique(), score_cutoff=threshold)
        if name_matches:
            matched_name, score = name_matches
            matched_row = geo_data[geo_data['name'] == matched_name]
            for _, row in matched_row.iterrows():
                matched_data.append({
                    'Index': index_count,
                    'Hawker Centre': hawker_name,
                    'Postal': postal,
                    'Matched Name': matched_name,
                    'Matched Address': row['address_full'],
                    'Score': score,
                    'Polygon': row['geometry']
                })
                print(f"Name Match {index_count}: {hawker_name} -> {matched_name} (Score: {score})")
                index_count += 1
            continue

        # Fuzzy match for addresses if no name matches found
        address_matches = process.extractOne(address, geo_data['address_full'].dropna().unique(), score_cutoff=threshold)
        if address_matches:
            matched_address, score = address_matches
            matched_row = geo_data[geo_data['address_full'] == matched_address]
            for _, row in matched_row.iterrows():
                matched_data.append({
                    'Index': index_count,
                    'Hawker Centre': hawker_name,
                    'Postal': postal,
                    'Matched Name': row['name'],
                    'Matched Address': matched_address,
                    'Score': score,
                    'Polygon': row['geometry']
                })
                print(f"Address Match {index_count}: {hawker_name} -> {matched_address} (Score: {score})")
                index_count += 1
        else:
            unmatched_data.append((hawker_name, postal, address))
            print(f"No suitable match found for: {hawker_name}")

    return matched_data

# Run the function and get the results
matched_results = find_best_match(hawker_df_processed, geo_data)

# Convert to DataFrame for easier manipulation and plotting
matched_df = pd.DataFrame(matched_results)

# Convert to GeoDataFrame
matched_gdf = gpd.GeoDataFrame(matched_df, geometry='Polygon')
print(matched_gdf)

Exact Match 1: Adam Food Centre -> Adam Food Centre (Score: 100)
Exact Match 2: Amoy Street Food Centre -> Amoy Street Food Centre (Score: 100)
Exact Match 3: Bedok Food Centre -> Bedok Food Centre (Score: 100)
Address Match 4: Beo Crescent Market -> 38A, Beo Crescent (Score: 100)
Exact Match 5: Berseh Food Centre -> Berseh Food Centre (Score: 100)
Exact Match 6: Bukit Timah Market -> Bukit Timah Market & Food Centre (Score: 100)
Exact Match 7: Chomp Chomp Food Centre -> Chomp Chomp Food Centre (Score: 100)
Address Match 8: Commonwealth Crescent Market -> 31, Commonwealth Crescent (Score: 100)
Exact Match 9: Dunman Food Centre -> Dunman Food Centre (Score: 100)
Address Match 10: East Coast Lagoon Food Village -> 1206, East Coast Parkway (Score: 96)
Name Match 11: Geylang Serai Market -> Geylang Serai Food Market (Score: 95)
Exact Match 12: Golden Mile Food Centre -> Golden Mile Food Centre (Score: 100)
Name Match 13: Holland Village Market & Food Centre -> Holland Village Market and Fo

In [9]:
# Convert the list of tuples into a DataFrame
unmatched_df = pd.DataFrame(unmatched_data, columns=['Hawker Centre', 'Postal', 'Address'])

# Print the DataFrame to verify
print(unmatched_df)

                       Hawker Centre  Postal  \
0        Market Street Hawker Centre  048947   
1           Blk 341 Ang Mo Kio Ave 1  560341   
2      Blk 226H Ang Mo Kio Street 22  568226   
3        Blk 85 Bedok North Street 4  460085   
4          Blk 2 Changi Village Road  500002   
5          Blk 3 Changi Village Road  500003   
6                Blk 89 Circuit Road  370089   
7             Blk 353 Clementi Ave 2  120353   
8                 Blk 7 Empress Road  260007   
9              Blk 22B Havelock Road  162022   
10     Blk 505 Jurong West Street 52  640505   
11             Blk 159 Mei Chin Road  140159   
12      Pek Kio Market & Food Centre  211041   
13               Blk 320 Shunfu Road  570320   
14  Hawker Centre @ Our Tampines Hub  528523   

                                     Address  
0   86, Market Street, CapitaSpring Building  
1               Blk 341, Ang Mo Kio Avenue 1  
2             Blk 226H, Ang Mo Kio Street 22  
3               Blk 85, Bedok North Street 

In [None]:
# Unmatched indexes: 0, 1, 6, 7, 10, 11, 12, 13
# 2 -> 117227115
# 3 -> 364714480
# 4 -> 17292513
# 5 -> 17292513
# 8 -> 152302087
# 9 -> 481534992
# 14 -> 231670457

##### Manually matching unmatched addresses

In [10]:
import pandas as pd
import geopandas as gpd
import warnings

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

matched_df_copy = matched_df

# Assuming matched_df is already defined
last_index = matched_df_copy.index[-1] if not matched_df_copy.empty else 0

# Index to OSM ID mapping
index_to_osm_id = {
    2: 117227115,
    3: 364714480,
    4: 17292513,
    5: 17292513,
    8: 152302087,
    9: 481534992,
    14: 231670457
}

for index, osm_id in index_to_osm_id.items():
    if index in unmatched_df.index:
        osm_geometry = geo_data.loc[geo_data['osm_id'] == osm_id, 'geometry'].values[0]
        hawker_name = unmatched_df.loc[index, 'Hawker Centre']
        postal = unmatched_df.loc[index, 'Postal']
        matched_address = unmatched_df.loc[index, 'Address']
        
        # Append to matched_df
        matched_df_copy = matched_df_copy.append({
            'Index': last_index + 1,
            'Hawker Centre': hawker_name,
            'Postal': postal,
            'Matched Name': hawker_name,  # Assuming you want to use the hawker name as the matched name
            'Matched Address': matched_address,
            'Score': 100,  # Assign a score of 100 for manual matches
            'Polygon': osm_geometry
        }, ignore_index=True)

        last_index += 1  # Increment the index for the next entry
        print(f"Appended geometry for index {last_index}, osm_id {osm_id}, hawker name: {hawker_name}")

# Verify by printing out part of the matched_df
print(matched_df_copy.tail())  # Print the last few entries to check the appended rows

Appended geometry for index 115, osm_id 117227115, hawker name: Blk 226H Ang Mo Kio Street 22
Appended geometry for index 116, osm_id 364714480, hawker name: Blk 85 Bedok North Street 4
Appended geometry for index 117, osm_id 17292513, hawker name: Blk 2 Changi Village Road
Appended geometry for index 118, osm_id 17292513, hawker name: Blk 3 Changi Village Road
Appended geometry for index 119, osm_id 152302087, hawker name: Blk 7 Empress Road
Appended geometry for index 120, osm_id 481534992, hawker name: Blk 22B Havelock Road
Appended geometry for index 121, osm_id 231670457, hawker name: Hawker Centre @ Our Tampines Hub
     Index                     Hawker Centre  Postal  \
117    117         Blk 2 Changi Village Road  500002   
118    118         Blk 3 Changi Village Road  500003   
119    119                Blk 7 Empress Road  260007   
120    120             Blk 22B Havelock Road  162022   
121    121  Hawker Centre @ Our Tampines Hub  528523   

                         Matched 

In [11]:
import pandas as pd

# Assuming 'matched_df_copy' is defined and has columns 'Hawker Centre' and 'Polygon'
def compile_polygons_to_dict(matched_df):
    polygon_dict = {}
    for index, row in matched_df.iterrows():
        hawker_name = row['Hawker Centre']
        polygon = row['Polygon']
        if hawker_name in polygon_dict:
            print(f"Duplicate entry for {hawker_name}. Existing polygon will be overwritten.")
        polygon_dict[hawker_name] = polygon
    return polygon_dict

# Here, 'matched_df_copy' should have columns named 'Hawker Centre' and 'Polygon'
polygons = compile_polygons_to_dict(matched_df_copy)

# Print the dictionary to verify
for hawker, poly in polygons.items():
    print(f"Hawker Centre: {hawker}, Polygon: {poly}")

Duplicate entry for Blk 538 Bedok North Street 3. Existing polygon will be overwritten.
Duplicate entry for Blk 221A Boon Lay Place. Existing polygon will be overwritten.
Duplicate entry for Chong Pang Market & Food Centre. Existing polygon will be overwritten.
Duplicate entry for Blk 6 Tanjong Pagar Plaza. Existing polygon will be overwritten.
Duplicate entry for Blk 6 Tanjong Pagar Plaza. Existing polygon will be overwritten.
Hawker Centre: Adam Food Centre, Polygon: POLYGON ((103.8139964 1.3241289, 103.8140581 1.3240364, 103.8141399 1.3240016, 103.8142351 1.3240257, 103.8142834 1.3240793, 103.8142995 1.3241745, 103.8142472 1.3242778, 103.8141238 1.3243099, 103.8140219 1.3242456, 103.8139964 1.3241289))
Hawker Centre: Amoy Street Food Centre, Polygon: POLYGON ((103.8464418 1.2789964, 103.846558 1.2790928, 103.8466894 1.2789212, 103.8468986 1.2796908, 103.8468852 1.2797364, 103.8468315 1.2797551, 103.8467967 1.2797256, 103.8466062 1.2797739, 103.8465472 1.2795433, 103.846574 1.2795111

#### Plotting spatial map for one hawker centre for verification

In [30]:
# Getting first postal code for plotting
postal_code = matched_df_copy['Postal'][1]

print(postal_code)

069111


In [31]:
import geopandas as gpd
import numpy as np

# Getting postal code
postalcode_geojson_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\ADDRPT.geojson"

# Load GeoJSON data into a GeoDataFrame
postalcode_gdf = gpd.read_file(postalcode_geojson_path)

# Define a dictionary to store coordinates
coordinates_dict = {}

# Function to get coordinates by postal code
def get_coordinates_by_postal_code(postal_code):
    postal_data = postalcode_gdf[postalcode_gdf['POSTAL_CODE'] == postal_code]
    if not postal_data.empty:
        # Extract the coordinates of the first matching entry
        longitude = postal_data.geometry.x.values[0]
        latitude = postal_data.geometry.y.values[0]
        return longitude, latitude
    else:
        return None, None

# Fetch coordinates for each postal code and store them
coordinates_dict[postal_code] = get_coordinates_by_postal_code(postal_code)

# Extract coordinates and calculate the central point
longitudes = []
latitudes = []

for postal_code, coords in coordinates_dict.items():
    if coords[0] is not None and coords[1] is not None:
        longitudes.append(coords[0])
        latitudes.append(coords[1])
        print(f'Coordinates for postal code {postal_code}: Longitude {coords[0]}, Latitude {coords[1]}')

if longitudes and latitudes:
    avg_longitude = np.mean(longitudes)
    avg_latitude = np.mean(latitudes)
    print(f'\nCentral coordinates:')
    print(f'Longitude: {avg_longitude}, Latitude: {avg_latitude}')
else:
    print('Coordinates for some or all postal codes not found.')

Coordinates for postal code 069111: Longitude 103.84661927383159, Latitude 1.2792312094873002

Central coordinates:
Longitude: 103.84661927383159, Latitude: 1.2792312094873002


In [60]:
# Converting x and y to coordinates for latitude/longitude
import rasterio
import numpy as np
import pandas as pd
from pyproj import Transformer
from shapely.geometry import Point

global filtered_df

def preprocessing(file_path):   
    global filtered_df
    
    # Open your GeoTIFF file
    with rasterio.open(file_path) as src:
        array = src.read()
        transform = src.transform
        src_crs = src.crs  # Source CRS
        # dest_crs = 'EPSG:4326'  # WGS 84

        # Create a transformer object to convert from src_crs to dest_crs
        transformer = Transformer.from_crs(src_crs, 'EPSG:4326', always_xy=True)

        # Get arrays of column and row indices
        cols, rows = np.meshgrid(np.arange(array.shape[2]), np.arange(array.shape[1]))
        
        # Convert meshgrid arrays to coordinate arrays using rasterio's method, which are 2D
        xs, ys = rasterio.transform.xy(transform, rows, cols, offset='center')
        
        # Flatten the coordinate arrays to pass to transform function
        lon, lat = transformer.transform(np.array(xs).flatten(), np.array(ys).flatten())

        # Create DataFrame and convert to GeoDataFrame
        df = pd.DataFrame({'Longitude': lon, 'Latitude': lat})
        for i, band in enumerate(src.read(masked=True)):
            df[src.descriptions[i]] = band.flatten()

        # Convert 'SR_QA_AEROSOL' to integer for bitwise operation
        df['SR_QA_AEROSOL'] = df['SR_QA_AEROSOL'].astype(int)

        # Filter out pixels with valid aerosol retrieval and high aerosol level
        # Assuming 'SR_QA_AEROSOL' is the name of the QA aerosol band in the data
        valid_aerosol = (df['SR_QA_AEROSOL'] & 2) == 2  # Bit 1 must be set for valid retrieval
        high_aerosol = (df['SR_QA_AEROSOL'] & 192) == 192  # Bits 6-7 must be set to 11 for high aerosol
        filter_mask = valid_aerosol & high_aerosol
        df_filtered = df[-filter_mask]
        
        # Scale and offset specific bands
        df_filtered['ST_B10_Celsius'] = df_filtered['ST_B10'] * 0.00341802 + 149 - 273.15
        df_filtered = df_filtered[df_filtered['ST_B10_Celsius'] >= 20]  # Drop rows below 20 degrees Celsius
        
        bands_to_scale = ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']
        for band in bands_to_scale:
            df_filtered[f"{band}_Scaled"] = df_filtered[band] * 2.75e-05 - 0.2

        additional_scales = {
            'ST_ATRAN': 0.0001, 'ST_CDIST': 0.01, 'ST_DRAD': 0.001, 
            'ST_EMIS': 0.0001, 'ST_EMSD': 0.0001, 'ST_QA': 0.01, 
            'ST_TRAD': 0.001, 'ST_URAD': 0.001
        }

        for band, scale in additional_scales.items():
            df_filtered[f"{band}_Scaled"] = df_filtered[band] * scale

        gdf = gpd.GeoDataFrame(df_filtered, geometry=gpd.points_from_xy(df_filtered.Longitude, df_filtered.Latitude))
        gdf.set_crs('EPSG:4326', inplace=True)

        print("Total number of valid pixels: " + str(len(gdf)))
        print(df[['Latitude', 'Longitude']].head())

        gdf = gdf.to_crs('EPSG:3857')

        transformer_2 = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)

        avg_longitude_3857, avg_latitude_3857 = transformer_2.transform(avg_longitude, avg_latitude)

        # Define your point of interest and buffer distance in meters
        poi = Point(avg_longitude_3857, avg_latitude_3857)
        desired_radius = 300
        buffer = poi.buffer(desired_radius)  # Convert meters to degrees approximately

        # Filter points within the buffer
        filtered_gdf = gdf[gdf.geometry.within(buffer)]

        # Save or process your filtered data
        print(f"\nNumber of points within {desired_radius}m radius: {len(filtered_gdf)}")
        #print(filtered_gdf['ST_B10_Celsius'].head())

        filtered_gdf = filtered_gdf.to_crs('EPSG:4326')

    return filtered_gdf

In [59]:
import geopandas as gpd
import hvplot.pandas
import pandas as pd
from shapely.geometry import Polygon, box
import panel as pn
from bokeh.palettes import Inferno256
import numpy as np
import logging

# Suppress warnings
logging.getLogger('bokeh').setLevel(logging.ERROR)
pd.options.mode.chained_assignment = None  # default='warn'

global within_polygon_gdf

def plot_spatial_map(filtered_gdf): 
    global within_polygon_gdf
    
    filtered_gdf = filtered_gdf.to_crs('epsg:3857')

    # print(filtered_gdf['geometry'])

    # Create pixels as 30m x 30m boxes around each point
    # Assuming each point is at the center of the pixel
    half_width = 15  # half the width of the pixel in meters since the EPSG:3857 coordinate system is in metres
    filtered_gdf['geometry'] = filtered_gdf['geometry'].apply(lambda x: box(x.x - half_width, x.y - half_width, x.x + half_width, x.y + half_width))

    #print(filtered_gdf['geometry'])

    # Create a GeoDataFrame from all polygons and convert CRS to match
    polygon_gdf = gpd.GeoDataFrame({'geometry': list(polygons.values())}, crs='epsg:4326')
    polygon_gdf_3857 = polygon_gdf.to_crs('epsg:3857')
    selected_polygon = polygon_gdf_3857.iloc[1].geometry

    # # Filter points that intersect any polygon
    # def intersects_any_polygon(point):
    #     return any(point.intersects(poly) for poly in polygon_gdf['geometry'])
    
    # filtered_gdf['intersects'] = filtered_gdf['geometry'].apply(intersects_any_polygon)

    # # Check intersection with any polygon
    # within_polygon_gdf = filtered_gdf[filtered_gdf['intersects']].copy()

    # print(polygon_gdf_3857['geometry'])

    # # Filter points that intersect any polygon
    # filtered_gdf['intersects'] = filtered_gdf['geometry'].apply(
    #     lambda geom: any(geom.intersects(poly) for poly in polygon_gdf_3857['geometry']))

    # Filter points that intersect the selected polygon
    filtered_gdf['intersects'] = filtered_gdf['geometry'].apply(lambda geom: geom.intersects(selected_polygon))
    
    within_polygon_gdf = filtered_gdf[filtered_gdf['intersects']].copy()

    print("Number of pixels in region of interest: " + str(len(within_polygon_gdf)))

    # Print or use the filtered GeoDataFrame as needed
    # print("\nNumber of points within the region of interest: " + str(len(within_polygon_gdf)))

    # # Print the centroids of the intersected pixels
    # for index, row in within_polygon_gdf.iterrows():
    #     centroid = row['geometry'].centroid
    #     print(f"Longitude: {centroid.x}, Latitude: {centroid.y}")

    # Define a function to select a subset of the color palette
    def select_colors(palette, n):
        return [palette[int(i)] for i in np.linspace(0, len(palette)-1, n)]

    # Create a custom color scale using a continuous palette
    custom_palette = select_colors(Inferno256, 256)  # More colors for smoother transitions

    # Create the heatmap using the centroid points of intersected pixels
    heatmap = within_polygon_gdf.hvplot.points('Longitude', 'Latitude', geo=True, c='ST_B10_Celsius', cmap=custom_palette, size=5, tiles='OSM', frame_width=700, frame_height=500, colorbar=True, clim=(20, 40))

    # Plot square polygons with the same color mapping as the points
    squares_plot = within_polygon_gdf.hvplot.polygons('geometry', c='ST_B10_Celsius', cmap=custom_palette, alpha=0.5, colorbar=True, clim=(20, 40))

    # Plot the polygon with visible settings
    # polygon_plot = polygon_gdf.hvplot(geo=True, color='red', line_width=3, alpha=0.7)

    # Create a GeoDataFrame from the selected polygon
    selected_polygon_gdf = gpd.GeoDataFrame({'geometry': [selected_polygon]}, crs='epsg:3857')

    # Plot the selected polygon with visible settings
    selected_polygon_plot = selected_polygon_gdf.hvplot(geo=True, color='red', line_width=3, alpha=0.7)

    # Overlay the polygon onto the heatmap
    overlay_map = selected_polygon_plot * heatmap * squares_plot

    # Set up Panel to display the plot
    # pane = pn.panel(overlay_map)

    # pane.show()
    # pane.save(f'C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\MSE-ES-UHI\\2_landsat\\Heatmaps\\{postal_code_112}_{satellite_image}_LST_Filtered.html', embed=True)

    return overlay_map

#### Plotting LST over time

##### Combining GDFs

In [34]:
import geopandas as gpd
import pandas as pd
import os
import zipfile
from datetime import datetime
import logging
import shutil

# Required data is from 2022 - 2024
year = "2022"

# Suppress warnings
logging.getLogger('bokeh').setLevel(logging.ERROR)
pd.options.mode.chained_assignment = None  # default='warn'

# Specify the zip file and temporary directory for extraction
zip_file_path = f"C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\Landsat8\\{year}.zip"
temp_dir = f"C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\temp_extract"

# Create a temporary directory if it doesn't exist
os.makedirs(temp_dir, exist_ok=True)

# Extract the .tif files from the zip
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(temp_dir)

# Initialize an empty list to hold all the GeoDataFrames
gdfs = []

# Walk through the temporary directory and process each .tif file
for filename in os.listdir(f"{temp_dir}\\{year}"):
    if filename.endswith(".tif"):
        print("Currently processing: " + filename)
        file_path = os.path.join(f"{temp_dir}\\{year}", filename)
        
        # Extract the time period from the filename
        # Assuming filename format is "L8_UTC_YYYYMMDD_hhmmss.tif"
        time_str = filename.split('_')[2]
        time_obj = datetime.strptime(time_str, "%Y%m%d")
        
        # Load and preprocess the GeoDataFrame
        gdf = preprocessing(file_path)
        gdf['time'] = time_obj  # Append the datetime object as a new column
        
        # Append the processed GeoDataFrame to the list
        gdfs.append(gdf)

# Combine all GeoDataFrames into one
combined_gdf = pd.concat(gdfs)

shutil.rmtree(f"C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\temp_extract")

# Use the combined GeoDataFrame as needed
print(combined_gdf)

Currently processing: L8_UTC_20220111_031652.tif
Total number of valid pixels: 487951
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830

Number of points within 300m radius: 300
Currently processing: L8_UTC_20220127_031646.tif
Total number of valid pixels: 121945
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830

Number of points within 300m radius: 0
Currently processing: L8_UTC_20220212_031643.tif
Total number of valid pixels: 782498
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830

Number of points within 300m radius: 298
Currently processing: L8_UTC_20220228_031639.tif
Total number of valid pixels: 259938
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1

##### Spatial plot over time

In [57]:
import panel as pn

# Create an interactive plot with filtering based on the GeoDataFrame
def create_interactive_plot(combined_gdf):
    # Create a list of unique dates sorted
    unique_dates = combined_gdf['time'].dt.strftime('%Y-%m-%d').sort_values().unique()
    # print(f"Unique Dates: {unique_dates}")

    date_index_map = {i + 1: date for i, date in enumerate(unique_dates)}

    # Setup an integer slider to select time periods
    time_slider = pn.widgets.IntSlider(name='Select Time', start=1, end=len(unique_dates), value=1, step=1)

    @pn.depends(time_slider.param.value_throttled)
    def dynamic_map(value):
        selected_date = date_index_map[value]
        selected_datetime = pd.to_datetime(selected_date).date()
        
        # Filter data for the selected time
        filtered_data = combined_gdf[combined_gdf['time'].dt.date == selected_datetime]
        print(f"Displaying plot for " + str(selected_date))
        
        # Call plot_spatial_map for the selected time period
        return plot_spatial_map(filtered_data)

    layout = pn.Column(
        "<br>\nInteractive Land Surface Temperature Map",
        time_slider,
        dynamic_map
    )

    return layout

layout = create_interactive_plot(combined_gdf)
# layout.servable()
pn.serve(layout, show=False, start=True)

Displaying plot for 2022-01-11
Number of pixels in region of interest: 7
Launching server at http://localhost:64348


<panel.io.server.Server at 0x1ff44781cf0>

## Plotting points that overlap with polygons (filtering from all points in Singapore)

In [66]:
# Converting x and y to coordinates for latitude/longitude
import rasterio
import numpy as np
import pandas as pd
from pyproj import Transformer
from shapely.geometry import Point

global filtered_df

def preprocessing_sg(file_path, singapore_geojson_path):   
    global filtered_df
    
    # Open your GeoTIFF file
    with rasterio.open(file_path) as src:
        array = src.read()
        transform = src.transform
        src_crs = src.crs  # Source CRS
        # dest_crs = 'EPSG:4326'  # WGS 84

        # Create a transformer object to convert from src_crs to dest_crs
        transformer = Transformer.from_crs(src_crs, 'EPSG:4326', always_xy=True)

        # Get arrays of column and row indices
        cols, rows = np.meshgrid(np.arange(array.shape[2]), np.arange(array.shape[1]))
        
        # Convert meshgrid arrays to coordinate arrays using rasterio's method, which are 2D
        xs, ys = rasterio.transform.xy(transform, rows, cols, offset='center')
        
        # Flatten the coordinate arrays to pass to transform function
        lon, lat = transformer.transform(np.array(xs).flatten(), np.array(ys).flatten())

        # Create DataFrame and convert to GeoDataFrame
        df = pd.DataFrame({'Longitude': lon, 'Latitude': lat})
        for i, band in enumerate(src.read(masked=True)):
            df[src.descriptions[i]] = band.flatten()

        # Convert 'SR_QA_AEROSOL' to integer for bitwise operation
        df['SR_QA_AEROSOL'] = df['SR_QA_AEROSOL'].astype(int)

        # Filter out pixels with valid aerosol retrieval and high aerosol level
        # Assuming 'SR_QA_AEROSOL' is the name of the QA aerosol band in the data
        valid_aerosol = (df['SR_QA_AEROSOL'] & 2) == 2  # Bit 1 must be set for valid retrieval
        high_aerosol = (df['SR_QA_AEROSOL'] & 192) == 192  # Bits 6-7 must be set to 11 for high aerosol
        filter_mask = valid_aerosol & high_aerosol
        df_filtered = df[-filter_mask]
        
        # Scale and offset specific bands
        df_filtered['ST_B10_Celsius'] = df_filtered['ST_B10'] * 0.00341802 + 149 - 273.15
        df_filtered = df_filtered[df_filtered['ST_B10_Celsius'] >= 20]  # Drop rows below 20 degrees Celsius
        
        bands_to_scale = ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']
        for band in bands_to_scale:
            df_filtered[f"{band}_Scaled"] = df_filtered[band] * 2.75e-05 - 0.2

        additional_scales = {
            'ST_ATRAN': 0.0001, 'ST_CDIST': 0.01, 'ST_DRAD': 0.001, 
            'ST_EMIS': 0.0001, 'ST_EMSD': 0.0001, 'ST_QA': 0.01, 
            'ST_TRAD': 0.001, 'ST_URAD': 0.001
        }

        for band, scale in additional_scales.items():
            df_filtered[f"{band}_Scaled"] = df_filtered[band] * scale

        gdf = gpd.GeoDataFrame(df_filtered, geometry=gpd.points_from_xy(df_filtered.Longitude, df_filtered.Latitude))
        gdf.set_crs('EPSG:4326', inplace=True)

        print("Total number of valid pixels: " + str(len(gdf)))
        print(df[['Latitude', 'Longitude']].head())

        gdf = gdf.to_crs('EPSG:3857')

        # Read the Singapore boundary GeoJSON, convert to Web Mercator
        singapore_boundary = gpd.read_file(singapore_geojson_path)
        singapore_boundary = singapore_boundary.to_crs('EPSG:3857')  # Ensure the CRS matches

        # Perform spatial join to filter points within Singapore
        filtered_gdf = gpd.sjoin(gdf, singapore_boundary, how="inner", op='within')

        # Print the total number of valid pixels within Singapore
        print("Total number of valid pixels within Singapore: " + str(len(filtered_gdf)))

        filtered_gdf = filtered_gdf.to_crs('EPSG:4326')

    return filtered_gdf

##### Filtering 30m x 30m pixels based on region of interest

##### Using EPSG:3857 allows you to blow up the pixels in metres because the coordinate representation is in metres

In [73]:
import geopandas as gpd
import hvplot.pandas
import pandas as pd
from shapely.geometry import Polygon, box
import panel as pn
from bokeh.palettes import Inferno256
import numpy as np
import logging

# Suppress warnings
logging.getLogger('bokeh').setLevel(logging.ERROR)
pd.options.mode.chained_assignment = None  # default='warn'

global within_polygon_gdf

def filtered_sg(filtered_gdf): 
    global within_polygon_gdf
    
    filtered_gdf = filtered_gdf.to_crs('epsg:3857')

    # print(filtered_gdf['geometry'])

    # Create pixels as 30m x 30m boxes around each point
    # Assuming each point is at the center of the pixel
    half_width = 15  # half the width of the pixel in meters since the EPSG:3857 coordinate system is in metres
    filtered_gdf['geometry'] = filtered_gdf['geometry'].apply(lambda x: box(x.x - half_width, x.y - half_width, x.x + half_width, x.y + half_width))

    #print(filtered_gdf['geometry'])

    # Create a GeoDataFrame from all polygons and convert CRS to match
    polygon_gdf = gpd.GeoDataFrame({'geometry': list(polygons.values()), 'Hawker Centre': list(polygons.keys())}, crs='epsg:4326')
    polygon_gdf_3857 = polygon_gdf.to_crs('epsg:3857')

    # print(polygon_gdf_3857['geometry'])

    # Filter points that intersect each polygon and track hawker centre name
    within_polygon_gdf = pd.DataFrame()  # initialize an empty dataframe
    for idx, row in polygon_gdf_3857.iterrows():
        intersects = filtered_gdf['geometry'].apply(lambda geom: geom.intersects(row['geometry']))
        if intersects.any():
            current_data = filtered_gdf[intersects].copy()
            current_data['Hawker Centre'] = row['Hawker Centre']  # append the name of the hawker centre
            within_polygon_gdf = pd.concat([within_polygon_gdf, current_data], ignore_index=True)

    print("Number of pixels in regions of interest: " + str(len(within_polygon_gdf)))
    print(within_polygon_gdf)

    return within_polygon_gdf

#### Plotting LST over time

##### Combining GDFs

In [95]:
import geopandas as gpd
import pandas as pd
import os
import zipfile
from datetime import datetime
import logging
import shutil

singapore_geojson_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\singapore-boundary.geojson"

# Required data is from 2022 - 2024
year = "2020"

# Suppress warnings
logging.getLogger('bokeh').setLevel(logging.ERROR)
pd.options.mode.chained_assignment = None  # default='warn'

# Specify the zip file and temporary directory for extraction
zip_file_path = f"C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\Landsat8\\{year}.zip"
temp_dir = f"C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\temp_extract"

# Create a temporary directory if it doesn't exist
os.makedirs(temp_dir, exist_ok=True)

# Extract the .tif files from the zip
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(temp_dir)

# Initialize an empty list to hold all the GeoDataFrames
gdfs = []

# Walk through the temporary directory and process each .tif file
for filename in os.listdir(f"{temp_dir}\\{year}"):
    if filename.endswith(".tif"):
        print("Currently processing: " + filename)
        file_path = os.path.join(f"{temp_dir}\\{year}", filename)
        
        # Extract the time period from the filename
        # Assuming filename format is "L8_UTC_YYYYMMDD_hhmmss.tif"
        time_str = filename.split('_')[2]
        time_obj = datetime.strptime(time_str, "%Y%m%d")
        
        # Load and preprocess the GeoDataFrame
        gdf = preprocessing_sg(file_path, singapore_geojson_path)
        gdf['time'] = time_obj  # Append the datetime object as a new column
        
        # Append the processed GeoDataFrame to the list
        gdfs.append(gdf)

# Combine all GeoDataFrames into one
combined_gdf_sg = pd.concat(gdfs)

shutil.rmtree(f"C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\temp_extract")

# Use the combined GeoDataFrame as needed
print(combined_gdf_sg)

Currently processing: L8_UTC_20200106_031650.tif
Total number of valid pixels: 1912476
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830
Total number of valid pixels within Singapore: 645943
Currently processing: L8_UTC_20200122_031646.tif
Total number of valid pixels: 1731121
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830
Total number of valid pixels within Singapore: 560469
Currently processing: L8_UTC_20200207_031641.tif
Total number of valid pixels: 1216672
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830
Total number of valid pixels within Singapore: 503453
Currently processing: L8_UTC_20200223_031638.tif
Total number of valid pixels: 1345446
   Latitude   Longitude
0  1.470099  103.589751
1  1.47009

In [96]:
# Filter data for the date '2022-04-01'
specific_date_gdf = combined_gdf_sg[combined_gdf_sg['time'].dt.date == pd.to_datetime('2020-07-16').date()]

print("Total number of valid pixels: " + str(len(specific_date_gdf)))

all_hawkers_filtered_df = filtered_sg(specific_date_gdf)

# Filtered dataframe for specific date, with name of hawker centre attached to pixels
print(all_hawkers_filtered_df)

Total number of valid pixels: 650195
Number of pixels in regions of interest: 1032
       Longitude  Latitude   SR_B1   SR_B2    SR_B3    SR_B4    SR_B5  \
0     103.813879  1.324239  7340.0  7704.0   8355.0   8100.0  17129.0   
1     103.814149  1.324239  8610.0  9050.0   9531.0  10277.0  17271.0   
2     103.814419  1.324239  8807.0  9233.0  10210.0  10472.0  16694.0   
3     103.814149  1.323968  8907.0  9325.0   9983.0  10220.0  16700.0   
4     103.814419  1.323968  8359.0  8648.0   9542.0   9503.0  16653.0   
...          ...       ...     ...     ...      ...      ...      ...   
1027  103.940050  1.351976  8546.0  8869.0   9973.0   9908.0  14476.0   
1028  103.940320  1.351976  7920.0  8287.0   9845.0   9193.0  15359.0   
1029  103.940590  1.351976  7562.0  8147.0   9629.0   9019.0  14203.0   
1030  103.940859  1.351976  8156.0  9066.0  10054.0  10366.0  15085.0   
1031  103.941129  1.351977  9372.0  9820.0  11113.0  11356.0  16525.0   

        SR_B6    SR_B7  SR_QA_AEROSOL  .

In [97]:
import pandas as pd

# Assuming combined_gdf_sg is defined and valid
unique_dates = combined_gdf_sg['time'].dt.date.unique()  # Get unique dates

# Exclude '2022-04-01' from processing
unique_dates = [date for date in unique_dates if date != pd.to_datetime('2020-07-16').date()]

# Initialize an empty DataFrame to collect all data for 2022
all_data_2020 = pd.DataFrame()

for date in unique_dates:
    # Filter data for the current date
    specific_date_gdf = combined_gdf_sg[combined_gdf_sg['time'].dt.date == date]

    if len(specific_date_gdf) == 0:
        print(f"No valid pixels for date {date}. Skipping...")
        continue

    # Process the filtered GeoDataFrame for the specific date
    all_hawkers_filtered_df_2 = filtered_sg(specific_date_gdf)

    # Check if any data was returned after filtering
    if all_hawkers_filtered_df_2.empty:
        print(f"No valid pixels found for date {date}.")
    else:
        print(f"Finished processing date {date}. Number of pixels in regions of interest: {len(all_hawkers_filtered_df_2)}")

    # Append the data from this date to the cumulative DataFrame for the year
    all_data_2020 = pd.concat([all_data_2020, all_hawkers_filtered_df_2], ignore_index=True)

all_data_2020 = pd.concat([all_data_2020, all_hawkers_filtered_df], ignore_index=True)

# Print the total collected data for 2022
print(f"Total data collected for 2020: {len(all_data_2020)}")

Number of pixels in regions of interest: 1039
       Longitude  Latitude    SR_B1    SR_B2    SR_B3    SR_B4    SR_B5  \
0     103.813879  1.324239   6247.0   6767.0   8205.0   8031.0  11474.0   
1     103.814149  1.324239   6553.0   6988.0   8102.0   8052.0  11431.0   
2     103.814419  1.324239   7143.0   7554.0   8502.0   8619.0  11192.0   
3     103.814149  1.323968   6431.0   6848.0   7762.0   7789.0  10575.0   
4     103.814419  1.323968   6615.0   7052.0   7927.0   7866.0  11095.0   
...          ...       ...      ...      ...      ...      ...      ...   
1034  103.940050  1.351976  10700.0  11004.0  12189.0  12284.0  15204.0   
1035  103.940320  1.351976  10165.0  10959.0  12306.0  12726.0  17059.0   
1036  103.940590  1.351976   9074.0   9557.0  10942.0  11188.0  17326.0   
1037  103.940859  1.351976   9186.0   9826.0  11245.0  11341.0  16795.0   
1038  103.941129  1.351977   9720.0  10735.0  12137.0  12903.0  18212.0   

        SR_B6    SR_B7  SR_QA_AEROSOL  ...  ST_DRAD_S

In [98]:
# Specify the file path where you want to save the CSV
output_csv_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\FilteredData\\HawkerCentres\\all_data_2020.csv"

# Save the DataFrame to CSV
all_data_2020.to_csv(output_csv_path, index=False)

print(f"Data successfully saved to {output_csv_path}")

Data successfully saved to C:\LocalOneDrive\Documents\Desktop\MTI\UHI-Project\MSE-ES-UHI\Data\FilteredData\HawkerCentres\all_data_2020.csv


##### Codes to combine .csv files for 2022 - 2024

In [58]:
import pandas as pd

# Define the base file path
base_path = r"C:\LocalOneDrive\Documents\Desktop\MTI\UHI-Project\MSE-ES-UHI\Data\FilteredData\Tampines"

# File names
files = [
    r"Tampines_Filtered_2022.csv",
    r"Tampines_Filtered_2023.csv",
    r"Tampines_Filtered_2024.csv"
]

# Read and concatenate the CSV files
df_list = [pd.read_csv(f"{base_path}\\{file_name}") for file_name in files]
combined_df = pd.concat(df_list, ignore_index=True)

# Save the combined DataFrame to a new CSV file
combined_df.to_csv(f"{base_path}\\Tampines_Filtered_2022_to_2024.csv", index=False)

print("Files were successfully concatenated and saved.")

Files were successfully concatenated and saved.
