#### Preprocessing data into a GeoDataFrame

##### Filtering by specific postal code

In [16]:
import geopandas as gpd

# Address-point GeoJSON to search.
# NOTE(review): absolute, machine-specific Windows path — adjust before running elsewhere.
geojson_path = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\ADDRPT.geojson"
# Postal codes to look up, written as strings (note the leading zero) because
# they are compared with `==` against the 'POSTAL_CODE' column below.
postal_code_112 = "090112"
postal_code_114 = "090114"

# Load the GeoJSON file into a GeoDataFrame; `gdf` is read as a notebook-level
# global by get_coordinates_by_postal_code.
gdf = gpd.read_file(geojson_path)

# Function to retrieve coordinates by postal code
def get_coordinates_by_postal_code(postal_code, source_gdf=None):
    """Return the (x, y) of the first record whose 'POSTAL_CODE' equals `postal_code`.

    Parameters
    ----------
    postal_code : str
        Value compared with ``==`` against the 'POSTAL_CODE' column.
    source_gdf : DataFrame-like, optional
        Table to search. It must expose a 'POSTAL_CODE' column and a
        ``geometry`` column whose entries have ``.x`` / ``.y`` attributes.
        When omitted, the notebook-level ``gdf`` loaded from the GeoJSON file
        is used, which keeps existing one-argument calls working.

    Returns
    -------
    tuple
        ``(x, y)`` of the first matching geometry, or ``(None, None)`` when no
        row matches. Callers must therefore test the result with
        ``is not None`` rather than truthiness.
    """
    # Fall back to the notebook global only when no table is supplied, so the
    # lookup does not silently depend on whichever `gdf` was assigned last.
    table = gdf if source_gdf is None else source_gdf

    matches = table[table['POSTAL_CODE'] == postal_code]
    if matches.empty:
        return None, None

    # Only the first match is used, even if several rows share the postal code.
    point = matches.iloc[0].geometry
    return point.x, point.y

# Look up both blocks; each call yields (None, None) when the code is absent.
longitude_112, latitude_112 = get_coordinates_by_postal_code(postal_code_112)
longitude_114, latitude_114 = get_coordinates_by_postal_code(postal_code_114)

# Test against None explicitly: a plain truthiness check would report a
# legitimate coordinate of exactly 0.0 as "not found".
if all(value is not None for value in (longitude_112, latitude_112, longitude_114, latitude_114)):
    print(f'Coordinates for postal code {postal_code_112}: Longitude {longitude_112}, Latitude {latitude_112}')
    print(f'Coordinates for postal code {postal_code_114}: Longitude {longitude_114}, Latitude {latitude_114}')
else:
    print('Postal code not found.')

Coordinates for postal code 090112: Longitude 103.82593292805574, Latitude 1.2745285256209595
Coordinates for postal code 090114: Longitude 103.82588719010951, Latitude 1.2750718182249274


In [17]:
# Midpoint (simple arithmetic mean of longitude/latitude) between the address
# points of postal codes 090112 and 090114 (Blocks 112 and 114).
# Test against None explicitly: a truthiness check would reject a valid 0.0.
if all(value is not None for value in (longitude_112, latitude_112, longitude_114, latitude_114)):
    # Calculate the average coordinates
    avg_longitude = (longitude_112 + longitude_114) / 2
    avg_latitude = (latitude_112 + latitude_114) / 2
    print(f'Average coordinates between postal codes {postal_code_112} and {postal_code_114}:')
    print(f'Longitude: {avg_longitude}, Latitude: {avg_latitude}')
else:
    # NOTE: avg_longitude / avg_latitude stay undefined on this branch, so any
    # later cell that reads them will raise NameError.
    print('Postal code not found.')

Average coordinates between postal codes 090112 and 090114:
Longitude: 103.82591005908262, Latitude: 1.2748001719229434


In [30]:
# Converting x and y to coordinates for latitude/longitude
import rasterio
import numpy as np
import pandas as pd
from pyproj import Transformer
from shapely.geometry import Point

def preprocessing(file_path, center=None, desired_radius=90):
    """Turn a multi-band GeoTIFF into a GeoDataFrame of pixels near a point of interest.

    Steps:
      1. Read every band once (masked) and compute each pixel centre in
         EPSG:4326 longitude/latitude.
      2. Drop pixels whose 'SR_QA_AEROSOL' flags mark a valid aerosol
         retrieval (bit 1) together with a high aerosol level (bits 6-7 = 11).
      3. Add scaled columns: 'ST_B10_Celsius', '<band>_Scaled' for SR_B1..SR_B7
         and for the ST_* auxiliary bands.
      4. Keep only the pixels inside a circular buffer around `center`.

    Parameters
    ----------
    file_path : str
        Path of the GeoTIFF. Band names are taken from the file's band
        descriptions and must include 'SR_QA_AEROSOL', 'ST_B10', 'SR_B1'..'SR_B7'
        and the ST_* bands scaled below.
    center : tuple of (longitude, latitude), optional
        Point of interest in EPSG:4326. When omitted, the notebook-level
        ``avg_longitude`` / ``avg_latitude`` are used (original behaviour).
    desired_radius : float, optional
        Buffer radius in metres (default 90).

    Returns
    -------
    geopandas.GeoDataFrame
        The filtered, scaled pixels within the buffer, as an independent copy.

    Raises
    ------
    KeyError
        If an expected band name is missing from the file.
    NameError
        If `center` is omitted and the notebook globals are not defined.
    """
    # Read the raster a single time; everything needed is pulled into memory
    # so the file can be closed before the heavy DataFrame work starts.
    with rasterio.open(file_path) as src:
        bands = src.read(masked=True)
        band_names = src.descriptions
        transform = src.transform
        src_crs = src.crs  # Source CRS

    dest_crs = 'EPSG:4326'  # WGS 84
    _, n_rows, n_cols = bands.shape

    # always_xy=True keeps the (x, y) = (lon, lat) axis order regardless of CRS definition.
    transformer = Transformer.from_crs(src_crs, dest_crs, always_xy=True)

    # Pixel-centre coordinates in the source CRS, then reprojected to lon/lat.
    cols, rows = np.meshgrid(np.arange(n_cols), np.arange(n_rows))
    xs, ys = rasterio.transform.xy(transform, rows, cols, offset='center')
    lon, lat = transformer.transform(np.array(xs).flatten(), np.array(ys).flatten())

    # One row per pixel, one column per band.
    df = pd.DataFrame({'Longitude': lon, 'Latitude': lat})
    for band_name, band in zip(band_names, bands):
        df[band_name] = band.flatten()

    # Integer dtype is required for the bitwise tests below.
    df['SR_QA_AEROSOL'] = df['SR_QA_AEROSOL'].astype(int)

    valid_aerosol = (df['SR_QA_AEROSOL'] & 2) == 2  # Bit 1 must be set for valid retrieval
    high_aerosol = (df['SR_QA_AEROSOL'] & 192) == 192  # Bits 6-7 must be set to 11 for high aerosol
    filter_mask = valid_aerosol & high_aerosol
    # `~` is the boolean inverse (unary minus on a boolean mask is not a
    # supported idiom); `.copy()` makes the column assignments below operate
    # on an independent frame instead of a slice of `df`.
    df_filtered = df[~filter_mask].copy()

    # Scale and offset specific bands.
    # NOTE(review): constants look like the USGS Collection 2 Level-2 scale
    # factors (ST_B10 in Kelvin, converted to Celsius) — confirm against the product guide.
    df_filtered['ST_B10_Celsius'] = df_filtered['ST_B10'] * 0.00341802 + 149 - 273.15
    bands_to_scale = ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']
    for band in bands_to_scale:
        df_filtered[f"{band}_Scaled"] = df_filtered[band] * 2.75e-05 - 0.2

    additional_scales = {
        'ST_ATRAN': 0.0001, 'ST_CDIST': 0.01, 'ST_DRAD': 0.001,
        'ST_EMIS': 0.0001, 'ST_EMSD': 0.0001, 'ST_QA': 0.01,
        'ST_TRAD': 0.001, 'ST_URAD': 0.001
    }
    for band, scale in additional_scales.items():
        df_filtered[f"{band}_Scaled"] = df_filtered[band] * scale

    # Named pixels_gdf (not gdf) so the address-point GeoDataFrame global is not shadowed.
    pixels_gdf = gpd.GeoDataFrame(
        df_filtered,
        geometry=gpd.points_from_xy(df_filtered.Longitude, df_filtered.Latitude),
        crs='EPSG:4326',  # Ensure the CRS is set to WGS 84
    )

    print("Total number of valid pixels: " + str(len(pixels_gdf)))
    print(df[['Latitude', 'Longitude']].head())

    # Point of interest: explicit argument, else the midpoint computed earlier in the notebook.
    if center is None:
        center = (avg_longitude, avg_latitude)
    poi = Point(center[0], center[1])
    # Approximate metres -> degrees conversion (about 111,320 m per degree).
    buffer = poi.buffer(desired_radius / 111320)

    # Keep only the pixels inside the buffer; copy so callers can add columns freely.
    filtered_gdf = pixels_gdf[pixels_gdf.geometry.within(buffer)].copy()

    print(f"Number of points within {desired_radius}m radius: {len(filtered_gdf)}")
    print(filtered_gdf['ST_B10_Celsius'].head())

    return filtered_gdf

##### Combining GeoDataFrames

In [51]:
import geopandas as gpd
import hvplot.pandas
import panel as pn
from bokeh.models import Slider
from bokeh.layouts import column

# Turn off pandas' chained-assignment (SettingWithCopy) warning for this session
pd.set_option('mode.chained_assignment', None)  # pandas default is 'warn'

# The two scenes to compare
file1 = "L8_UTC_20201207_031659.tif"
file2 = "L8_UTC_20201223_031657.tif"

# Both scenes live in the same folder; build each full path from it
landsat_dir = "C:\\LocalOneDrive\\Documents\\Desktop\\MTI\\UHI-Project\\MSE-ES-UHI\\Data\\Landsat8\\2020_test\\2020\\"
file_path_1 = landsat_dir + file1
file_path_2 = landsat_dir + file2

# Preprocess each scene and tag its rows with a 'time' label
gdf1 = preprocessing(file_path_1)
gdf1['time'] = 'Time 1'

gdf2 = preprocessing(file_path_2)
gdf2['time'] = 'Time 2'

# Stack the two labelled frames (original row indices are kept)
combined_gdf = pd.concat([gdf1, gdf2])

Total number of valid pixels: 2126550
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830
Number of points within 90m radius: 27
1280350    20.380981
1280351    20.210083
1280352    20.616821
1280353    20.818481
1282131    20.876587
Name: ST_B10_Celsius, dtype: float32
Total number of valid pixels: 2151197
   Latitude   Longitude
0  1.470099  103.589751
1  1.470099  103.590021
2  1.470099  103.590290
3  1.470099  103.590560
4  1.470100  103.590830
Number of points within 90m radius: 26
1280350    29.883087
1280351    29.684845
1280352    29.821564
1280353    29.746368
1282131    30.098419
Name: ST_B10_Celsius, dtype: float32


In [52]:
# Surface temperatures recorded for the first acquisition
is_time1 = combined_gdf['time'] == 'Time 1'
time1_values = combined_gdf.loc[is_time1, 'ST_B10_Celsius']
print("ST_B10_Celsius values for Time 1:", time1_values, sep="\n")

# Surface temperatures recorded for the second acquisition
is_time2 = combined_gdf['time'] == 'Time 2'
time2_values = combined_gdf.loc[is_time2, 'ST_B10_Celsius']
print("\nST_B10_Celsius values for Time 2:", time2_values, sep="\n")

ST_B10_Celsius values for Time 1:
1280350    20.380981
1280351    20.210083
1280352    20.616821
1280353    20.818481
1282131    20.876587
1282132    20.572388
1282133    20.452759
1282134    20.903931
1282135    21.163727
1282136    21.512360
1283913    21.211578
1283914    20.972321
1283915    20.985992
1283916    20.992828
1283917    21.341461
1283918    21.741364
1285695    21.741364
1285696    21.696930
1285697    21.683258
1285698    21.874664
1285699    22.260895
1285700    22.667633
1287478    22.230133
1287479    22.418121
1287480    22.500153
1287481    23.026520
1289262    23.409332
Name: ST_B10_Celsius, dtype: float32

ST_B10_Celsius values for Time 2:
1280350    29.883087
1280351    29.684845
1280352    29.821564
1280353    29.746368
1282131    30.098419
1282132    29.773712
1282133    29.534454
1282134    29.623322
1282135    29.595978
1282136    29.609650
1283914    29.527618
1283915    29.267853
1283916    28.922607
1283917    28.936279
1283918    28.990967
1285695    2

##### Plotting interactive spatial map (LST over time)

In [106]:
# For interactive map, launch local host
import geopandas as gpd
import hvplot.pandas
import panel as pn

def create_interactive_plot(combined_gdf):
    """Build a Panel column with a time slider driving a temperature point map.

    The slider runs from 1 to the number of distinct values in
    ``combined_gdf['time']``; position ``k`` selects the rows labelled
    ``'Time k'``. Those rows are drawn as points at ('Longitude', 'Latitude'),
    coloured by 'ST_B10_Celsius'.

    Returns the ``pn.Column`` holding the title, the slider and the map.
    """
    n_steps = combined_gdf['time'].nunique()
    time_slider = pn.widgets.IntSlider(
        name='Select Time', start=1, end=n_steps, value=1, step=1
    )

    # Plot styling shared by every redraw.
    plot_options = dict(
        geo=True,
        c='ST_B10_Celsius',
        cmap='viridis',
        size=5,
        tiles='OSM',
        frame_width=700,
        frame_height=500,
        colorbar=True,
        clim=(20, 40),
    )

    # Bound to value_throttled, so the map re-renders on the throttled slider value.
    @pn.depends(time_slider.param.value_throttled)
    def dynamic_map(value):
        time_label = f'Time {value}'
        subset = combined_gdf[combined_gdf['time'] == time_label]
        print(f'Updating plot for: {time_label}, Number of points: {len(subset)}')
        return subset.hvplot.points('Longitude', 'Latitude', **plot_options)

    return pn.Column(
        "<br>\n Land Surface Temperature Map",
        time_slider,
        dynamic_map,
    )

# Build the slider + map layout. Requires 'combined_gdf' (the labelled,
# concatenated GeoDataFrame from the "Combining" cell) to exist already.
layout = create_interactive_plot(combined_gdf)
# Alternative kept for reference: layout.servable()
# Serve the layout from this kernel without opening a browser tab (show=False).
# NOTE(review): `file_name` is forwarded as an extra keyword — confirm that
# pn.serve actually uses it; no HTML file is visibly written by this call.
pn.serve(layout, show=False, start=True, file_name="land_surface_temperature_map.html")

Updating plot for: Time 1, Number of points: 27
Launching server at http://localhost:57886


<panel.io.server.Server at 0x1af592470d0>

In [86]:
import panel as pn
import panel.widgets as pnw
import pandas as pd;
import numpy as np
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvas

DATA_URL = "https://raw.githubusercontent.com/LuisM78/Occupancy-detection-data/master/datatraining.txt"

# Download the table, parse its 'date' column as timestamps and index by it
data = (
    pd.read_csv(DATA_URL)
    .assign(date=lambda frame: frame['date'].astype('datetime64[ns]'))
    .set_index('date')
)

# Widgets: which column to inspect, and the rolling-window length
variable = pnw.RadioButtonGroup(
    name='variable',
    value='Temperature',
    options=data.columns.tolist(),
)
window = pnw.IntSlider(name='window', value=10, start=1, end=60)

def mpl_plot(avg, highlight):
    """Plot `avg` on a new Matplotlib figure and overlay `highlight` as dots.

    `highlight` is drawn only when it contains at least one element.
    Returns the created figure.
    """
    figure = Figure()
    FigureCanvas(figure)  # not needed in mpl >= 3.1
    axes = figure.add_subplot()

    avg.plot(ax=axes)
    if len(highlight) > 0:
        highlight.plot(ax=axes, style='o')

    return figure

def find_outliers(variable='Temperature', window=30, sigma=10, view_fn=mpl_plot):
    """Flag points of ``data[variable]`` that stray far from their rolling mean.

    A point is an outlier when its absolute residual (value minus rolling
    mean) exceeds `sigma` times the rolling standard deviation of the
    residuals; both rolling statistics use `window` samples.

    Returns whatever ``view_fn(rolling_mean, rolling_mean_at_outliers)`` returns.
    """
    series = data[variable]
    rolling_mean = series.rolling(window=window).mean()
    deviation = series - rolling_mean
    rolling_std = deviation.rolling(window=window).std()

    is_outlier = deviation.abs() > rolling_std * sigma
    return view_fn(rolling_mean, rolling_mean[is_outlier])

@pn.depends(variable, window)
def reactive_outliers(variable, window):
    """Re-run the outlier view for the current widget values, with sigma fixed at 10."""
    return find_outliers(variable=variable, window=window, sigma=10)

# Side panel: a markdown title above the column selector and the window slider.
widgets   = pn.Column("<br>\n# Room occupancy", variable, window)
# Main row: the reactive outlier plot on the left, the controls on the right.
occupancy = pn.Row(reactive_outliers, widgets)
# Mark the row as servable; as the last expression it is also rendered as the cell output.
occupancy.servable()

BokehModel(combine_events=True, render_bundle={'docs_json': {'4daee2da-d6d1-420c-892e-355d6a054c16': {'version…