In [None]:
import dask_geopandas as dgpd

# Read both source layers with dask-geopandas and call .compute() right away
# so the rest of the notebook works with concrete (non-lazy) objects.
protected_areas_path = "data/protected_areas.parquet"
land_cover_path = "data/land_cover.parquet"

gdf_protected = dgpd.read_parquet(protected_areas_path).compute()
gdf_landcover = dgpd.read_parquet(land_cover_path).compute()

In [7]:
# Reproject both layers to the same projected CRS (EPSG:32651) so they can be
# compared with each other and with the sample points later on.
target_epsg = 32651

gdf_protected = gdf_protected.to_crs(epsg=target_epsg)
gdf_landcover = gdf_landcover.to_crs(epsg=target_epsg)

# Persist the reprojected layers so the reprojection does not have to be repeated.
gdf_protected.to_parquet("data/protected_areas_reprojected.parquet")
gdf_landcover.to_parquet("data/land_cover_reprojected.parquet")

In [8]:
# Reload the reprojected layers from disk (this replaces the in-memory copies).
reprojected_protected_path = "data/protected_areas_reprojected.parquet"
reprojected_landcover_path = "data/land_cover_reprojected.parquet"

gdf_protected = dgpd.read_parquet(reprojected_protected_path).compute()
gdf_landcover = dgpd.read_parquet(reprojected_landcover_path).compute()

# Print the CRS of each layer, protected areas first, as a sanity check.
for layer in (gdf_protected, gdf_landcover):
    print(layer.crs)

{"$schema": "https://proj.org/schemas/v0.7/projjson.schema.json", "type": "ProjectedCRS", "name": "WGS 84 / UTM zone 51N", "base_crs": {"type": "GeographicCRS", "name": "WGS 84", "datum": {"type": "GeodeticReferenceFrame", "name": "World Geodetic System 1984", "ellipsoid": {"name": "WGS 84", "semi_major_axis": 6378137, "inverse_flattening": 298.257223563}}, "coordinate_system": {"subtype": "ellipsoidal", "axis": [{"name": "Geodetic latitude", "abbreviation": "Lat", "direction": "north", "unit": "degree"}, {"name": "Geodetic longitude", "abbreviation": "Lon", "direction": "east", "unit": "degree"}]}, "id": {"authority": "EPSG", "code": 4326}}, "conversion": {"name": "UTM zone 51N", "method": {"name": "Transverse Mercator", "id": {"authority": "EPSG", "code": 9807}}, "parameters": [{"name": "Latitude of natural origin", "value": 0, "unit": "degree", "id": {"authority": "EPSG", "code": 8801}}, {"name": "Longitude of natural origin", "value": 123, "unit": "degree", "id": {"authority": "EPS

In [22]:
import pandas as pd
# Read the sample points (the CSV provides 'latitude' and 'longitude' columns,
# which the later cells use) and display the resulting frame.
sample_csv_path = "data/sample_data.csv"
df = pd.read_csv(sample_csv_path)
df

Unnamed: 0,latitude,longitude
0,13.4125,122.5623
1,10.7103,122.5621
2,16.4023,120.596
3,15.489,120.9739
4,14.5995,120.9842
5,8.051,124.92
6,6.9214,122.079
7,11.0046,124.6093
8,13.4125,122.5644
9,17.6133,121.7269


In [23]:
from shapely.geometry import Point
import geopandas as gpd

# Convert DataFrame to GeoDataFrame.
# points_from_xy builds the point geometries in one vectorised call; x is
# longitude and y is latitude because the input coordinates are EPSG:4326.
print("Converting DataFrame to GeoDataFrame...")
geometry = gpd.points_from_xy(df['longitude'], df['latitude'])
gdf_points = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")
# Reproject to the same CRS the polygon layers were reprojected to.
gdf_points = gdf_points.to_crs(epsg=32651)

# Check if points are inside protected areas.
# A left spatial join emits one row per (point, polygon) match, so a point that
# falls inside several overlapping protected areas appears more than once with
# the same index label. Collapse to one boolean per point before assigning;
# assigning the raw joined Series would fail on duplicate index labels.
print("Checking if points are inside protected areas...")
protected_join = gdf_points.sjoin(gdf_protected, how="left", predicate="intersects")
gdf_points["in_protected_area"] = (
    protected_join['index_right'].notnull().groupby(level=0).any()
)

# Get land cover type (the land cover GeoDataFrame provides a 'class_id' column).
print("Getting land cover type...")
gdf_points = gdf_points.sjoin(gdf_landcover[['geometry', 'class_id']], how="left", predicate="intersects")

# Keep a single land cover match per point (the first one) so the feature
# matrix keeps exactly one row per input point and stays aligned with `df`.
gdf_points = gdf_points[~gdf_points.index.duplicated(keep="first")]

# Drop unnecessary index_right column from spatial join
gdf_points = gdf_points.drop(columns=['index_right'])

# Define all possible land cover classes
all_classes = [1, 2, 3, 4, 5]

# Ensure all classes appear in one-hot encoding, even those absent from this
# sample, so the number of feature columns is always the same.
gdf_points['class_id'] = pd.Categorical(gdf_points['class_id'], categories=all_classes)
df_encoded = pd.get_dummies(gdf_points[['class_id']], columns=['class_id'], prefix='landcover_class').astype(int)


# Convert boolean 'in_protected_area' to 1/0.
# NOTE(review): the target column name below is misspelled; it is kept as-is
# because other code may look it up by this exact key.
df_encoded['in_preotected_area'] = gdf_points['in_protected_area'].astype(int)

df_encoded_array = df_encoded.to_numpy()

# Display results
df_encoded_array

Converting DataFrame to GeoDataFrame...
Checking if points are inside protected areas...
Getting land cover type...


array([[0, 1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 1],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0]])

In [24]:
import json
import torch
import torch.nn as nn


# Read the autoencoder parameters from their JSON file.
with open("model_params/autoencoder_params.json") as params_file:
    loaded_params = json.loads(params_file.read())

# Input width used to build the network.
num_features = loaded_params["num_features"]
# Cut-off that the per-row reconstruction error is compared against.
threshold_autoencoder = loaded_params["threshold_autoencoder"]

class AutoEncoder(nn.Module):
    """Fully connected autoencoder.

    Layer widths: input_dim -> 64 -> 32 -> 16 -> 32 -> 64 -> input_dim, with
    ReLU between the linear layers and a final Sigmoid on the output.

    Args:
        input_dim: Number of features in each input row (and in each
            reconstructed output row).
    """

    def __init__(self, input_dim):
        super().__init__()

        # Encoder: compress the input down to a 16-dimensional code.
        encoder_layers = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: mirror of the encoder; the Sigmoid keeps every
        # reconstructed value in [0, 1].
        decoder_layers = [
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, input_dim),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))

# Instantiate the model again with the saved input width.
loaded_model = AutoEncoder(input_dim=num_features)

# Load the trained weights.
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# machine without one; weights_only=True restricts unpickling to tensors and
# plain containers, which is all a state dict needs.
trained_state_dict = torch.load(
    "model_weights/autoencoder_weights.pth",
    map_location="cpu",
    weights_only=True,
)
loaded_model.load_state_dict(trained_state_dict)

# Set the model to evaluation mode (the returned module is displayed as the
# cell output).
loaded_model.eval()


AutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=6, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=16, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=6, bias=True)
    (5): Sigmoid()
  )
)

In [25]:
import numpy as np

# Feed the encoded features through the autoencoder; gradients are not needed
# for inference.
df_encoded_tensor = torch.tensor(df_encoded_array, dtype=torch.float32)
with torch.no_grad():
    test_reconstruction = loaded_model(df_encoded_tensor).numpy()

# Mean squared reconstruction error, one value per input row.
squared_residuals = np.square(df_encoded_array - test_reconstruction)
test_reconstruction_error = squared_residuals.mean(axis=1)

# Flag rows whose error exceeds the loaded threshold: 1 = anomaly, 0 = normal.
test_anomalies_autoencoder = np.where(test_reconstruction_error > threshold_autoencoder, 1, 0)

In [26]:
# Human-readable names for the land cover class ids 1..5.
land_cover_mapping = dict(
    enumerate(
        [
            "Terrestrial Forest",
            "Crop Areas",
            "Barren/Flatland",
            "Built-up",
            "Wetlands & Water Bodies",
        ],
        start=1,
    )
)

# Translate each point's class_id into its land cover name.
land_cover_names = gdf_points['class_id'].map(land_cover_mapping)
df['land_cover'] = land_cover_names

# Copy the protected-area flag unchanged (no 1/0 conversion happens here).
df['in_preotected_area'] = gdf_points['in_protected_area']

# Rows flagged as anomalies by the autoencoder are reported as unsuitable.
is_anomalous = test_anomalies_autoencoder == 1
df['suitability'] = np.where(is_anomalous, "Likely Unsuitable", "Suitable")
df

Unnamed: 0,latitude,longitude,land_cover,in_preotected_area,suitability
0,13.4125,122.5623,Crop Areas,False,Suitable
1,10.7103,122.5621,Built-up,False,Likely Unsuitable
2,16.4023,120.596,Built-up,True,Likely Unsuitable
3,15.489,120.9739,Built-up,False,Likely Unsuitable
4,14.5995,120.9842,Built-up,False,Likely Unsuitable
5,8.051,124.92,Barren/Flatland,False,Likely Unsuitable
6,6.9214,122.079,Built-up,False,Likely Unsuitable
7,11.0046,124.6093,Built-up,False,Likely Unsuitable
8,13.4125,122.5644,Crop Areas,False,Suitable
9,17.6133,121.7269,Built-up,False,Likely Unsuitable


In [1]:
# Launch the Streamlit app defined in app_v2.py through a shell command.
# NOTE(review): this presumably keeps the cell busy until the server is
# stopped, and the captured output includes host addresses and local paths —
# consider clearing the output before sharing the notebook.
!streamlit run app_v2.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://202.92.159.241:8501[0m
[34m  External URL: [0m[1mhttp://202.92.159.241:8501[0m
[0m
and fails to parse leap day. The default behavior will change in Python 3.15
to either always raise an exception or to use a different default year (TBD).
To avoid trouble, add a specific year to the input & format.
See https://github.com/python/cpython/issues/70647.
  import geemap.foliumap as geemap
2025-04-03 03:20:33.428 Examining the path of torch.classes raised:
Traceback (most recent call last):
  File "/data/students/ryan/anaconda3/envs/streamlit-env/lib/python3.13/site-packages/streamlit/web/bootstrap.py", line 347, in run
    if asyncio.get_running_loop().is_running():
       ~~~~~~~~~~~~~~~~~~~~~~~~^^
RuntimeError: no running event loop

