<a href="https://colab.research.google.com/github/ced-sys/SubTerra/blob/main/Csv_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
# Mount Google Drive so files under /content/drive can be read and written.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
!pip install geopandas rasterio



In [17]:
import os

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio

# Locations of the positive and negative sample point sets
pos_path = '/content/drive/MyDrive/SubTerra/Data For ML/positive_samples.geojson'
neg_path = '/content/drive/MyDrive/SubTerra/Data For ML/negative_samples.geojson'

# Read each set and bring it into a common CRS (EPSG:4326) in one step,
# so the two frames can be stacked safely.
points_pos = gpd.read_file(pos_path).to_crs('EPSG:4326')
points_neg = gpd.read_file(neg_path).to_crs('EPSG:4326')

# Stack both sets with a fresh 0..n-1 index and re-wrap the result as a
# GeoDataFrame with an explicit geometry column and CRS.
points_all = gpd.GeoDataFrame(
    pd.concat([points_pos, points_neg], ignore_index=True),
    geometry='geometry',
    crs='EPSG:4326',
)


# Directory holding the aligned rasters, and the file to sample for each
# feature. Each key becomes a column name in the extracted dataset; the
# insertion order here is the order in which the columns are added.
raster_dir = '/content/drive/MyDrive/SubTerra/aligned/'
raster_paths = dict(
    dem='aligned_kenya_dem.tif',
    slope='aligned_slope.tif',
    tri='aligned_tri.tif',
    fault_dist='aligned_fault_distance.tif',
    ndvi='aligned_ndvi_2020.tif',
    lst='aligned_lst_2020.tif',
    landcover='aligned_land_cover_2020.tif',
    veg_peak='aligned_vegetation_peak.tif',
    lithology='aligned_kenya_lithology_clipped.tif',
    heat_flow='aligned_heat_flow_kenya.tif',
)

# Extract raster values
# Point coordinates in the points' own CRS; reused for every raster that
# already shares that CRS.
coords = [(pt.x, pt.y) for pt in points_all.geometry]

for key, file_name in raster_paths.items():
    path = os.path.join(raster_dir, file_name)
    with rasterio.open(path) as src:
        # Sampling happens in the raster's coordinate system, so reproject the
        # points first whenever the raster CRS differs from the points' CRS.
        if points_all.crs != src.crs:
            sample_coords = [
                (pt.x, pt.y) for pt in points_all.to_crs(src.crs).geometry
            ]
        else:
            sample_coords = coords

        # Each sample is an array with one entry per band; only the first band
        # is kept. masked=True makes rasterio flag missing (nodata) pixels
        # explicitly, and the mask is what decides between a value and None.
        # Testing the array's truthiness instead (`if val`) would turn a
        # legitimate pixel value of 0 into None and would raise for
        # multi-band rasters.
        values = [
            None if np.ma.getmaskarray(val)[0] else val[0]
            for val in src.sample(sample_coords, masked=True)
        ]

    # One new feature column per raster, named after its key.
    points_all[key] = values



# Export to CSV
output_csv = '/content/drive/MyDrive/SubTerra/Data For ML/training_dataset.csv'
points_all.to_csv(output_csv, index=False)

# Report the actual shape of the exported frame. The column count is read from
# the frame itself because the sample files may carry more attribute columns
# than a single label, in which case `len(raster_paths) + 2` would be wrong.
n_rows, n_cols = points_all.shape
print(f"✅ Done! Extracted features saved to: {output_csv}")
print(f"🧬 Rows: {n_rows}, Columns: {n_cols} ({len(raster_paths)} raster features + sample attributes + geometry)")

✅ Done! Extracted features saved to: /content/drive/MyDrive/SubTerra/Data For ML/training_dataset.csv
🧬 Rows: 360, Columns: 12 (features + label + geometry)
