In [1]:
import pandas as pd
import numpy as np
import random
import time
import json

import os
from glob import glob
import io

import ee
import geemap.foliumap as geemap
import geopandas as gpd
import dask_geopandas as dgpd
from shapely.geometry import Point
from shapely.strtree import STRtree

In [2]:
# Initialize Earth Engine
ee.Initialize()

# Initialize Map
m = geemap.Map(center=[12.8797, 121.7740], zoom=6)

common_crs = 'EPSG:4326'
common_scale = 10

land_cover = ee.ImageCollection("ESA/WorldCover/v200").first().reproject(crs=common_crs, scale=common_scale)

# Climate Datasets
solar = ee.ImageCollection("ECMWF/ERA5_LAND/MONTHLY_AGGR").select("surface_solar_radiation_downwards_sum").mean().reproject(crs=common_crs, scale=common_scale)
temp = ee.ImageCollection("ECMWF/ERA5_LAND/MONTHLY_AGGR").select("temperature_2m").mean().reproject(crs=common_crs, scale=common_scale)
precip = ee.ImageCollection("ECMWF/ERA5_LAND/MONTHLY_AGGR").select("total_precipitation_sum").mean().reproject(crs=common_crs, scale=common_scale)

# Load SRTM elevation data
srtm = ee.Image('USGS/SRTMGL1_003')

# Calculate terrain derivatives (slope, aspect, etc.)
terrain = ee.Terrain.products(srtm).reproject(crs=common_crs, scale=common_scale)

# Flood Data
flood_collection = ee.ImageCollection("GLOBAL_FLOOD_DB/MODIS_EVENTS/V1") \
    .select("flooded") \
    .map(lambda img: img.unmask(0))  # Replace null/masked values with 0

flood = flood_collection.sum().reproject(crs=common_crs, scale=common_scale)

# flood_depth = ee.ImageCollection("JRC/CEMS_GLOFAS/FloodHazard/v1").select("depth").mean()

flood_depth_collection = ee.ImageCollection("JRC/CEMS_GLOFAS/FloodHazard/v1") \
    .select("depth") \
    .map(lambda img: img.unmask(0))  # Replace null/masked values with 0
flood_depth_mean = flood_depth_collection.mean().reproject(crs=common_crs, scale=common_scale)
flood_depth_max = flood_depth_collection.max().reproject(crs=common_crs, scale=common_scale)


In [3]:
gdf_protected = dgpd.read_parquet("../01_processed_data/protected_areas_reprojected.parquet").compute()
gdf_kba = gpd.read_file("../01_processed_data/philippines_kba.geojson")
gdf_spug = gpd.read_file("../01_processed_data/philippines_spug.geojson")
gdf_airports = gpd.read_file("../01_processed_data/philippines_airports.geojson")

# gdf_landcover = dgpd.read_parquet("../01_processed_data/land_cover_reprojected.parquet").compute()
# gdf_flood_5 = dgpd.read_parquet("../01_processed_data/flood_risk/FloodRisk_5yr_reprojected.parquet").compute()
# gdf_flood_25 = dgpd.read_parquet("../01_processed_data/flood_risk/FloodRisk_25yr_reprojected.parquet").compute()
# gdf_flood_100 = dgpd.read_parquet("../01_processed_data/flood_risk/FloodRisk_100yr_reprojected.parquet").compute()

faults_geom = gpd.read_file("../01_processed_data/faults_ph_geometry.geojson")

# gdf_power_line = gpd.read_file("01_processed_data/philippines_power_lines.geojson")
gdf_grid = gpd.read_file("../01_processed_data/philippines_grid.geojson")

residential_1 = gpd.read_file("../01_processed_data/residential_areas_part1.geojson")
residential_2 = gpd.read_file("../01_processed_data/residential_areas_part2.geojson")
residential = pd.concat([residential_1, residential_2], ignore_index=True)

main_roads_1 = gpd.read_file("../01_processed_data/philippines_main_roads_1.geojson")
main_roads_2 = gpd.read_file("../01_processed_data/philippines_main_roads_2.geojson")
gdf_main_roads = pd.concat([main_roads_1, main_roads_2], ignore_index=True)

  return ogr_read(


In [4]:
# Create a spatial index from residential geometries
res_geom_list = list(residential.geometry.values)  # ensure it's a plain list of geometries
res_tree = STRtree(res_geom_list)

# For each site, find the nearest residential polygon and compute distance
def nearest_distance(site_geom):
    if site_geom is None or site_geom.is_empty:
        return np.nan
    nearest_idx = res_tree.nearest(site_geom)
    nearest_geom = res_tree.geometries.take(nearest_idx)

    return site_geom.distance(nearest_geom)


# Create a spatial index from airport geometries
air_geom_list = list(gdf_airports.geometry.values)  # ensure it's a plain list of geometries
air_tree = STRtree(air_geom_list)

# For each site, find the nearest airport polygon and compute distance
def nearest_distance_airport(site_geom):
    if site_geom is None or site_geom.is_empty:
        return np.nan
    nearest_idx = air_tree.nearest(site_geom)
    nearest_geom = air_tree.geometries.take(nearest_idx)

    return site_geom.distance(nearest_geom)


# Create a spatial index from main road geometries
road_geom_list = list(gdf_main_roads.geometry.values)  # ensure it's a plain list of geometries
road_tree = STRtree(road_geom_list)

# For each site, find the nearest road polygon and compute distance
def nearest_distance_roads(site_geom):
    if site_geom is None or site_geom.is_empty:
        return np.nan
    nearest_idx = road_tree.nearest(site_geom)
    nearest_geom = road_tree.geometries.take(nearest_idx)

    return site_geom.distance(nearest_geom)

# -------------------------------------------------------


# PROCESS GEE DATA ------------------------------------------------------------------------------------------------------------------------------------

def create_feature_collection(df):
    features = [
        ee.Feature(ee.Geometry.Point(row['longitude'], row['latitude']), {'id': idx})
        for idx, row in df.iterrows()
    ]
    return ee.FeatureCollection(features)


# Function to extract values per point with reduceRegion (allows nulls)
def extract_bands_to_feature(point):
    reducers = ee.Reducer.first()  # or ee.Reducer.mean() if you want averaging

    # Combine all the bands into a single image
    combined = land_cover \
        .addBands(solar.rename("solar")) \
        .addBands(temp.rename("temp")) \
        .addBands(precip.rename("precip")) \
        .addBands(flood.rename("flood")) \
        .addBands(flood_depth_mean.rename("flood_mean")) \
        .addBands(flood_depth_max.rename("flood_max")) \
        .addBands(terrain.select("slope").rename("slope")) \
        .addBands(terrain.select("elevation").rename("elevation"))

    # Reduce each image at the point
    sampled = combined.reduceRegion(
        reducer=reducers,
        geometry=point.geometry(),
        scale=10,
        maxPixels=1e13
    )

    # Return the point with added properties (some may be null)
    return point.set(sampled)

In [5]:

# Extract Feature Data Function
def extract_GEE_values(df):
    fc_points = create_feature_collection(df)

    # Apply to all points
    sampled = fc_points.map(extract_bands_to_feature)


    results = sampled.getInfo()

    extracted = []
    for f in results['features']:
        props = f['properties']
        extracted.append({
            'id': props['id'],
            'land_cover': props.get('Map'),  # land cover code
            'Slope (deg)': props.get('slope'), # Slope (degrees)
            'Elevation (m)': props.get('elevation'), # Elevation (m)
            'Monthly Surface Solar Radiation (J/m²)': props.get('solar') if props.get('solar') > 5 else None, # Monthly Surface Solar Radiation (J/m²)
            'Mean 2m Temperature (°C)': props.get('temp') - 273.15 if props.get('temp') is not None else None, # Mean 2m Temperature (K)
            'Mean Monthly Precipitation (m)': props.get('precip'), # Mean Monthly Precipitation (m)
            'Flood Extent History': props.get('flood'), 
            'Mean Flood Depth (m)': props.get('flood_mean'), # Mean Flood Depth (m)
            'Max Flood Depth (m)': props.get('flood_max'), # Max Flood Depth (m)
            # 'Mean Flood Duration (days)': props.get('flood_dur_mean'), # Mean Flood Duration (days)
            # 'Max Flood Duration (days)': props.get('flood_dur_max'), # Max Flood Duration (days)
            # 'Mean Flood Extent (%)': props.get('flood_mean'), 
            # 'Max Flood Extent (%)': props.get('flood_max')
        })

    df['id'] = df.index
    extracted = pd.DataFrame(extracted)

    # Merge the extracted data with the original DataFrame
    df_results = pd.merge(df, extracted, on='id', how='left')
    # Drop the 'id' column
    df_results = df_results.drop(columns=['id'])

    # Decoding land cover codes
    land_labels = {
        10: "Tree Cover", 20: "Shrubland", 30: "Grassland", 40: "Cropland",
        50: "Built-up", 60: "Bare/Sparse Veg", 70: "Snow/Ice", 80: "Water",
        90: "Wetlands", 95: "Mangroves", 100: "Moss & Lichen"
    }

    df_results['Land Cover'] = df_results['land_cover'].map(land_labels)
    # drop land cover code
    df_results = df_results.drop(columns=['land_cover'])

    return df_results

In [6]:
def get_features(df):

    # Convert DataFrame to GeoDataFrame
    # print("Converting DataFrame to GeoDataFrame...")
    geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]
    gdf_points = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

    # Extract GEE values
    df = extract_GEE_values(gdf_points)

    gdf_points = gdf_points.to_crs(epsg=32651)


    # Get Min Distance to Fault Lines
    df["Min. Distance to Fault Line (m)"] = gdf_points.geometry.apply(
    lambda point: faults_geom.distance(point).min()
    )


    df['Min. Distance to Residential Areas (m)'] = gdf_points.geometry.apply(nearest_distance)


    # Get Min Distance to Airports
    df["Min. Distance to Airport (m)"] = gdf_points.geometry.apply(nearest_distance_airport)

    # Get Min Distance to Main Roads
    df["Min. Distance to Main Road (m)"] = gdf_points.geometry.apply(nearest_distance_roads)


    # drop geometry
    df = df.drop(columns=['geometry'])

    # Check if points are inside protected areas
    gdf_points = gdf_points.reset_index(drop=True)
    joined_gdf = gdf_points.sjoin(gdf_protected, how="left", predicate="intersects")
    joined_gdf = joined_gdf.sort_values(by='index_right', ascending=False)
    joined_gdf = joined_gdf[~joined_gdf.index.duplicated(keep='first')]
    gdf_points["in_protected_area"] = joined_gdf['index_right'].notnull()

    # Calculate distance to the nearest protected area and get its name
    def get_nearest_protected_area(point):
        if point is None or point.is_empty:
            return pd.Series([np.nan, np.nan])
        distances = gdf_protected.geometry.distance(point)
        nearest_idx = distances.idxmin()  # Get the index of the nearest protected area
        nearest_name = gdf_protected.loc[nearest_idx, 'NAME'] if not pd.isnull(nearest_idx) else np.nan
        nearest_distance = distances.min() if not pd.isnull(nearest_idx) else np.nan
        return pd.Series([nearest_distance, nearest_name])

    gdf_points[['distance_to_protected_area', 'nearest_protected_area_name']] = gdf_points.geometry.apply(get_nearest_protected_area)


    gdf_points = gdf_points.reset_index(drop=True)
    joined_gdf = gdf_points.sjoin(gdf_kba, how="left", predicate="intersects")
    joined_gdf = joined_gdf.sort_values(by='index_right', ascending=False)
    joined_gdf = joined_gdf[~joined_gdf.index.duplicated(keep='first')]
    gdf_points["in_KBA"] = joined_gdf['index_right'].notnull()

    def get_nearest_kba(point):
        if point is None or point.is_empty:
            return pd.Series([np.nan, np.nan])
        distances = gdf_kba.geometry.distance(point)
        nearest_idx = distances.idxmin()  # Get the index of the nearest protected area
        nearest_name = gdf_kba.loc[nearest_idx, 'NatName'] if not pd.isnull(nearest_idx) else np.nan
        nearest_distance = distances.min() if not pd.isnull(nearest_idx) else np.nan
        return pd.Series([nearest_distance, nearest_name])

    gdf_points[['distance_to_KBA', 'nearest_KBA_name']] = gdf_points.geometry.apply(get_nearest_kba)


    def get_nearest_grid(point):
        if point is None or point.is_empty:
            return pd.Series([np.nan, np.nan])
        distances = gdf_grid.geometry.distance(point)
        nearest_idx = distances.idxmin()  # Get the index of the nearest protected area
        nearest_name = gdf_grid.loc[nearest_idx, 'power'] if not pd.isnull(nearest_idx) else np.nan
        nearest_distance = distances.min() if not pd.isnull(nearest_idx) else np.nan
        return pd.Series([nearest_distance, nearest_name])

    gdf_points[['distance_to_grid', 'nearest_grid_type']] = gdf_points.geometry.apply(get_nearest_grid)



    gdf_points = gdf_points.reset_index(drop=True)
    joined_gdf = gdf_points.sjoin(gdf_spug, how="left", predicate="intersects")
    joined_gdf = joined_gdf.sort_values(by='index_right', ascending=False)
    joined_gdf = joined_gdf[~joined_gdf.index.duplicated(keep='first')]
    gdf_points["in_SPUG"] = joined_gdf['index_right'].notnull()

    def get_nearest_spug_area(point):
        if point is None or point.is_empty:
            return pd.Series([np.nan, np.nan])
        distances = gdf_spug.geometry.distance(point)
        nearest_idx = distances.idxmin()  # Get the index of the nearest protected area
        nearest_name = gdf_spug.loc[nearest_idx, 'adm4_en'] if not pd.isnull(nearest_idx) else np.nan
        nearest_distance = distances.min() if not pd.isnull(nearest_idx) else np.nan
        return pd.Series([nearest_distance, nearest_name])

    gdf_points[['distance_to_SPUG', 'nearest_SPUG_name']] = gdf_points.geometry.apply(get_nearest_spug_area)

    

    # Get land cover type (assuming land cover GeoDataFrame has a 'land_type' column)
    # print("Getting land cover type...")
    # gdf_points = gdf_points.sjoin(gdf_landcover[['geometry', 'class_id']], how="left", predicate="intersects")

    # Drop unnecessary index_right column from spatial join
    # gdf_points = gdf_points.drop(columns=['index_right'])

    # print("Getting flood risk...")
    # gdf_points = gdf_points.sjoin(gdf_flood_5[['geometry', 'FloodRisk']], how="left", predicate="intersects").fillna({'FloodRisk': 0}).rename(columns={'FloodRisk': 'FloodRisk_5'}).drop(columns=['index_right'], errors='ignore')
    # gdf_points = gdf_points.sjoin(gdf_flood_25[['geometry', 'FloodRisk']], how="left", predicate="intersects").fillna({'FloodRisk': 0}).rename(columns={'FloodRisk': 'FloodRisk_25'}).drop(columns=['index_right'], errors='ignore')
    # gdf_points = gdf_points.sjoin(gdf_flood_100[['geometry', 'FloodRisk']], how="left", predicate="intersects").fillna({'FloodRisk': 0}).rename(columns={'FloodRisk': 'FloodRisk_100'}).drop(columns=['index_right'], errors='ignore')


    # Convert 'in_predicted_area' to 1/0
    df['in_protected_area'] = gdf_points['in_protected_area']
    df['distance_to_protected_area'] = gdf_points['distance_to_protected_area']
    df['nearest_protected_area_name'] = gdf_points['nearest_protected_area_name']
    df['in_KBA'] = gdf_points['in_KBA']
    df['distance_to_KBA'] = gdf_points['distance_to_KBA']
    df['nearest_KBA_name'] = gdf_points['nearest_KBA_name']
    df['distance_to_grid'] = gdf_points['distance_to_grid']
    df['nearest_grid_type'] = gdf_points['nearest_grid_type']
    df['in_SPUG'] = gdf_points['in_SPUG']
    df['distance_to_SPUG'] = gdf_points['distance_to_SPUG']
    df['nearest_SPUG_name'] = gdf_points['nearest_SPUG_name']
 

    # Get Flood Risk
    # df['FloodRisk_5yr'] = gdf_points['FloodRisk_5'].astype(int).map(flood_risk_mapping)
    # df['FloodRisk_25yr'] = gdf_points['FloodRisk_25'].astype(int).map(flood_risk_mapping)
    # df['FloodRisk_100yr'] = gdf_points['FloodRisk_100'].astype(int).map(flood_risk_mapping)

    # Map class_id to land cover names
    # df['land_cover'] = gdf_points['class_id'].map(land_cover_mapping)

    rename_mapping = {
        'latitude': 'Latitude',
        'longitude': 'Longitude',
        'in_protected_area': 'In Protected Area?',
        'distance_to_protected_area': 'Min. Distance to Protected Area (m)',
        'nearest_protected_area_name': 'Nearest Protected Area',
        'in_KBA': 'In KBA?',
        'distance_to_KBA': 'Min. Distance to KBA (m)',
        'nearest_KBA_name': 'Nearest KBA',
        'distance_to_grid': 'Grid Proximity (m)',
        'nearest_grid_type': 'Nearest Grid Type',
        'in_SPUG': 'In SPUG Area?',
        'distance_to_SPUG': 'Min. Distance to SPUG Area (m)',
        'nearest_SPUG_name': 'Nearest SPUG Area',
    }

    df = df.rename(columns=rename_mapping)

    
    return df

In [None]:
# import pandas as pd
# from tqdm import tqdm

# # Load the data
# df = pd.read_csv('../00_data/pv_labeled_data.csv')

# # Make a copy and extract coordinates
# original_df = df.copy()
# df_coords = df[['latitude', 'longitude']]

# # Empty list to collect feature results
# all_features = []

# # Set batch size
# batch_size = 1000
# num_batches = (len(df_coords) + batch_size - 1) // batch_size

# # Loop over batches
# for i in tqdm(range(num_batches), desc="Processing batches"):
#     start = i * batch_size
#     end = start + batch_size
#     batch_df = df_coords.iloc[start:end]
    
#     try:
#         features = get_features(batch_df)
#         all_features.append(features)
#     except Exception as e:
#         print(f"Batch {i+1} failed: {e}")

# # Combine all features into a single DataFrame
# df_features = pd.concat(all_features, ignore_index=True)

In [10]:
import pandas as pd
from tqdm import tqdm

# Load the data
df = pd.read_csv('../00_data/pv_labeled_data_v2.csv')

# Make a copy and extract coordinates
original_df = df.copy()
df_coords = df[['latitude', 'longitude']]

# Empty list to collect feature results
all_features = []

# Set batch size
batch_size = 1000
num_batches = (len(df_coords) + batch_size - 1) // batch_size

# Loop over batches
for i in tqdm(range(num_batches), desc="Processing batches"):
    start = i * batch_size
    end = start + batch_size
    batch_df = df_coords.iloc[start:end]
    
    try:
        features = get_features(batch_df)
        all_features.append(features)
    except Exception as e:
        print(f"Batch {i+1} failed: {e}")

# Combine all features into a single DataFrame
df_features = pd.concat(all_features, ignore_index=True)
df_features

Processing batches: 100%|██████████| 22/22 [1:08:15<00:00, 186.18s/it]


Unnamed: 0,Latitude,Longitude,Slope (deg),Elevation (m),Monthly Surface Solar Radiation (J/m²),Mean 2m Temperature (°C),Mean Monthly Precipitation (m),Flood Extent History,Mean Flood Depth (m),Max Flood Depth (m),...,Min. Distance to Protected Area (m),Nearest Protected Area,In KBA?,Min. Distance to KBA (m),Nearest KBA,Grid Proximity (m),Nearest Grid Type,In SPUG Area?,Min. Distance to SPUG Area (m),Nearest SPUG Area
0,12.658645,120.416232,1.0,8.0,,,,0,0.0,0.0,...,0.000000,Apo Reef Natural Park,True,0.000000,Apo Reef Marine Natural Park,46718.946358,substation,False,75022.698324,Bintuan
1,16.307015,119.785458,11.0,65.0,6.039935e+08,26.958410,0.178895,0,0.0,0.0,...,570.009587,BBBIDA Pangasinan,False,60344.119443,Zambales mountains,11895.838867,substation,False,228535.238408,Dibet
2,14.961935,120.906647,2.0,24.0,5.488331e+08,26.598375,0.153467,0,0.0,0.0,...,23842.382956,Biak-na-Bato National Park,False,7715.920807,Manila Bay,5.522954,minor_line,False,140321.581257,Patnanungan Sur
3,7.070329,125.622841,5.0,4.0,5.478269e+08,26.007129,0.143056,1,0.0,0.0,...,10033.952651,"Mangrove From Baculin Point to Lakud Point, Fr...",False,24365.978379,Mount Apo,790.331571,substation,False,150832.041033,Bugso
4,11.642965,122.307870,3.0,24.0,5.353490e+08,25.601383,0.204691,0,0.0,0.0,...,10341.306087,Aklan River Watershed Forest Reserve,False,11507.178000,Central Panay mountains,887.206652,line,False,45100.527708,Batbatan Island
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21139,12.165591,122.033751,3.0,10.0,5.946163e+08,26.651680,0.233838,0,0.0,0.0,...,32464.539225,"DENR Antique, BFAR Antique, Office of the Prov...",False,33666.311751,North-west Panay peninsula (Pandan),22239.577893,line,False,13175.342365,Poblacion
21140,16.669444,122.167906,9.0,447.0,5.361585e+08,22.608339,0.230929,0,0.0,0.0,...,4446.815096,Northern Sierra Madre Natural Park,False,1870.250597,Northern Sierra Madre Natural Park,49119.590207,line,False,43812.678784,Dibet
21141,8.252855,125.185393,28.0,1380.0,5.508559e+08,20.477528,0.241044,0,0.0,0.0,...,21768.219537,"Patagonan daw Bahaw-bahaw (Agtulawon, Mintapod...",True,0.000000,Mount Tago Range,17262.140521,line,False,182117.288243,Cabulihan
21142,17.299043,120.933428,29.0,2316.0,5.359187e+08,18.256741,0.221814,0,0.0,0.0,...,20274.544329,Tongrayan Imong,True,0.000000,Balbalasang-Balbalan National Park and propose...,47893.353284,line,False,83038.442683,Poblacion


In [12]:
cols_to_front = ["Land Cover",
                         "In Protected Area?", "Min. Distance to Protected Area (m)", "Nearest Protected Area",
                         "In KBA?", "Min. Distance to KBA (m)", "Nearest KBA",
                         'Grid Proximity (m)', 'Nearest Grid Type',
                         "In SPUG Area?", "Min. Distance to SPUG Area (m)", "Nearest SPUG Area",
                         'Min. Distance to Residential Areas (m)',
                         'Min. Distance to Main Road (m)',
                         'Min. Distance to Airport (m)',
                         'Min. Distance to Fault Line (m)']
pred_cols = df_features[cols_to_front + [col for col in df_features.columns if col not in cols_to_front]].drop(columns=['Latitude', 'Longitude'])

df_final = pd.concat([df_features[['Latitude', 'Longitude']],
                    original_df.drop(columns=['latitude', 'longitude']),
                    pred_cols], axis=1)
df_final

Unnamed: 0,Latitude,Longitude,id,label,Land Cover,In Protected Area?,Min. Distance to Protected Area (m),Nearest Protected Area,In KBA?,Min. Distance to KBA (m),...,Min. Distance to Airport (m),Min. Distance to Fault Line (m),Slope (deg),Elevation (m),Monthly Surface Solar Radiation (J/m²),Mean 2m Temperature (°C),Mean Monthly Precipitation (m),Flood Extent History,Mean Flood Depth (m),Max Flood Depth (m)
0,12.658645,120.416232,2674178575,1,Tree Cover,True,0.000000,Apo Reef Natural Park,True,0.000000,...,63512.347025,89208.689449,1.0,8.0,,,,0,0.0,0.0
1,16.307015,119.785458,3268411673,1,Tree Cover,False,570.009587,BBBIDA Pangasinan,False,60344.119443,...,29240.303800,47254.328244,11.0,65.0,6.039935e+08,26.958410,0.178895,0,0.0,0.0
2,14.961935,120.906647,6380661297,1,Built-up,False,23842.382956,Biak-na-Bato National Park,False,7715.920807,...,1309.354101,29411.203523,2.0,24.0,5.488331e+08,26.598375,0.153467,0,0.0,0.0
3,7.070329,125.622841,6761083556,1,Built-up,False,10033.952651,"Mangrove From Baculin Point to Lakud Point, Fr...",False,24365.978379,...,2046.144229,8332.244091,5.0,4.0,5.478269e+08,26.007129,0.143056,1,0.0,0.0
4,11.642965,122.307870,8947255151,1,Built-up,False,10341.306087,Aklan River Watershed Forest Reserve,False,11507.178000,...,6970.687907,11298.036355,3.0,24.0,5.353490e+08,25.601383,0.204691,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21139,12.165591,122.033751,unl_11284,0,Tree Cover,False,32464.539225,"DENR Antique, BFAR Antique, Office of the Prov...",False,33666.311751,...,,,3.0,10.0,5.946163e+08,26.651680,0.233838,0,0.0,0.0
21140,16.669444,122.167906,unl_11965,0,Tree Cover,False,4446.815096,Northern Sierra Madre Natural Park,False,1870.250597,...,,,9.0,447.0,5.361585e+08,22.608339,0.230929,0,0.0,0.0
21141,8.252855,125.185393,unl_5390,0,Tree Cover,False,21768.219537,"Patagonan daw Bahaw-bahaw (Agtulawon, Mintapod...",True,0.000000,...,,,28.0,1380.0,5.508559e+08,20.477528,0.241044,0,0.0,0.0
21142,17.299043,120.933428,unl_860,0,Tree Cover,False,20274.544329,Tongrayan Imong,True,0.000000,...,,,29.0,2316.0,5.359187e+08,18.256741,0.221814,0,0.0,0.0


In [13]:
df_final.to_csv('../00_data/positive_unlabeled_features_raw.csv', index=False)