# Extract & consolidate remote sensing data - NDVI, NDWI, NDBI, LULC

In [None]:
# # import dependencies
# !pip install -r https://raw.githubusercontent.com/anujavenkatachalam04/chvi_vbd_rj/main/requirements.txt

In [46]:
import os
import pandas as pd
import geopandas as gpd
import requests
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# from google.colab import drive
# drive.mount('/content/drive')
import uuid
import ee
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import geemap
import json


In [4]:
# os.chdir("/content/drive/MyDrive/CHVI")

In [5]:
# --- Initialize Earth Engine ---
ee.Authenticate()
ee.Initialize()

In [6]:
# --- Paths ---
grid_path = "5_Shapefiles/Rajasthan_10kmx10km_grid.geojson"
output_folder = "1_Data/Remote_Sensing/Raw/"
os.makedirs(output_folder, exist_ok=True)

In [10]:
grid_path="Rajasthan_10kmx10km_grid.geojson"

In [11]:
# Import 10kmx10km grid
grid = gpd.read_file(grid_path)

In [12]:
grid.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [13]:
grid["grid_id"] = grid.index + 1  # unique ID

In [14]:
grid.sample() 

Unnamed: 0,NAME,DISTRICT,C_CODE11,geometry,grid_id
3072,Jhalrapatan,Jhalawar,812900679000000,POINT (76.39161 24.41006),3073


In [15]:
# --- Define monthly range ---
start_date = "2024-01-01"
end_date   = "2025-09-30"
date_range = pd.date_range(start=start_date, end=end_date, freq="M")

  date_range = pd.date_range(start=start_date, end=end_date, freq="M")


In [16]:
date_range

DatetimeIndex(['2024-01-31', '2024-02-29', '2024-03-31', '2024-04-30',
               '2024-05-31', '2024-06-30', '2024-07-31', '2024-08-31',
               '2024-09-30', '2024-10-31', '2024-11-30', '2024-12-31',
               '2025-01-31', '2025-02-28', '2025-03-31', '2025-04-30',
               '2025-05-31', '2025-06-30', '2025-07-31', '2025-08-31',
               '2025-09-30'],
              dtype='datetime64[ns]', freq='ME')

# Extract NDVI

In [None]:
import ee
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm



# -----------------------
# NDVI extraction per batch
# -----------------------
def get_ndvi_batch(batch_df, ee_start_date, max_retries=1):
    """Compute NDVI for one batch of grid cells.
       If Sentinel-2 NDVI missing ‚Üí record None for kriging later."""
    for attempt in range(max_retries):
        try:
            # Build FeatureCollection (2 km buffer around centroid)
            fc = ee.FeatureCollection([
                ee.Feature(
                    ee.Geometry.Point(list(row.geometry.coords)[0]).buffer(2000).bounds(),
                    {"grid_id": int(row["grid_id"]), "source": "Sentinel-2 SR Harmonized"}
                )
                for _, row in batch_df.iterrows()
            ])

            # Sentinel-2 Harmonized NDVI
            s2 = (ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
                  .filterDate(ee_start_date, ee_start_date.advance(1, "month"))
                  .filterBounds(fc)
                  .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
                  .map(lambda img: img.normalizedDifference(["B8", "B4"]).rename("NDVI")))

            ndvi_img = s2.mean()
            reduced = ndvi_img.reduceRegions(collection=fc, reducer=ee.Reducer.mean(), scale=10)
            ndvi_list = reduced.getInfo().get("features", [])

            # If Sentinel-2 returns no NDVI values
            if not ndvi_list:
                print(f"‚ö†Ô∏è No Sentinel-2 data for {ee_start_date.format('YYYY-MM').getInfo()}")
                return [
                    {"grid_id": int(row["grid_id"]), "NDVI": None, "source": None}
                    for _, row in batch_df.iterrows()
                ]

            # Otherwise, return NDVI results
            return [
                {
                    "grid_id": f["properties"]["grid_id"],
                    "NDVI": f["properties"].get("mean"),
                    "source": f["properties"].get("source")
                }
                for f in ndvi_list
            ]

        except Exception as e:
            if attempt < max_retries - 1:
                print(f"‚ö†Ô∏è Retry {attempt + 1}/{max_retries} due to error: {e}")
                time.sleep(2)
                continue
            else:
                print(f"‚ùå Failed after {max_retries} attempts: {e}")
                # Return blank records for this batch
                return [
                    {"grid_id": int(row["grid_id"]), "NDVI": None, "source": None}
                    for _, row in batch_df.iterrows()
                ]


# -----------------------
# Monthly processor (parallel)
# -----------------------
def process_month_parallel(current_date, grid, output_folder, batch_size=200, max_workers=10):
    ee_start_date = ee.Date(current_date.strftime("%Y-%m-%d"))
    results = []

    total = len(grid)
    batches = [grid.iloc[i:i + batch_size] for i in range(0, total, batch_size)]

    print(f"\n NDVI processing for {current_date.strftime('%Y-%m')} "
          f"({len(batches)} batches, {max_workers} threads)")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(get_ndvi_batch, batch, ee_start_date): idx
            for idx, batch in enumerate(batches)
        }

        for future in tqdm(as_completed(futures), total=len(futures)):
            batch_result = future.result()
            if batch_result:
                results.extend(batch_result)

    # Build dataframe
    df = pd.DataFrame(results)
    df["Date"] = current_date

    out = f"{output_folder}/ndvi/ndvi_{current_date.strftime('%Y-%m')}.csv"
    df.to_csv(out, index=False)
    print(f"‚úÖ Saved NDVI for {current_date.strftime('%Y-%m')} ({len(df)} records) ‚Üí {out}")


for dt in date_range:
    process_month_parallel(dt, grid, output_folder, batch_size=200, max_workers=10)



üåø NDVI processing for 2025-02 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:03<00:00,  3.55s/it]


‚úÖ Saved NDVI for 2025-02 (3420 records) ‚Üí NDVI/ndvi_2025-02.csv

üåø NDVI processing for 2025-03 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:18<00:00,  4.36s/it]


‚úÖ Saved NDVI for 2025-03 (3420 records) ‚Üí NDVI/ndvi_2025-03.csv

üåø NDVI processing for 2025-04 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:19<00:00,  4.39s/it]


‚úÖ Saved NDVI for 2025-04 (3420 records) ‚Üí NDVI/ndvi_2025-04.csv

üåø NDVI processing for 2025-05 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:39<00:00,  2.22s/it]


‚úÖ Saved NDVI for 2025-05 (3420 records) ‚Üí NDVI/ndvi_2025-05.csv

üåø NDVI processing for 2025-06 (18 batches, 10 threads)


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 8/18 [00:10<00:09,  1.10it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 10/18 [00:10<00:04,  1.78it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 11/18 [00:11<00:03,  2.16it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:14<00:00,  1.21it/s]


‚úÖ Saved NDVI for 2025-06 (3420 records) ‚Üí NDVI/ndvi_2025-06.csv

üåø NDVI processing for 2025-07 (18 batches, 10 threads)


  6%|‚ñå         | 1/18 [00:01<00:19,  1.17s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 11%|‚ñà         | 2/18 [00:02<00:18,  1.14s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 17%|‚ñà‚ñã        | 3/18 [00:03<00:19,  1.27s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 22%|‚ñà‚ñà‚ñè       | 4/18 [00:05<00:18,  1.29s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 8/18 [00:07<00:06,  1.53it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 10/18 [00:07<00:03,  2.26it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.
‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.
‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 13/18 [00:08<00:01,  3.74it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:18<00:00,  1.04s/it]


‚úÖ Saved NDVI for 2025-07 (3420 records) ‚Üí NDVI/ndvi_2025-07.csv

üåø NDVI processing for 2025-08 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:45<00:00,  2.55s/it]


‚úÖ Saved NDVI for 2025-08 (3420 records) ‚Üí NDVI/ndvi_2025-08.csv

üåø NDVI processing for 2025-09 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:20<00:00,  4.48s/it]

‚úÖ Saved NDVI for 2025-09 (3420 records) ‚Üí NDVI/ndvi_2025-09.csv





In [None]:
# Consolidate all monthly files and merge into grid
# all_files = [f for f in os.listdir(output_folder) if f.startswith("ndvi

In [13]:
all_files = [f for f in os.listdir("ndvi") if f.startswith("ndvi_") and f.endswith(".csv")]

In [14]:
len(all_files)

21

In [None]:
# consolidating all files
ndvi_data=pd.DataFrame()
for file in all_files:
    df = pd.read_csv(os.path.join("ndvi", file))
    ndvi_data=pd.concat([ndvi_data, df], ignore_index=True)

In [None]:
# check that if ndvi is na, source is also na
ndvi_data.loc[ndvi_data["NDVI"].isna()]["source"].unique()

array(['Sentinel-2 SR Harmonized'], dtype=object)

In [None]:
# check that if ndvi is not na, source is Sentinel-2 SR Harmonized
ndvi_data.loc[~ndvi_data["NDVI"].isna()]["source"].unique()

In [29]:
ndvi_data.isna().sum()

grid_id        0
NDVI       10207
source     10207
Date           0
dtype: int64

In [30]:
# merging with grid
ndvi_merged = grid.merge(ndvi_data, on="grid_id", how="left")

In [38]:
# Check that ndvi nans have the same count as ndvi_data nans
ndvi_merged.isna().sum()/len(ndvi_merged)*100

NAME         0.000000
DISTRICT     0.000000
C_CODE11     0.000000
geometry     0.000000
grid_id      0.000000
NDVI        14.211919
source      14.211919
Date         0.000000
dtype: float64

In [None]:
ndvi_merged.to_file(f"1_Data/Remote_Sensing/Raw/ndvi_10kmx10km_grid.gpkg", layer="ndvi_data", driver="GPKG")

# NDWI

In [None]:
import ee
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


# -----------------------
# NDWI extraction per batch
# -----------------------
def get_ndwi_batch(batch_df, ee_start_date, max_retries=1):
    """Compute NDWI for one batch of grid cells.
       If Sentinel-2 NDWI missing ‚Üí record None for kriging later."""
    for attempt in range(max_retries):
        try:
            # Build FeatureCollection (2 km buffer around centroid)
            fc = ee.FeatureCollection([
                ee.Feature(
                    ee.Geometry.Point(list(row.geometry.coords)[0]).buffer(2000).bounds(),
                    {"grid_id": int(row["grid_id"]), "source": "Sentinel-2 SR Harmonized"}
                )
                for _, row in batch_df.iterrows()
            ])

            # Sentinel-2 Harmonized NDWI
            s2 = (
                ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
                .filterDate(ee_start_date, ee_start_date.advance(1, "month"))
                .filterBounds(fc)
                .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
                .map(lambda img: img.normalizedDifference(["B3", "B8"]).rename("NDWI"))
            )

            ndwi_img = s2.mean()
            reduced = ndwi_img.reduceRegions(collection=fc, reducer=ee.Reducer.mean(), scale=10)
            ndwi_list = reduced.getInfo().get("features", [])

            # If Sentinel-2 returns no NDWI values
            if not ndwi_list:
                print(f"‚ö†Ô∏è No Sentinel-2 NDWI data for {ee_start_date.format('YYYY-MM').getInfo()}")
                return [
                    {"grid_id": int(row["grid_id"]), "NDWI": None, "source": None}
                    for _, row in batch_df.iterrows()
                ]

            # Otherwise, return NDWI results
            return [
                {
                    "grid_id": f["properties"]["grid_id"],
                    "NDWI": f["properties"].get("mean"),
                    "source": f["properties"].get("source")
                }
                for f in ndwi_list
            ]

        except Exception as e:
            if attempt < max_retries - 1:
                print(f"‚ö†Ô∏è Retry {attempt + 1}/{max_retries} due to error: {e}")
                time.sleep(2)
                continue
            else:
                print(f"‚ùå Failed after {max_retries} attempts: {e}")
                # Return blank records for this batch
                return [
                    {"grid_id": int(row["grid_id"]), "NDWI": None, "source": None}
                    for _, row in batch_df.iterrows()
                ]


# -----------------------
# Monthly processor (parallel)
# -----------------------
def process_month_parallel(current_date, grid, output_folder, batch_size=200, max_workers=10):
    ee_start_date = ee.Date(current_date.strftime("%Y-%m-%d"))
    results = []

    total = len(grid)
    batches = [grid.iloc[i:i + batch_size] for i in range(0, total, batch_size)]

    print(f"\nüíß NDWI processing for {current_date.strftime('%Y-%m')} "
          f"({len(batches)} batches, {max_workers} threads)")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(get_ndwi_batch, batch, ee_start_date): idx
            for idx, batch in enumerate(batches)
        }

        for future in tqdm(as_completed(futures), total=len(futures)):
            batch_result = future.result()
            if batch_result:
                results.extend(batch_result)

    # Build dataframe
    df = pd.DataFrame(results)
    df["Date"] = current_date

    out = f"{output_folder}/ndwi_{current_date.strftime('%Y-%m')}.csv"
    df.to_csv(out, index=False)
    print(f"‚úÖ Saved NDWI for {current_date.strftime('%Y-%m')} ({len(df)} records) ‚Üí {out}")



for dt in date_range:
    process_month_parallel(dt, grid, output_folder, batch_size=200, max_workers=10)



üíß NDWI processing for 2024-01 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:59<00:00,  3.28s/it]


‚úÖ Saved NDWI for 2024-01 (3420 records) ‚Üí NDVI/ndwi_2024-01.csv

üíß NDWI processing for 2024-02 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:49<00:00,  2.76s/it]


‚úÖ Saved NDWI for 2024-02 (3420 records) ‚Üí NDVI/ndwi_2024-02.csv

üíß NDWI processing for 2024-03 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:49<00:00,  2.74s/it]


‚úÖ Saved NDWI for 2024-03 (3420 records) ‚Üí NDVI/ndwi_2024-03.csv

üíß NDWI processing for 2024-04 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:26<00:00,  4.78s/it]


‚úÖ Saved NDWI for 2024-04 (3420 records) ‚Üí NDVI/ndwi_2024-04.csv

üíß NDWI processing for 2024-05 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:51<00:00,  2.89s/it]


‚úÖ Saved NDWI for 2024-05 (3420 records) ‚Üí NDVI/ndwi_2024-05.csv

üíß NDWI processing for 2024-06 (18 batches, 10 threads)


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 9/18 [00:13<00:06,  1.29it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 11/18 [00:15<00:05,  1.37it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:19<00:00,  1.08s/it]


‚úÖ Saved NDWI for 2024-06 (3420 records) ‚Üí NDVI/ndwi_2024-06.csv

üíß NDWI processing for 2024-07 (18 batches, 10 threads)


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 10/18 [00:09<00:03,  2.65it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 11/18 [00:10<00:04,  1.51it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:15<00:00,  1.18it/s]


‚úÖ Saved NDWI for 2024-07 (3420 records) ‚Üí NDVI/ndwi_2024-07.csv

üíß NDWI processing for 2024-08 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:40<00:00,  2.25s/it]


‚úÖ Saved NDWI for 2024-08 (3420 records) ‚Üí NDVI/ndwi_2024-08.csv

üíß NDWI processing for 2024-09 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:03<00:00,  3.51s/it]


‚úÖ Saved NDWI for 2024-09 (3420 records) ‚Üí NDVI/ndwi_2024-09.csv

üíß NDWI processing for 2024-10 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:15<00:00,  4.21s/it]


‚úÖ Saved NDWI for 2024-10 (3420 records) ‚Üí NDVI/ndwi_2024-10.csv

üíß NDWI processing for 2024-11 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:19<00:00,  4.41s/it]


‚úÖ Saved NDWI for 2024-11 (3420 records) ‚Üí NDVI/ndwi_2024-11.csv

üíß NDWI processing for 2024-12 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:09<00:00,  3.85s/it]


‚úÖ Saved NDWI for 2024-12 (3420 records) ‚Üí NDVI/ndwi_2024-12.csv

üíß NDWI processing for 2025-01 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:58<00:00,  3.24s/it]


‚úÖ Saved NDWI for 2025-01 (3420 records) ‚Üí NDVI/ndwi_2025-01.csv

üíß NDWI processing for 2025-02 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:07<00:00,  3.73s/it]


‚úÖ Saved NDWI for 2025-02 (3420 records) ‚Üí NDVI/ndwi_2025-02.csv

üíß NDWI processing for 2025-03 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:21<00:00,  4.51s/it]


‚úÖ Saved NDWI for 2025-03 (3420 records) ‚Üí NDVI/ndwi_2025-03.csv

üíß NDWI processing for 2025-04 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:17<00:00,  4.29s/it]


‚úÖ Saved NDWI for 2025-04 (3420 records) ‚Üí NDVI/ndwi_2025-04.csv

üíß NDWI processing for 2025-05 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:53<00:00,  2.95s/it]


‚úÖ Saved NDWI for 2025-05 (3420 records) ‚Üí NDVI/ndwi_2025-05.csv

üíß NDWI processing for 2025-06 (18 batches, 10 threads)


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 8/18 [00:10<00:09,  1.04it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 9/18 [00:11<00:08,  1.11it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 12/18 [00:12<00:03,  1.81it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:17<00:00,  1.04it/s]


‚úÖ Saved NDWI for 2025-06 (3420 records) ‚Üí NDVI/ndwi_2025-06.csv

üíß NDWI processing for 2025-07 (18 batches, 10 threads)


  6%|‚ñå         | 1/18 [00:01<00:22,  1.32s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 11%|‚ñà         | 2/18 [00:03<00:28,  1.79s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 17%|‚ñà‚ñã        | 3/18 [00:05<00:29,  1.97s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 33%|‚ñà‚ñà‚ñà‚ñé      | 6/18 [00:07<00:11,  1.03it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 39%|‚ñà‚ñà‚ñà‚ñâ      | 7/18 [00:09<00:11,  1.05s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 10/18 [00:09<00:03,  2.26it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 11/18 [00:09<00:03,  2.29it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.
‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 13/18 [00:11<00:02,  1.79it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:23<00:00,  1.33s/it]


‚úÖ Saved NDWI for 2025-07 (3420 records) ‚Üí NDVI/ndwi_2025-07.csv

üíß NDWI processing for 2025-08 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:41<00:00,  2.28s/it]


‚úÖ Saved NDWI for 2025-08 (3420 records) ‚Üí NDVI/ndwi_2025-08.csv

üíß NDWI processing for 2025-09 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:03<00:00,  3.55s/it]

‚úÖ Saved NDWI for 2025-09 (3420 records) ‚Üí NDVI/ndwi_2025-09.csv





In [43]:
all_files = [f for f in os.listdir("ndwi") if f.startswith("ndwi_") and f.endswith(".csv")]

In [51]:
# consolidating all files
ndwi_data=pd.DataFrame()
for file in all_files:
    df = pd.read_csv(os.path.join("ndwi", file))
    ndwi_data=pd.concat([ndwi_data, df], ignore_index=True)

In [53]:
# check that if ndvi is na, source is also na
ndwi_data.loc[ndwi_data["NDWI"].isna()]["source"].unique()

array([nan], dtype=object)

In [54]:
# check that if ndvi is not na, source is Sentinel-2 SR Harmonized
ndwi_data.loc[~ndwi_data["NDWI"].isna()]["source"].unique()

array(['Sentinel-2 SR Harmonized'], dtype=object)

In [55]:
ndwi_data.isna().sum()

grid_id        0
NDWI       10207
source     10207
Date           0
dtype: int64

In [57]:
# merging with grid
ndwi_merged = grid.merge(ndwi_data, on="grid_id", how="left")

In [59]:
# Check that ndvi nans have the same count as ndvi_data nans
ndwi_merged.isna().sum()

NAME            0
DISTRICT        0
C_CODE11        0
geometry        0
grid_id         0
NDWI        10207
source      10207
Date            0
dtype: int64

In [None]:
ndwi_merged.to_file(f"1_Data/Remote_Sensing/Raw/ndwi_10kmx10km_grid.gpkg", layer="ndwi_data", driver="GPKG")

# NDBI

In [None]:
import ee
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


# -----------------------
# NDBI extraction per batch
# -----------------------
def get_ndbi_batch(batch_df, ee_start_date, max_retries=1):
    """Compute NDBI for one batch of grid cells.
       If Sentinel-2 NDBI missing ‚Üí record None for kriging later."""
    for attempt in range(max_retries):
        try:
            # Build FeatureCollection (2 km buffer around centroid)
            fc = ee.FeatureCollection([
                ee.Feature(
                    ee.Geometry.Point(list(row.geometry.coords)[0]).buffer(2000).bounds(),
                    {"grid_id": int(row["grid_id"]), "source": "Sentinel-2 SR Harmonized"}
                )
                for _, row in batch_df.iterrows()
            ])

            # Sentinel-2 Harmonized NDBI
            s2 = (
                ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
                .filterDate(ee_start_date, ee_start_date.advance(1, "month"))
                .filterBounds(fc)
                .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
                .map(lambda img: img.normalizedDifference(["B11", "B8"]).rename("NDBI"))
            )

            ndbi_img = s2.mean()
            reduced = ndbi_img.reduceRegions(collection=fc, reducer=ee.Reducer.mean(), scale=10)
            ndbi_list = reduced.getInfo().get("features", [])

            # If Sentinel-2 returns no NDBI values
            if not ndbi_list:
                print(f"‚ö†Ô∏è No Sentinel-2 NDBI data for {ee_start_date.format('YYYY-MM').getInfo()}")
                return [
                    {"grid_id": int(row["grid_id"]), "NDBI": None, "source": None}
                    for _, row in batch_df.iterrows()
                ]

            # Otherwise, return NDBI results
            return [
                {
                    "grid_id": f["properties"]["grid_id"],
                    ""
                    "NDBI": f["properties"].get("mean"),
                    "source": f["properties"].get("source")
                }
                for f in ndbi_list
            ]

        except Exception as e:
            if attempt < max_retries - 1:
                print(f"‚ö†Ô∏è Retry {attempt + 1}/{max_retries} due to error: {e}")
                time.sleep(2)
                continue
            else:
                print(f"‚ùå Failed after {max_retries} attempts: {e}")
                # Return blank records for this batch
                return [
                    {"grid_id": int(row["grid_id"]), "NDBI": None, "source": None}
                    for _, row in batch_df.iterrows()
                ]


# -----------------------
# Monthly processor (parallel)
# -----------------------
def process_month_parallel(current_date, grid, output_folder, batch_size=200, max_workers=10):
    ee_start_date = ee.Date(current_date.strftime("%Y-%m-%d"))
    results = []

    total = len(grid)
    batches = [grid.iloc[i:i + batch_size] for i in range(0, total, batch_size)]

    print(f"\nüèôÔ∏è NDBI processing for {current_date.strftime('%Y-%m')} "
          f"({len(batches)} batches, {max_workers} threads)")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(get_ndbi_batch, batch, ee_start_date): idx
            for idx, batch in enumerate(batches)
        }

        for future in tqdm(as_completed(futures), total=len(futures)):
            batch_result = future.result()
            if batch_result:
                results.extend(batch_result)

    # Build dataframe
    df = pd.DataFrame(results)
    df["Date"] = current_date

    out = f"{output_folder}/ndbi_{current_date.strftime('%Y-%m')}.csv"
    df.to_csv(out, index=False)
    print(f"‚úÖ Saved NDBI for {current_date.strftime('%Y-%m')} ({len(df)} records) ‚Üí {out}")


for dt in date_range[7:]:
    process_month_parallel(dt, grid, output_folder, batch_size=200, max_workers=10)



üèôÔ∏è NDBI processing for 2024-08 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:34<00:00,  1.90s/it]


‚úÖ Saved NDBI for 2024-08 (3420 records) ‚Üí NDVI/ndbi_2024-08.csv

üèôÔ∏è NDBI processing for 2024-09 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:54<00:00,  3.04s/it]


‚úÖ Saved NDBI for 2024-09 (3420 records) ‚Üí NDVI/ndbi_2024-09.csv

üèôÔ∏è NDBI processing for 2024-10 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:08<00:00,  3.80s/it]


‚úÖ Saved NDBI for 2024-10 (3420 records) ‚Üí NDVI/ndbi_2024-10.csv

üèôÔ∏è NDBI processing for 2024-11 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:04<00:00,  3.57s/it]


‚úÖ Saved NDBI for 2024-11 (3420 records) ‚Üí NDVI/ndbi_2024-11.csv

üèôÔ∏è NDBI processing for 2024-12 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:50<00:00,  2.78s/it]


‚úÖ Saved NDBI for 2024-12 (3420 records) ‚Üí NDVI/ndbi_2024-12.csv

üèôÔ∏è NDBI processing for 2025-01 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:47<00:00,  2.65s/it]


‚úÖ Saved NDBI for 2025-01 (3420 records) ‚Üí NDVI/ndbi_2025-01.csv

üèôÔ∏è NDBI processing for 2025-02 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:52<00:00,  2.92s/it]


‚úÖ Saved NDBI for 2025-02 (3420 records) ‚Üí NDVI/ndbi_2025-02.csv

üèôÔ∏è NDBI processing for 2025-03 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:20<00:00,  4.48s/it]


‚úÖ Saved NDBI for 2025-03 (3420 records) ‚Üí NDVI/ndbi_2025-03.csv

üèôÔ∏è NDBI processing for 2025-04 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:54<00:00,  3.05s/it]


‚úÖ Saved NDBI for 2025-04 (3420 records) ‚Üí NDVI/ndbi_2025-04.csv

üèôÔ∏è NDBI processing for 2025-05 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:40<00:00,  2.27s/it]


‚úÖ Saved NDBI for 2025-05 (3420 records) ‚Üí NDVI/ndbi_2025-05.csv

üèôÔ∏è NDBI processing for 2025-06 (18 batches, 10 threads)


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 8/18 [00:07<00:06,  1.63it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 9/18 [00:08<00:05,  1.79it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 10/18 [00:08<00:03,  2.22it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:12<00:00,  1.43it/s]


‚úÖ Saved NDBI for 2025-06 (3420 records) ‚Üí NDVI/ndbi_2025-06.csv

üèôÔ∏è NDBI processing for 2025-07 (18 batches, 10 threads)


  6%|‚ñå         | 1/18 [00:01<00:20,  1.22s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 11%|‚ñà         | 2/18 [00:02<00:20,  1.31s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 17%|‚ñà‚ñã        | 3/18 [00:03<00:20,  1.34s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 22%|‚ñà‚ñà‚ñè       | 4/18 [00:05<00:18,  1.33s/it]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 8/18 [00:06<00:05,  1.79it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 10/18 [00:07<00:02,  2.68it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.
‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 13/18 [00:07<00:01,  4.83it/s]

‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.
‚ùå Failed after 1 attempts: Image.reduceRegions: Image has no bands.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:18<00:00,  1.04s/it]


‚úÖ Saved NDBI for 2025-07 (3420 records) ‚Üí NDVI/ndbi_2025-07.csv

üèôÔ∏è NDBI processing for 2025-08 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [00:42<00:00,  2.34s/it]


‚úÖ Saved NDBI for 2025-08 (3420 records) ‚Üí NDVI/ndbi_2025-08.csv

üèôÔ∏è NDBI processing for 2025-09 (18 batches, 10 threads)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 18/18 [01:08<00:00,  3.78s/it]

‚úÖ Saved NDBI for 2025-09 (3420 records) ‚Üí NDVI/ndbi_2025-09.csv





In [61]:
all_files = [f for f in os.listdir("ndbi") if f.startswith("ndbi_") and f.endswith(".csv")]

In [67]:
# consolidating all files
ndbi_data=pd.DataFrame()
for file in all_files:
    df = pd.read_csv(os.path.join("ndbi", file))
    ndbi_data=pd.concat([ndbi_data, df], ignore_index=True)

In [70]:
# check that if ndvi is na, source is also na
ndbi_data.loc[ndbi_data["NDBI"].isna()]["source"].unique()

array([nan], dtype=object)

In [71]:
ndbi_data.loc[~ndbi_data["NDBI"].isna()]["source"].unique()

array(['Sentinel-2 SR Harmonized'], dtype=object)

In [73]:
ndbi_data.isna().sum()

grid_id        0
NDBI       10207
source     10207
Date           0
dtype: int64

In [74]:
# merging with grid
ndbi_merged = grid.merge(ndbi_data, on="grid_id", how="left")

In [75]:
# Check that ndvi nans have the same count as ndvi_data nans
ndbi_merged.isna().sum()

NAME            0
DISTRICT        0
C_CODE11        0
geometry        0
grid_id         0
NDBI        10207
source      10207
Date            0
dtype: int64

In [None]:
ndbi_merged.to_file(f"1_Data/Remote_Sensing/Raw/ndbi_10kmx10km_grid.gpkg", layer="ndbi_data", driver="GPKG")

# lulc_data_data_data_data

In [None]:
# upload grid to ee
grid_fc = geemap.geojson_to_ee("rajasthan_10kmx10km_grid.geojson")


In [47]:
# ---------------------------------------------------------------------
# Dynamic World parameters
# ---------------------------------------------------------------------
dw = ee.ImageCollection("GOOGLE/DYNAMICWORLD/V1")
class_names = [
    "water", "trees", "grass", "flooded_vegetation", "crops",
    "shrub_and_scrub", "built", "bare", "snow_and_ice"
]

# ---------------------------------------------------------------------
# Loop through months and export locally as CSV
# ---------------------------------------------------------------------
for i in range(len(date_range)):
    start = ee.Date(date_range[i].strftime("%Y-%m-01"))
    end = start.advance(1, "month")
    month_str = date_range[i].strftime("%Y-%m")

    dw_month = dw.filterDate(start, end).select(class_names)

    if dw_month.size().getInfo() == 0:
        print(f"‚ö†Ô∏è No Dynamic World data for {month_str}")
        continue

    dw_mean = dw_month.mean()

    mean_stats = dw_mean.reduceRegions(
        collection=grid_fc,
        reducer=ee.Reducer.mean(),
        scale=100,
        crs='EPSG:4326'
    )

    mean_stats = mean_stats.map(lambda f: f.set("Date", month_str))

    try:
        features = mean_stats.getInfo()["features"]
    except Exception as e:
        print(f"‚ùå Failed to fetch {month_str}: {e}")
        continue

    rows = [f["properties"] for f in features]
    df = pd.DataFrame(rows)

    # save locally - then later merge all files
    output_path = f"LULC_{month_str}.csv"
    df.to_csv(output_path, index=False)
    print(f"‚úÖ Saved: {output_path}")


‚úÖ Saved: LULC_2024-01.csv
‚úÖ Saved: LULC_2024-02.csv
‚úÖ Saved: LULC_2024-03.csv
‚úÖ Saved: LULC_2024-04.csv
‚úÖ Saved: LULC_2024-05.csv
‚úÖ Saved: LULC_2024-06.csv
‚úÖ Saved: LULC_2024-07.csv
‚úÖ Saved: LULC_2024-08.csv
‚úÖ Saved: LULC_2024-09.csv
‚úÖ Saved: LULC_2024-10.csv
‚úÖ Saved: LULC_2024-11.csv
‚úÖ Saved: LULC_2024-12.csv
‚úÖ Saved: LULC_2025-01.csv
‚úÖ Saved: LULC_2025-02.csv
‚úÖ Saved: LULC_2025-03.csv
‚úÖ Saved: LULC_2025-04.csv
‚úÖ Saved: LULC_2025-05.csv
‚úÖ Saved: LULC_2025-06.csv
‚úÖ Saved: LULC_2025-07.csv
‚úÖ Saved: LULC_2025-08.csv
‚úÖ Saved: LULC_2025-09.csv


In [48]:
# Concat all files
all_files = [f for f in os.listdir() if f.startswith("LULC_") and f.endswith(".csv")]

In [49]:
len(all_files)

21

In [65]:
lulc_data=pd.DataFrame()
for file in all_files:
    df = pd.read_csv(f"lulc_raw_monthly_grid/{file}")
    lulc_data=pd.concat([lulc_data, df], ignore_index=True)
# lulc_data.to_csv("1_Data/Remote_Sensing/Raw/lulc_10kmx10km_grid.csv", index=False)

In [66]:
grid["C_CODE11"]=grid["C_CODE11"].astype(int)   

In [67]:
# build in geometry
lulc_data=lulc_data.merge(grid, how="left")

In [None]:
# ADD indicators
lulc_data['urbanization_index'] = lulc_data['built'] / (
    lulc_data['built'] + lulc_data['trees'] + lulc_data['grass'] + lulc_data['shrub_and_scrub']
)

lulc_data['surface_water_fraction'] = (
    lulc_data['water'] + lulc_data['flooded_vegetation']
)

lulc_data['agriculture_fraction'] = lulc_data['crops']

lulc_data['vegetation_cover'] = (
    lulc_data['trees'] + lulc_data['grass'] + lulc_data['shrub_and_scrub']
)

lulc_data['nonhabitat_fraction'] = lulc_data['bare'] + lulc_data['snow_and_ice']


In [69]:
lulc_data_gdf=gpd.GeoDataFrame(lulc_data)

In [72]:
len(lulc_data_gdf)

71820

In [None]:
lulc_data_gdf.to_file(f"1_Data/Remote_Sensing/Raw/lulc_10kmx10km_grid.gpkg", layer="lulc_data", driver="GPKG")

In [75]:
lulc_data_gdf.isna().sum()/len(lulc_data_gdf)*100

C_CODE11               0.00000
DISTRICT               0.00000
Date                   0.00000
NAME                   0.00000
bare                  13.84851
built                 13.84851
crops                 13.84851
flooded_vegetation    13.84851
grass                 13.84851
grid_id                0.00000
shrub_and_scrub       13.84851
snow_and_ice          13.84851
trees                 13.84851
water                 13.84851
geometry               0.00000
dtype: float64

In [None]:
# The End!