<a href="https://colab.research.google.com/github/hessamhz/dasa/blob/main/Satellite_indices_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
# Mount Google Drive so the dataset paths under /content/drive used below resolve.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
import ee
from tqdm import tqdm
import time
from concurrent.futures import ThreadPoolExecutor

In [3]:
# Load version 4 of the FFS dataset from Google Drive; later cells add the
# satellite index columns (NDVI, NDMI, GCVI) to batches of this frame.
ffs_df = pd.read_csv("/content/drive/MyDrive/Google Colab/DASA/Datasets/GigaFFS/ffs_data_v_4.csv")

In [4]:
# Preview the first rows to check the columns used later
# (latitude, longitude, created_at) are present.
ffs_df.head()

Unnamed: 0,district,sub_division,village,ffs_cropping_system_name,ffsplot_major_crop_name,ffs_major_crop_date_of_sowing,created_at,pests,defenders,soil_condition,weather_condition,wind_conditions,weed_types_intensity,crop_conditions,census_code,days_after_sowing_in_numbers,latitude,longitude
0,Amravati,Achalpur,Borala,Intercropping,Cotton,2019-07-05,2019-09-09,Aphid,Crysopa spp,3,2,1,1,2,532989,66.0,21.291711,77.375508
1,Amravati,Amrawati,Antapur,Intercropping,Cotton,2019-07-04,2019-09-09,Aphid,Lady bird beetle,3,1,2,4,2,532800,67.0,20.906323,77.538296
2,Nanded,Deglur,Akharga,Intercropping,Cotton,2019-06-30,2019-09-28,Aphid,big eyed bugs,3,1,2,1,1,545472,90.0,18.545867,77.57627
3,Beed,Ambejogai,Kelgaon,Intercropping,Cotton,2019-06-24,2019-09-28,Aphid,Lady bird beetle,1,1,2,1,2,559693,96.0,18.741947,75.939066
4,Amravati,Achalpur,Naigaon,Intercropping,Cotton,2019-07-04,2019-09-28,No Pest,No Defender,3,1,2,1,3,532999,86.0,21.2001,77.49099


In [11]:
from google.colab import auth
# Authenticate the Colab user for the Google Cloud project used by this notebook.
# NOTE(review): the project id is hard-coded; change it when running under another project.
auth.authenticate_user(project_id='wotrm-444909')

In [14]:
# Initialize the Earth Engine API. Authentication itself is done in the
# preceding cell via google.colab.auth; this call only sets up the client.
ee.Initialize()

In [34]:
def get_indices(lat, lon, date, max_days_to_check=5):
    """
    Look up NDVI, NDMI and GCVI at a point for a given date.

    The first image of the 'COPERNICUS/S2' collection that covers the point on
    `date` is used. If that day has no image, the search widens one day at a
    time, checking the earlier day before the later one (-1, +1, -2, +2, ...),
    up to `max_days_to_check` days away from `date`.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        date: Target date, in any form accepted by `ee.Date`.
        max_days_to_check: Maximum distance, in days, to search around `date`.

    Returns:
        The dictionary produced by reducing the image (its own bands plus the
        added 'NDVI', 'NDMI' and 'GCVI' bands) with a mean reducer at the
        point, or an empty dict when no image is found in the search window or
        the image lacks any of the bands B3, B4, B8, B11.
    """
    # Build the geometry once; it is reused for every probe and for the reduction.
    point = ee.Geometry.Point(lon, lat)
    target_date = ee.Date(date)

    def fetch_first_image(day_offset):
        """Return (image, image info) for the one-day window `day_offset` days from the target."""
        window_start = target_date.advance(day_offset, 'day')
        window_end = window_start.advance(1, 'day')
        image = ee.ImageCollection('COPERNICUS/S2') \
                    .filterDate(window_start, window_end) \
                    .filterBounds(point) \
                    .first()
        # getInfo() is a blocking round trip to Earth Engine, so it is called
        # exactly once per probed day and its result is reused below.
        return image, image.getInfo()

    # Probe order: the target day, then alternately i days before and i days after.
    day_offsets = [0]
    for distance in range(1, max_days_to_check + 1):
        day_offsets.extend((-distance, distance))

    for day_offset in day_offsets:
        dataset, dataset_info = fetch_first_image(day_offset)
        if dataset_info is not None:
            break
    else:
        # No image anywhere in the search window.
        return {}

    # The fetched image description already lists the bands, so no extra
    # bandNames().getInfo() request is needed.
    available_bands = {band['id'] for band in dataset_info.get('bands', [])}
    if not {'B3', 'B4', 'B8', 'B11'} <= available_bands:
        return {}

    ndvi = dataset.normalizedDifference(['B8', 'B4']).rename('NDVI')  # NIR (B8) and Red (B4)
    ndmi = dataset.normalizedDifference(['B8', 'B11']).rename('NDMI')  # NIR (B8) and SWIR (B11)
    gcvi = dataset.expression('b("B8") / b("B3") - 1').rename('GCVI')  # NIR (B8) and Green (B3)

    indices = dataset.addBands([ndvi, ndmi, gcvi]).reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=point,
        scale=10,
        maxPixels=1e8
    )

    return indices.getInfo()


In [16]:
# Function to retry with exponential backoff
def get_indices_with_backoff(lat, lon, date, retries=3, delay=1):
    """
    Call `get_indices`, retrying on Earth Engine errors with a doubling wait.

    Args:
        lat: Latitude passed through to `get_indices`.
        lon: Longitude passed through to `get_indices`.
        date: Date passed through to `get_indices`.
        retries: Total number of attempts before giving up.
        delay: Seconds to wait after the first failure; doubled after each
            further failure.

    Returns:
        Whatever `get_indices` returns, or None once every attempt has raised
        `ee.EEException`. Other exception types are not caught.
    """
    wait_seconds = delay
    for attempt in range(1, retries + 1):
        try:
            return get_indices(lat, lon, date)
        except ee.EEException:
            out_of_attempts = attempt == retries
            if out_of_attempts:
                print(f"Failed to fetch data for {lat}, {lon}, {date} after {retries} retries.")
                return None  # Give up after retries
            print(f"Retrying {lat}, {lon}, {date} after error...")
            # Back off before the next attempt, then double the wait.
            time.sleep(wait_seconds)
            wait_seconds = wait_seconds * 2

In [28]:
# Extract the NDVI, NDMI and GCVI values for a single DataFrame row, using the retrying lookup
def extract_indices_for_row(row):
    """
    Fetch the satellite indices for one row.

    Args:
        row: Mapping-like object with 'latitude', 'longitude' and
            'created_at' entries.

    Returns:
        A (NDVI, NDMI, GCVI) tuple. All three are None when the lookup returns
        nothing (None or an empty dict); individual entries are None when the
        lookup result lacks that key.
    """
    fetched = get_indices_with_backoff(
        row['latitude'],
        row['longitude'],
        row['created_at'],
    )

    # Both a failed lookup (None) and an empty result ({}) count as "no data".
    if not fetched:
        return (None, None, None)

    return tuple(fetched.get(index_name) for index_name in ('NDVI', 'NDMI', 'GCVI'))

In [71]:
def extract_indices_and_append(df, output_path='indices_result60000-last.csv', max_workers=15):
    """
    Look up NDVI, NDMI and GCVI for every row of `df` and return them as new columns.

    Rows are processed concurrently by `extract_indices_for_row` in a thread
    pool; results are kept in row order. The input frame is not modified: the
    columns are added to a copy, which avoids pandas' SettingWithCopyWarning
    when `df` is a slice of a larger frame.

    Args:
        df: DataFrame whose rows are accepted by `extract_indices_for_row`.
        output_path: CSV path the result is written to (without the index).
            Pass None to skip writing. Defaults to the original hard-coded
            file name.
        max_workers: Number of threads issuing lookups in parallel.

    Returns:
        A copy of `df` with 'NDVI', 'NDMI' and 'GCVI' columns added.
    """
    rows = (row for _, row in df.iterrows())

    # parallel processing; executor.map yields results in input order
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(extract_indices_for_row, rows),
                            total=df.shape[0], desc="Processing", unit="row"))

    enriched = df.copy()
    # Build each column by position so an empty input yields empty columns
    # instead of failing on unpacking.
    for position, column in enumerate(('NDVI', 'NDMI', 'GCVI')):
        enriched[column] = [values[position] for values in results]

    if output_path is not None:
        enriched.to_csv(output_path, index=False)
    return enriched


In [72]:
# The dataset is processed in batches; the commented-out slices below are other
# batches (presumably handled in earlier runs of this cell).
# ffs100 = ffs_df.iloc[:100]
# ffs10000 = ffs_df.iloc[100:10000]
# ffs20000 = ffs_df.iloc[10000:20000]
# ffs30000 = ffs_df.iloc[20000:30000]
# ffs40000 = ffs_df.iloc[30000:40000]
# .copy() makes the batch an independent frame, so adding the index columns to
# it does not raise pandas' SettingWithCopyWarning on a slice of ffs_df.
ffs70000 = ffs_df.iloc[60000:].copy()

In [None]:
# Fetch NDVI/NDMI/GCVI for every row of the current batch (threaded Earth
# Engine lookups); the function also writes the result to a local CSV.
indices_df = extract_indices_and_append(ffs70000)

In [75]:
# Persist this batch to Google Drive; it is read back below together with the other batch files.
indices_df.to_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices60000-71900.csv', index=False)

In [76]:
# Reload every per-batch result file from Google Drive so they can be concatenated.
# NOTE(review): the second file is named 'indices100-1000.csv' while the batch
# slice above is 100:10000 - presumably a naming slip; confirm the file holds
# rows 100-9999 so no rows are missing from the combined frame.
df1_1 = pd.read_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices100.csv')
df1_2 = pd.read_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices100-1000.csv')
df2 = pd.read_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices10000-20000.csv')
df3 = pd.read_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices20000-30000.csv')
df4 = pd.read_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices30000-40000.csv')
df5 = pd.read_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices40000-50000.csv')
df6 = pd.read_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices50000-60000.csv')
df7 = pd.read_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/satellite_indices/indices60000-71900.csv')

In [78]:
# Stack the batches in their original order and renumber the index from 0.
all_df = pd.concat([df1_1, df1_2, df2, df3, df4, df5, df6, df7], ignore_index=True)
# Display the combined frame to check the row count and the new NDVI/NDMI/GCVI columns.
all_df

Unnamed: 0,district,sub_division,village,ffs_cropping_system_name,ffsplot_major_crop_name,ffs_major_crop_date_of_sowing,created_at,pests,defenders,soil_condition,...,wind_conditions,weed_types_intensity,crop_conditions,census_code,days_after_sowing_in_numbers,latitude,longitude,NDVI,NDMI,GCVI
0,Amravati,Achalpur,Borala,Intercropping,Cotton,2019-07-05,2019-09-09,Aphid,Crysopa spp,3,...,1,1,2,532989,66.0,21.291711,77.375508,-0.009941,0.282712,0.004623
1,Amravati,Amrawati,Antapur,Intercropping,Cotton,2019-07-04,2019-09-09,Aphid,Lady bird beetle,3,...,2,4,2,532800,67.0,20.906323,77.538296,0.010619,0.209492,0.041197
2,Nanded,Deglur,Akharga,Intercropping,Cotton,2019-06-30,2019-09-28,Aphid,big eyed bugs,3,...,2,1,1,545472,90.0,18.545867,77.576270,0.153499,0.061971,0.376246
3,Beed,Ambejogai,Kelgaon,Intercropping,Cotton,2019-06-24,2019-09-28,Aphid,Lady bird beetle,1,...,2,1,2,559693,96.0,18.741947,75.939066,0.223633,0.121928,0.357730
4,Amravati,Achalpur,Naigaon,Intercropping,Cotton,2019-07-04,2019-09-28,No Pest,No Defender,3,...,2,1,3,532999,86.0,21.200100,77.490990,0.212296,0.096863,0.450888
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71895,Hingoli,Hingoli,Naukha tarf kalamnuri,Sole,Turmeric,2021-06-10,2021-12-15,White Grub,Dragonfly,1,...,1,1,2,546160,188.0,19.630750,77.430036,0.400752,0.251144,0.870438
71896,Hingoli,Hingoli,Walki,Sole,Turmeric,2021-06-20,2021-10-27,No Pest,No Defender,1,...,2,4,2,546020,129.0,19.556666,77.088103,0.320266,0.104862,0.699099
71897,Hingoli,Hingoli,Tuppa,Sole,Turmeric,2021-06-09,2021-10-27,White Grub,Spider,1,...,3,1,2,546159,140.0,19.632255,77.409950,0.317025,0.029990,0.804348
71898,Nanded,Nanded,Limbgaon,Sole,Turmeric,2021-06-15,2021-12-09,White Grub,Spider,1,...,1,1,2,544685,177.0,19.178827,77.202431,0.181190,0.047230,0.292086


In [80]:
# Save the combined frame (original columns plus NDVI, NDMI, GCVI) as version 5 of the dataset.
all_df.to_csv('/content/drive/MyDrive/Google Colab/DASA/Datasets/GigaFFS/ffs_data_v_5.csv', index=False)