In [1]:
import ee

# Trigger the authentication flow.
# ee.Authenticate()

ee.Initialize(project='ee-arzaaan789')

In [2]:
import pandas as pd
from tqdm import tqdm

df = pd.read_csv("Erinaceus europaeus.csv", delimiter='\t')
df = df[df["occurrenceStatus"] == "PRESENT"]
df = df[df['year']>=2022]
# df['eventDate'] = df['eventDate'].str.replace('/','')
df['eventDate'] = pd.to_datetime(df['eventDate'], format='%Y-%m-%d', errors='coerce')
df = df[['species', 'decimalLatitude', 'decimalLongitude', 'eventDate']]
df = df.dropna().reset_index(drop=True)

  df = pd.read_csv("Erinaceus europaeus.csv", delimiter='\t')


In [3]:
import ee
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm

# ee.Initialize()

# Your dataframe 'df' must have columns: decimalLongitude, decimalLatitude
# Example: df = pd.read_csv("Erinaceus europaeus.csv", delimiter='\t')

def create_aoi(lon, lat, box_size_km=1):
    """Create approx 1km x 1km square polygon around lon, lat."""
    half_side_deg = box_size_km / 111.32 / 2  # Rough approx degrees per km

    coords = [
        [lon - half_side_deg, lat - half_side_deg],
        [lon + half_side_deg, lat - half_side_deg],
        [lon + half_side_deg, lat + half_side_deg],
        [lon - half_side_deg, lat + half_side_deg],
        [lon - half_side_deg, lat - half_side_deg]
    ]
    return ee.Geometry.Polygon(coords)

def compute_all_indices(feature, start_date, end_date):
    """Compute spectral indices and LST for one EE Feature (with AOI geometry)."""
    aoi = feature.geometry()

    s2 = (ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
          .filterBounds(aoi)
          .filterDate(start_date, end_date)
          .sort('CLOUDY_PIXEL_PERCENTAGE')
          .first())
    s2 = ee.Image(s2).clip(aoi)

    blue = s2.select('B2')
    green = s2.select('B3')
    red = s2.select('B4')
    nir = s2.select('B8')
    swir = s2.select('B11')

    L = 0.5  # SAVI constant

    ndvi = nir.subtract(red).divide(nir.add(red)).rename('NDVI')
    ndwi = green.subtract(nir).divide(green.add(nir)).rename('NDWI')
    ndbi = swir.subtract(nir).divide(swir.add(nir)).rename('NDBI')
    savi = nir.subtract(red).divide(nir.add(red).add(L)).multiply(1 + L).rename('SAVI')
    mndwi = green.subtract(swir).divide(green.add(swir)).rename('MNDWI')
    ndsi = green.subtract(swir).divide(green.add(swir)).rename('NDSI')
    bsi = (red.add(blue).subtract(nir.add(swir))).divide(red.add(blue).add(nir).add(swir)).rename('BSI')
    ui = nir.subtract(swir).divide(nir.add(swir)).rename('UI')

    reducers = ee.Reducer.mean()
    scale_10m = 10

    ndvi_mean = ndvi.reduceRegion(reducers, aoi, scale_10m).get('NDVI')
    ndwi_mean = ndwi.reduceRegion(reducers, aoi, scale_10m).get('NDWI')
    ndbi_mean = ndbi.reduceRegion(reducers, aoi, scale_10m).get('NDBI')
    savi_mean = savi.reduceRegion(reducers, aoi, scale_10m).get('SAVI')
    mndwi_mean = mndwi.reduceRegion(reducers, aoi, scale_10m).get('MNDWI')
    ndsi_mean = ndsi.reduceRegion(reducers, aoi, scale_10m).get('NDSI')
    bsi_mean = bsi.reduceRegion(reducers, aoi, scale_10m).get('BSI')
    ui_mean = ui.reduceRegion(reducers, aoi, scale_10m).get('UI')

    # MODIS LST dataset
    modis = (ee.ImageCollection("MODIS/061/MOD11A1")
             .filterBounds(aoi)
             .filterDate(start_date, end_date)
             .select('LST_Day_1km'))

    lst_mean_img = modis.mean().multiply(0.02).clip(aoi)

    lst_mean = lst_mean_img.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=aoi,
        scale=1000
    ).get('LST_Day_1km')

    return feature.set({
        'NDVI': ndvi_mean,
        'NDWI': ndwi_mean,
        'NDBI': ndbi_mean,
        'SAVI': savi_mean,
        'MNDWI': mndwi_mean,
        'NDSI': ndsi_mean,
        'BSI': bsi_mean,
        'UI': ui_mean,
        'LST': lst_mean
    })

# Split df into batches
batch_size = 50
batches = [df.iloc[i:i+batch_size] for i in range(0, len(df), batch_size)]

results_list = []

index_names = ['NDVI', 'NDWI', 'NDBI', 'SAVI', 'MNDWI', 'NDSI', 'BSI', 'UI', 'LST']

for batch in tqdm(batches, desc="Processing batches"):
    features = []
    batch_indices = []
    feature_metadata = {}

    for idx, row in batch.iterrows():
        
        aoi = create_aoi(row['decimalLongitude'], row['decimalLatitude'])
        feature = ee.Feature(aoi).set('index', idx)
        features.append(feature)
        batch_indices.append(idx)  # Save the original index
        
        event_date = row['eventDate']
        start_date = (event_date - timedelta(days=15)).strftime('%Y-%m-%d')
        end_date = (event_date + timedelta(days=15)).strftime('%Y-%m-%d')
        feature_metadata[idx] = (start_date, end_date)

    # Create a FeatureCollection from the list of features
    fc = ee.FeatureCollection(features)
    
    # Define wrapper for map to inject per-feature dates
    def map_with_dates(f):
        idx = f.get('index')
        # Use dictionary lookup to get dates for this feature
        date_dict = ee.Dictionary(ee.Dictionary(feature_metadata))
        dates = ee.List(date_dict.get(ee.Number(idx)))
        return compute_all_indices(f, dates.get(0), dates.get(1))

    try:
        result_fc = fc.map(map_with_dates)
        results = result_fc.getInfo()

        rows = []
        for f in results['features']:
            props = f['properties']
            rows.append(props)

    except Exception as e:
        print(f"Error processing batch starting at index {batch.index[0]}: {e}")
        # If there's an error, create placeholder rows with None
        rows = [{'index': i, **{name: None for name in index_names}} for i in batch_indices]

    batch_results_df = pd.DataFrame(rows).sort_values('index').reset_index(drop=True)
    results_list.append(batch_results_df)

# Concatenate all batches and sort by original index
all_results_df = pd.concat(results_list).sort_values('index').reset_index(drop=True)

# Merge with original df
df_final = pd.concat([df.reset_index(drop=True), all_results_df.drop(columns=['index'])], axis=1)

print(df_final.head())


Processing batches: 100%|██████████| 924/924 [50:10<00:00,  3.26s/it]  

               species  decimalLatitude  decimalLongitude  eventDate  \
0  Erinaceus europaeus        52.835848         -0.947492 2022-06-19   
1  Erinaceus europaeus        52.818132         -0.977597 2023-05-13   
2  Erinaceus europaeus        52.689385         -0.684702 2022-06-12   
3  Erinaceus europaeus        52.727454         -0.890871 2022-02-25   
4  Erinaceus europaeus        52.710769         -1.039317 2022-07-13   

        BSI         LST     MNDWI      NDBI      NDSI      NDVI      NDWI  \
0 -0.736822  297.235328 -0.500073 -0.255981 -0.500073  0.743785 -0.663119   
1 -0.674325  293.872242 -0.457554 -0.257522 -0.457554  0.657087 -0.615822   
2 -0.643783  294.579695 -0.459196 -0.202234 -0.459196  0.583621 -0.591600   
3 -0.566207  282.179633 -0.483596 -0.024255 -0.483596  0.473914 -0.483627   
4 -0.712308  304.672511 -0.574857 -0.196281 -0.574857  0.668698 -0.681336   

       SAVI        UI  
0  1.115571  0.255981  
1  0.985519  0.257522  
2  0.875334  0.202234  
3  0.710




In [4]:
df_final.to_csv("hedgehog_full_data.csv", index=False)
df = pd.read_csv("hedgehog_full_data.csv")
df['eventDate'] = pd.to_datetime(df['eventDate'], format='%Y-%m-%d', errors='coerce')

df = df.dropna().reset_index(drop=True)

badgers = pd.read_csv("Meles meles.csv", delimiter='\t')
badgers = badgers[badgers["occurrenceStatus"] == "PRESENT"]
badgers = badgers[badgers['year']>=2022]
badgers['eventDate'] = pd.to_datetime(badgers['eventDate'], format='%Y-%m-%d', errors='coerce')
badgers = badgers[['species', 'decimalLatitude', 'decimalLongitude', 'eventDate']]
badgers = badgers.dropna().reset_index(drop=True)

ground_beetles = pd.read_csv("ground_beetles.csv", delimiter='\t')
ground_beetles = ground_beetles[ground_beetles["occurrenceStatus"] == "PRESENT"]
ground_beetles = ground_beetles[ground_beetles['year']>=2022]
ground_beetles['eventDate'] = pd.to_datetime(ground_beetles['eventDate'], format='%Y-%m-%d', errors='coerce')
ground_beetles = ground_beetles[['species', 'decimalLatitude', 'decimalLongitude', 'eventDate']]
ground_beetles = ground_beetles.dropna().reset_index(drop=True)

  badgers = pd.read_csv("Meles meles.csv", delimiter='\t')
  ground_beetles = pd.read_csv("ground_beetles.csv", delimiter='\t')


In [5]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
from tqdm import tqdm

# Convert lat/lon to radians for BallTree
hedgehog_coords = np.deg2rad(df[['decimalLatitude', 'decimalLongitude']].values)
badger_coords = np.deg2rad(badgers[['decimalLatitude', 'decimalLongitude']].values)
ground_beetles_coords = np.deg2rad(ground_beetles[['decimalLatitude', 'decimalLongitude']].values)

# Build BallTrees
tree_badger = BallTree(badger_coords, metric='haversine')
tree_ground_beetles = BallTree(ground_beetles_coords, metric='haversine')

# 1 km radius in radians
radius = 1 / 6371.0

# Initialize presence columns
df['badger_presence'] = 0
df['ground_beetles_presence'] = 0

# Iterate through each hedgehog point with tqdm for progress tracking
for i in tqdm(range(len(df))):
    point = hedgehog_coords[i].reshape(1, -1)
    event_month = df.loc[i, 'eventDate'].month
    event_year = df.loc[i, 'eventDate'].year

    # BADGERS
    idxs = tree_badger.query_radius(point, r=radius)[0]
    for j in idxs:
        badger_date = badgers.loc[j, 'eventDate']
        if badger_date.month == event_month and badger_date.year == event_year:
            df.at[i, 'badger_presence'] = 1
            break  # Found at least one match, no need to check further

    # GROUND BEETLES
    idxs = tree_ground_beetles.query_radius(point, r=radius)[0]
    for j in idxs:
        beetle_date = ground_beetles.loc[j, 'eventDate']
        if beetle_date.month == event_month and beetle_date.year == event_year:
            df.at[i, 'ground_beetles_presence'] = 1
            break

100%|██████████| 45912/45912 [00:04<00:00, 10843.78it/s]


In [6]:
import osmnx as ox
from shapely.geometry import Point
import geopandas as gpd
import numpy as np
from tqdm import tqdm

# Construct GeoDataFrame
df['geometry'] = [Point(xy) for xy in zip(df['decimalLongitude'], df['decimalLatitude'])]
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

# Define spatial tiling: 0.05 x 0.05 degrees
tile_size = 0.1
gdf['tile_x'] = (gdf['decimalLongitude'] // tile_size).astype(int)
gdf['tile_y'] = (gdf['decimalLatitude'] // tile_size).astype(int)

# Group by tile
grouped = gdf.groupby(['tile_x', 'tile_y'])

# Store results
all_distances = []

for (tile_x, tile_y), group in tqdm(grouped, total=len(grouped)):
    west = tile_x * tile_size - 0.02
    south = tile_y * tile_size - 0.02
    east = (tile_x + 1) * tile_size + 0.02
    north = (tile_y + 1) * tile_size + 0.02
    bbox = (west, south, east, north)

    try:
        G = ox.graph_from_bbox(bbox, network_type='drive_service')
        if len(G.nodes) == 0:
            print(f"[EMPTY GRAPH] bbox: {bbox}, skipping...")
            all_distances.extend([np.nan] * len(group))
            continue

        G_proj = ox.project_graph(G)
        nodes_proj, edges_proj = ox.graph_to_gdfs(G_proj)
        points_proj = group.geometry.to_crs(nodes_proj.crs)

        for point_proj in points_proj:
            try:
                u, v, k = ox.distance.nearest_edges(G_proj, [point_proj.x], [point_proj.y])[0]
                edge_geom = edges_proj.loc[(u, v, k)]['geometry']
                distance = point_proj.distance(edge_geom)
                all_distances.append(distance)
            except Exception as e:
                print(f"  [Point ERROR] {e}")
                all_distances.append(np.nan)

    except Exception as e:
        print(f"[TILE ERROR] {e} — bbox: {bbox}")
        all_distances.extend([np.nan] * len(group))


# Store back in original DataFrame order
gdf['distance_to_road'] = all_distances

  8%|▊         | 207/2551 [07:04<1:15:55,  1.94s/it] 

[TILE ERROR] No data elements in server response. Check query location/filters and log. — bbox: (np.float64(-5.32), np.float64(57.18), np.float64(-5.180000000000001), np.float64(57.32000000000001))


100%|██████████| 2551/2551 [3:06:08<00:00,  4.38s/it]   


In [7]:
gdf['near_road'] = np.where(gdf['distance_to_road'] <= 500, 1, 0)        

In [8]:
gdf=gdf.drop(['tile_x', 'tile_y', 'geometry', 'distance_to_road'], axis=1)

In [9]:
from pyproj import Transformer
from rasterio.windows import Window
import rasterio

land_cover_map = {
    1: "Deciduous woodland",
    2: "Coniferous woodland",
    3: "Arable",
    4: "Improved grassland",
    5: "Neutral grassland",
    6: "Calcareous grassland",
    7: "Acid grassland",
    8: "Fen",
    9: "Heather",
    10: "Heather grassland",
    11: "Bog",
    12: "Inland rock",
    13: "Saltwater",
    14: "Freshwater",
    15: "Supralittoral rock",
    16: "Supralittoral sediment",
    17: "Littoral rock",
    18: "Littoral sediment",
    19: "Saltmarsh",
    20: "Urban",
    21: "Suburban"
}

# Batch coordinate transformation
transformer_ni = Transformer.from_crs("EPSG:4326", "EPSG:29903", always_xy=True)
transformer_gb = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)

coords = list(zip(gdf['decimalLongitude'], gdf['decimalLatitude']))
gdf['easting_ni'], gdf['northing_ni'] = zip(*transformer_ni.itransform(coords))
gdf['easting_gb'], gdf['northing_gb'] = zip(*transformer_gb.itransform(coords))

# Raster processing optimization
gb_raster = 'gblcm2023_10m.tif'
n_ireland_raster = 'nilcm2023_10m.tif'


def get_land_cover_class(row):
    try:
        # Try GB raster first
        with rasterio.open(gb_raster) as src:
            row_idx, col_idx = src.index(row['easting_gb'], row['northing_gb'])
            # Read a small window around the point for better performance
            window = Window(col_idx, row_idx, 1, 1)
            land_cover_class = src.read(1, window=window)[0, 0]

            if land_cover_class == 0:  # Check NI raster if GB is 0
                with rasterio.open(n_ireland_raster) as src_ni:
                    row_idx, col_idx = src_ni.index(row['easting_ni'], row['northing_ni'])
                    window = Window(col_idx, row_idx, 1, 1)
                    land_cover_class = src_ni.read(1, window=window)[0, 0]

        return land_cover_map.get(land_cover_class, "Unknown")
    except Exception as e:
        print(f"Error processing row: {e}")
        return "Unknown"


tqdm.pandas()
gdf['Land_cover'] = gdf.progress_apply(get_land_cover_class, axis=1)


  4%|▎         | 1720/45912 [00:04<01:41, 436.66it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


  6%|▋         | 2955/45912 [00:07<01:35, 451.89it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 30%|██▉       | 13744/45912 [00:30<01:15, 425.71it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 31%|███       | 14082/45912 [00:31<01:16, 414.49it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 31%|███       | 14206/45912 [00:31<01:19, 397.48it/s]

Error processing row: index 0 is out of bounds for axis 1 with size 0


 33%|███▎      | 15013/45912 [00:33<01:13, 421.43it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0
Error processing row: index 0 is out of bounds for axis 0 with size 0


 36%|███▌      | 16360/45912 [00:36<01:08, 432.36it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 36%|███▋      | 16710/45912 [00:37<01:08, 427.78it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 37%|███▋      | 16926/45912 [00:37<01:08, 425.12it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0
Error processing row: index 0 is out of bounds for axis 0 with size 0


 41%|████      | 18725/45912 [00:42<01:02, 434.22it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 41%|████▏     | 18990/45912 [00:42<01:02, 433.41it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 42%|████▏     | 19077/45912 [00:42<01:03, 424.72it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 44%|████▍     | 20306/45912 [00:45<00:59, 430.21it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 45%|████▌     | 20703/45912 [00:46<00:58, 433.91it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 46%|████▋     | 21277/45912 [00:48<00:56, 432.26it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 48%|████▊     | 21895/45912 [00:49<00:55, 433.90it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 48%|████▊     | 21984/45912 [00:49<00:55, 431.32it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 52%|█████▏    | 24018/45912 [00:54<00:50, 436.30it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0
Error processing row: index 0 is out of bounds for axis 0 with size 0


 53%|█████▎    | 24149/45912 [00:54<00:50, 428.53it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0
Error processing row: index 0 is out of bounds for axis 0 with size 0


 54%|█████▍    | 24772/45912 [00:56<00:48, 439.37it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 56%|█████▌    | 25711/45912 [00:58<00:45, 439.53it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 58%|█████▊    | 26752/45912 [01:00<00:45, 425.21it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 61%|██████    | 27778/45912 [01:03<00:41, 434.64it/s]

Error processing row: index 0 is out of bounds for axis 1 with size 0


 61%|██████    | 28041/45912 [01:03<00:41, 433.05it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 62%|██████▏   | 28305/45912 [01:04<00:40, 429.98it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 64%|██████▎   | 29197/45912 [01:06<00:37, 443.61it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 68%|██████▊   | 31268/45912 [01:11<00:33, 436.07it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 69%|██████▊   | 31544/45912 [01:11<00:31, 455.21it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 70%|██████▉   | 32051/45912 [01:12<00:30, 457.44it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 71%|███████   | 32698/45912 [01:14<00:28, 456.02it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 71%|███████▏  | 32790/45912 [01:14<00:29, 447.61it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 72%|███████▏  | 33109/45912 [01:15<00:28, 446.08it/s]

Error processing row: index 0 is out of bounds for axis 1 with size 0


 72%|███████▏  | 33245/45912 [01:15<00:28, 444.47it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 74%|███████▍  | 33889/45912 [01:16<00:26, 455.67it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 75%|███████▌  | 34626/45912 [01:18<00:25, 449.35it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 76%|███████▌  | 34900/45912 [01:19<00:24, 449.15it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 76%|███████▋  | 35036/45912 [01:19<00:24, 438.94it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 77%|███████▋  | 35536/45912 [01:20<00:23, 447.52it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 81%|████████  | 37291/45912 [01:24<00:18, 454.27it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 82%|████████▏ | 37706/45912 [01:25<00:18, 455.12it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 84%|████████▎ | 38442/45912 [01:27<00:16, 455.30it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 84%|████████▍ | 38718/45912 [01:27<00:15, 452.46it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 90%|████████▉ | 41174/45912 [01:32<00:10, 457.88it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0


 97%|█████████▋| 44380/45912 [01:40<00:03, 455.51it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0
Error processing row: index 0 is out of bounds for axis 0 with size 0
Error processing row: index 0 is out of bounds for axis 0 with size 0
Error processing row: index 0 is out of bounds for axis 0 with size 0


 99%|█████████▊| 45316/45912 [01:42<00:01, 431.04it/s]

Error processing row: index 0 is out of bounds for axis 0 with size 0
Error processing row: index 0 is out of bounds for axis 0 with size 0


100%|██████████| 45912/45912 [01:44<00:00, 440.54it/s]


In [10]:
gdf = gdf.dropna()
gdf=gdf.drop(['easting_ni', 'northing_ni', 'easting_gb', 'northing_gb'],axis=1)
# Remove where Land_cover is Unknown
gdf = gdf[gdf['Land_cover'] != "Unknown"].reset_index(drop=True)
gdf.to_csv("hedgehog_final_data.csv", index=False)