In [2]:
import numpy as np
# Load the species array from disk; the next cell maps each entry to its position.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code from a tampered file — only load species.npy from a trusted source.
species_data = np.load("../data/species.npy", allow_pickle=True)

In [9]:
# Map every entry of species_data to its position in the array (name -> index).
# If an entry occurs more than once, the last position wins.
species_dict = {name: position for position, name in enumerate(species_data)}

In [8]:
# species_dict maps name -> index, so species_dict.get(10) returns None on a fresh run.
# To go from index to name, index the species array directly.
species_data[10]

'Junco hyemalis'

In [12]:
# Display the complete name -> index mapping built from species_data.
# NOTE(review): this prints every entry; consider previewing only a few items.
species_dict

{'Bombus griseocollis': 0,
 'Euchromius ocellea': 1,
 'Nycticorax nycticorax': 2,
 'Dryophytes versicolor': 3,
 'Branta canadensis': 4,
 'Accipiter cooperii': 5,
 'Papilio glaucus': 6,
 'Ardea alba': 7,
 'Xylocopa virginica': 8,
 'Toxomerus marginatus': 9,
 'Junco hyemalis': 10,
 'Helvella elastica': 11,
 'Stemonitis splendens': 12,
 'Hemerocallis fulva': 13,
 'Persicaria virginiana': 14,
 'Symphoricarpos orbiculatus': 15,
 'Alypia octomaculata': 16,
 'Lespedeza cuneata': 17,
 'Syringa vulgaris': 18,
 'Platycryptus undatus': 19,
 'Zonotrichia albicollis': 20,
 'Agelaius phoeniceus': 21,
 'Diospyros virginiana': 22,
 'Spinus tristis': 23,
 'Estigmene acrea': 24,
 'Catocala palaeogama': 25,
 'Lasiurus borealis': 26,
 'Canis latrans': 27,
 'Sciurus carolinensis': 28,
 'Coleomegilla maculata': 29,
 'Leonurus cardiaca': 30,
 'Anas platyrhynchos': 31,
 'Synanthedon rileyana': 32,
 'Callopistria floridensis': 33,
 'Trametes cinnabarina': 34,
 'Leptoglossus oppositus': 35,
 'Chondestes grammac

In [2]:
import pandas as pd
# Load the ICA table; the following cell reads its 'lon' and 'lat' columns.
# NOTE(review): this absolute path is machine-specific, while other cells read from
# the relative "../data/" directory — confirm whether both refer to the same folder.
ica = pd.read_csv('/data/cher/EcoBound/data/ica.csv')

In [5]:
# Keep only the coordinate columns of the ICA table and write them to disk.
# to_csv is called with its defaults, so the DataFrame index is written as well.
densely_sampled = ica.loc[:, ['lon', 'lat']]
densely_sampled.to_csv(
    '/data/cher/EcoBound/data/densely_sampled_pts.csv'
)

In [None]:
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm

from dataset import MapDataset

from model import SDM

# Run the trained SDM over the densely sampled map points and save the
# sigmoid-activated outputs as a .npy file under ../output/species_priors/.
experiment_name = "STL-loc-base"

# If specific species of interest (exact name as stored in species.npy);
# leave as None to run the model without a class restriction.
species = None # e.g. 'Sciurus carolinensis'

# Model for inference
model = SDM.load_from_checkpoint(f"../models/{experiment_name}.ckpt")
model.cuda().eval()

# Densely sampled dataset
# shuffle=False keeps predictions in dataset order, which the later cell relies on
# when it joins them column-wise with mapdataset.coords.
mapdataset = MapDataset()
maploader = torch.utils.data.DataLoader(mapdataset, batch_size=128, shuffle=False, num_workers=16)

# Species data is necessary if doing specific species
forward_kwargs = {}
if species:
    # NOTE(review): allow_pickle=True can execute code from a tampered file — load trusted data only.
    species_data = np.load("../data/species.npy", allow_pickle=True)
    matches = np.where(species_data == species)[0]
    if len(matches) == 0:
        # Fail with a clear message instead of the bare IndexError from [0][0].
        raise ValueError(f"Species {species!r} not found in ../data/species.npy")
    species_index = matches[0]
    forward_kwargs["class_of_interest"] = species_index

# Run inference
species_probs = []
with torch.no_grad():
    for batch in tqdm(maploader):
        loc_feats = batch.cuda()
        # forward_kwargs is empty unless a single species was requested above.
        logits = model.forward_species(loc_feats, **forward_kwargs)
        probs = torch.sigmoid(logits).cpu().numpy()
        species_probs.append(probs)

# Stack the per-batch arrays along the sample axis.
species_probs = np.concatenate(species_probs, axis=0)

# Single-species runs are prefixed with the species name; the path is otherwise identical.
output_stem = f"{species}-{experiment_name}" if species else experiment_name
np.save(f'../output/species_priors/{output_stem}.npy', species_probs)

100%|██████████| 2033/2033 [00:05<00:00, 390.35it/s]


In [None]:
# Build a per-location table of predicted probabilities from the inference
# results (`species_probs`) and the coordinates held by `mapdataset`.
# assumes species_probs has one column per entry in species_names
# (i.e. inference ran with species = None) — TODO confirm

# 1. Get coordinates from the dataset
coords_df = mapdataset.coords.reset_index(drop=True)[['lon', 'lat']]

# 2. Create DataFrame with probabilities, one "prob_<species name>" column per species
# NOTE(review): allow_pickle=True can execute code from a tampered file — load trusted data only.
species_names = np.load("/data/cher/EcoBound/data/species.npy", allow_pickle=True)
prob_df = pd.DataFrame(species_probs, columns=[f"prob_{name}" for name in species_names])

# 3. Combine coordinates with probabilities
# pd.concat(axis=1) aligns on the index, so a row-count mismatch would silently
# pad the shorter frame with NaN instead of failing; check it explicitly first.
if len(coords_df) != len(prob_df):
    raise ValueError(
        f"Row count mismatch: {len(coords_df)} coordinates vs {len(prob_df)} predictions"
    )
results_df = pd.concat([coords_df, prob_df], axis=1)

# 4. Save to Parquet
results_df.to_parquet('species_predictions.parquet', index=False)

In [8]:
# Export the coordinates together with the Sciurus carolinensis probability column.
# to_csv is called with its defaults, so the DataFrame index is written as well.
(
    results_df
    .loc[:, ['lon', 'lat', 'prob_Sciurus carolinensis']]
    .to_csv('ecobound-squirrel_test.csv')
)

In [18]:
# `index` is not assigned in any visible cell, so the bare name fails on a fresh kernel.
# NOTE(review): presumably it held the class index of 'Sciurus carolinensis' — confirm.
species_dict['Sciurus carolinensis']

28