In [4]:
import os
import json
import sys
sys.path.append('..')

import numpy as np

import open_clip
import torch

sys.path.append('../satclip')
sys.path.append('../satclip/satclip')
import satclip
from satclip.load import get_satclip
from huggingface_hub import hf_hub_download

from utils import get_species_names, format_species_name_CLIP, get_species_embeddings, read_csv_non_utf, count_parameters

In [5]:
# Read the shared project configuration from the JSON file one directory up
with open('../config.json', 'r') as f:
    config = json.load(f)

# Path components read from the config (joined below, in this order)
gdrive_fp, LIFE_fp, dataset_fp = (
    config[key] for key in ('gdrive_path', 'LIFE_folder', 'datasets_path')
)

# Resolve the full path to the Benitez-Lopez (2019) data file
benitez_lopez2019 = config['indiv_data_paths']['benitez_lopez2019']
ben_lop_path = os.path.join(gdrive_fp, LIFE_fp, dataset_fp, benitez_lopez2019)

# Trying out BioCLIP and thinking about integration
- Relevant pages for `pytaxize` (to get taxonomic and common names)
   - [classifier class - get hierarchy from ID](https://sckott.github.io/pytaxize/modules/classification.html)
   - [taxonomic identifier class - get taxonomic ID from scientific name](https://sckott.github.io/pytaxize/modules/ids.html)
   - [`taxize` package documentation in R - original package](https://docs.ropensci.org/taxize/articles/taxize.html)
- Relevant pages for BioCLIP
   - [`open_clip` package documentation - base package](https://pypi.org/project/open-clip-torch/)
   - [BioCLIP model page on HuggingFace](https://huggingface.co/imageomics/bioclip) 

In [2]:
# Small set of scientific names used to exercise the pipeline end to end
sci_names = ['Loxodonta africana', 'Odocoileus virginianus', 'Pandinus imperator']

# Pre-trained BioCLIP model and its matching tokenizer, both referenced by the same hub id
bioclip_hub_id = 'hf-hub:imageomics/bioclip'
model, _, preprocess_val = open_clip.create_model_and_transforms(bioclip_hub_id)
tokenizer = open_clip.get_tokenizer(bioclip_hub_id)

# Look up the naming information for each species (original note: extracted from ITIS)
full_names = [get_species_names(sci_name) for sci_name in sci_names]

In [12]:
# Embed every species with BioCLIP, using the full taxonomic hierarchy and no common name
species_embeddings = get_species_embeddings(
    full_names,
    model,
    tokenizer,
    full_hierarchy=True,
    common_name=False,
)

In [13]:
# Show, for each species, the embedding shape and the exact name string(s) that were embedded.
# The subscript inside the f-string uses double quotes: reusing the f-string's own quote
# character inside a replacement field is a SyntaxError on Python versions before 3.12.
for k, v in species_embeddings.items():
    print(f'Species {k} has embedding of shape {v["embedding"].shape}')
    print(v['names_used'])
    print()

Species Loxodonta africana has embedding of shape (512,)
['a photo of Animalia Chordata Mammalia Proboscidea Elephantidae Loxodonta africana']

Species Odocoileus virginianus has embedding of shape (512,)
['a photo of Animalia Chordata Mammalia Artiodactyla Cervidae Odocoileus virginianus']

Species Pandinus imperator has embedding of shape (512,)
['a photo of Animalia Arthropoda Euchelicerata Scorpiones Scorpionidae Pandinus imperator']



In [14]:
# Basic sanity check: the elephant embedding should score higher (dot product) against the
# deer than against the scorpion. Prints the deer score first, then the scorpion score.
elephant_emb = species_embeddings['Loxodonta africana']['embedding']
for other_species in ('Odocoileus virginianus', 'Pandinus imperator'):
    print(elephant_emb.dot(species_embeddings[other_species]['embedding']))

0.64558005
0.4450716


# Trying out SatCLIP for location embeddings

It's a little unclear whether we want to use the model with $L=40$ or $L=10$; as mentioned in the paper, the latter is better for large-scale patterns and spatial generalization, while the former is better at capturing fine-grained patterns. The cell below loads the $L=40$ checkpoint.

In [6]:
# Use the GPU when torch can see one, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Fetch the pre-trained SatCLIP checkpoint file from the HuggingFace hub
satclip_ckpt = hf_hub_download("microsoft/SatCLIP-ResNet50-L40", "satclip-resnet50-l40.ckpt")

# get_satclip only loads the location encoder by default.
# Note: this rebinds `model`, which held the BioCLIP model in the earlier cells.
model = get_satclip(satclip_ckpt, device=device)
model.eval();

using pretrained moco resnet50


In [7]:
# Checking out the size of the model - seems relatively small!
# `model` here is the SatCLIP location encoder loaded in the previous cell.
# NOTE(review): count_parameters comes from the project's utils module; presumably it returns
# the number of parameters -- confirm whether it counts all parameters or only trainable ones.
count_parameters(model)

1213696

In [8]:
# Read the Benitez-Lopez table, then pull the X/Y columns out as (longitude, latitude) inputs
ben_lop2019 = read_csv_non_utf(ben_lop_path)
lon_lat = ben_lop2019[['X', 'Y']].values
coords = torch.from_numpy(lon_lat).to(device)
coords.shape

torch.Size([3281, 2])

In [9]:
# Run the pre-trained SatCLIP location embedder over all coordinates, without tracking gradients
with torch.no_grad():
    coord_emb = model(coords)  # these don't seem to be normalized

# Move the result to the CPU for downstream use
coord_emb = coord_emb.detach().cpu()

coord_emb.shape # embedding shape is 256

torch.Size([3281, 256])