In [59]:
import torch
import sys
import itertools
from functools import partial
from typing import Dict

from tqdm import tqdm
import clip
sys.path.append('..')
from lidarclip.anno_loader import build_anno_loader, CLASSES, WEATHERS
from lidarclip.helpers import MultiLoader, try_paths, logit_img_txt
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cpu


In [3]:
from lidarclip.prompts import OBJECT_PROMPT_TEMPLATES
# Report how many prompt templates exist for each prompt subcategory.
num_object_prompts = len(OBJECT_PROMPT_TEMPLATES)
print("Num prompts per subcategory:")
print(f"  Objects: {num_object_prompts}")

Num prompts per subcategory:
  Objects: 85


In [14]:
CLIP_VERSION = "ViT-L/14"

# Load data and features
batch_size = 1
# Pass `device` explicitly so the CLIP model is guaranteed to sit on the same device
# as the features loaded below (map_location=device) and the tokenized prompts,
# instead of relying on clip.load's own default happening to agree.
clip_model, clip_preprocess = clip.load(CLIP_VERSION, device=device)
# Feature files are named after the CLIP variant, e.g. "ViT-L/14" -> "vit-l-14".
feature_version = CLIP_VERSION.lower().replace("/", "-")
# NOTE(review): try_paths presumably returns the first candidate path that exists
# (cluster path first, then the repo-relative one) -- confirm in lidarclip.helpers.
feature_root = try_paths("/proj/nlp4adas/features", "../features")
# NOTE: torch.load unpickles the file; only point this at trusted feature dumps.
obj_feat_lists = torch.load(
    f"{feature_root}/once_{feature_version}_val_lidar_objs.pt", map_location=device
)
# Stack each class's list of per-object features into one tensor. A fresh dict is
# built rather than overwriting the loaded dict while iterating over it.
obj_feats = {}
for class_name, cls_feats in obj_feat_lists.items():
    print(class_name, len(cls_feats))
    obj_feats[class_name] = torch.stack(cls_feats)
    # Print the stacked shape so a fresh run reproduces the recorded cell output.
    print(obj_feats[class_name].shape)
# Drop the unstacked lists so the features are not held in memory twice.
del obj_feat_lists

Car 44829
torch.Size([44829, 768])
Bus 2617
torch.Size([2617, 768])
Truck 1672
torch.Size([1672, 768])
Cyclist 11894
torch.Size([11894, 768])
Pedestrian 17930
torch.Size([17930, 768])


In [50]:
# Label set for zero-shot classification. Its order defines the row order of the
# stacked class embeddings and the integer class indices used during evaluation.
CATEGORIES = CLASSES
def gen_cls_embedding(cls_name: str) -> torch.Tensor:
    """Encode a class name with the CLIP text encoder.

    Only the bare class name is used as the prompt; ensembling over
    OBJECT_PROMPT_TEMPLATES (``template.format(cls_name)``) is currently disabled.

    Args:
        cls_name: Class label to embed.

    Returns:
        The text features summed over all prompts, with the prompt dimension
        kept (a single row).
    """
    print(f"Generating embedding for {cls_name}")
    prompts = [cls_name]
    tokens = clip.tokenize(prompts).to(device)
    # Inference only: no autograd graph is needed for the text encoder.
    with torch.no_grad():
        text_features = clip_model.encode_text(tokens)
        summed_features = text_features.sum(dim=0, keepdim=True)
    return summed_features
# One text embedding per category, keyed by class name in CATEGORIES order.
cls_embeddings = {}
for name in CATEGORIES:
    cls_embeddings[name] = gen_cls_embedding(name)
print("Generated embeddings for: ", list(cls_embeddings))
# Stack the per-class rows into one matrix; row i corresponds to CATEGORIES[i].
cls_embeddings_pt = torch.vstack(tuple(cls_embeddings.values()))

Generating embedding for Car
Generating embedding for Truck
Generating embedding for Bus
Generating embedding for Pedestrian
Generating embedding for Cyclist
Generated embeddings for:  ['Car', 'Truck', 'Bus', 'Pedestrian', 'Cyclist']


In [64]:
from collections import defaultdict


def compute_accuracy(obj_feats: torch.Tensor, tru_class_idx: int) -> Dict[str, float]:
    """Compute top-k zero-shot accuracy for objects that all share one true class.

    Args:
        obj_feats: Features of the objects to classify, one row per object.
        tru_class_idx: Index of the objects' true class among the rows of
            ``cls_embeddings_pt``.

    Returns:
        Mapping ``"top-k" -> accuracy`` as Python floats, for k from 1 up to
        ``min(5, number of classes)``. Values are NaN when ``obj_feats`` is empty.
    """
    # NOTE(review): assumes logit_img_txt returns logits_per_text with shape
    # (num_classes, num_objects), which is what the softmax axis and the transpose
    # below imply -- confirm against lidarclip.helpers.
    logits_per_text, _ = logit_img_txt(obj_feats, cls_embeddings_pt, clip_model)
    score_per_class = logits_per_text.softmax(0).T  # rows: objects, cols: classes
    num_classes = score_per_class.shape[1]
    # Bound k by the number of classes. The row count is the number of objects, and
    # bounding by it would silently drop top-k entries for small batches.
    max_k = min(5, num_classes)
    # Rank the classes once per object instead of re-sorting for every k.
    ranking = score_per_class.argsort(dim=1, descending=True)[:, :max_k]
    # hit_within_k[i, k-1] is True when the true class is among object i's top k.
    hit_within_k = (ranking == tru_class_idx).cumsum(dim=1) > 0
    accuracies = {}
    for k in range(1, max_k + 1):
        # .item() returns a plain float, matching the annotated return type.
        accuracies[f"top-{k}"] = hit_within_k[:, k - 1].float().mean().item()
    return accuracies

def _format_accuracies(accs) -> str:
    """Render a {"top-k": accuracy} mapping as "top-k: 0.123 (12.3%), ..."."""
    return ", ".join(f"{name}: {acc:.3f} ({acc*100:.1f}%)" for name, acc in accs.items())


# Sum the per-class accuracies so they can be averaged across classes afterwards.
accuracy_sums = defaultdict(float)
for class_name, cls_obj_feats in obj_feats.items():
    print("Evaluating class", class_name, f"(n={len(cls_obj_feats)})")
    accuracies = compute_accuracy(cls_obj_feats, CATEGORIES.index(class_name))
    res_string = _format_accuracies(accuracies)
    print(f"  {res_string}")
    for metric, value in accuracies.items():
        accuracy_sums[metric] += value

# "Overall" is the unweighted mean over classes: every class counts equally,
# regardless of how many objects it contains.
num_evaluated_classes = len(obj_feats)
overall = {metric: total / num_evaluated_classes for metric, total in accuracy_sums.items()}
res_string = _format_accuracies(overall)
print("\nOverall:")
print(f"  {res_string}")

print("\nOverall if guessing randomly:")
# Compute the probability of guessing correctly by chance
rand_acc = 1 / len(CATEGORIES)
# A uniformly random ranking puts the true class in the top k with probability k / num_classes.
random_baseline = {metric: rand_acc * int(metric.split("-")[1]) for metric in overall}
res_string = _format_accuracies(random_baseline)
print(f"  {res_string}")


    

Evaluating class Car (n=44829)
  top-1: 0.026 (2.6%), top-2: 0.244 (24.4%), top-3: 0.986 (98.6%), top-4: 0.999 (99.9%), top-5: 1.000 (100.0%)
Evaluating class Bus (n=2617)
  top-1: 0.006 (0.6%), top-2: 0.021 (2.1%), top-3: 0.043 (4.3%), top-4: 0.084 (8.4%), top-5: 1.000 (100.0%)
Evaluating class Truck (n=1672)
  top-1: 0.010 (1.0%), top-2: 0.035 (3.5%), top-3: 0.076 (7.6%), top-4: 0.989 (98.9%), top-5: 1.000 (100.0%)
Evaluating class Cyclist (n=11894)
  top-1: 0.176 (17.6%), top-2: 0.691 (69.1%), top-3: 0.935 (93.5%), top-4: 0.989 (98.9%), top-5: 1.000 (100.0%)
Evaluating class Pedestrian (n=17930)
  top-1: 0.618 (61.8%), top-2: 0.896 (89.6%), top-3: 0.961 (96.1%), top-4: 0.994 (99.4%), top-5: 1.000 (100.0%)

Overall:
  top-1: 0.167 (16.7%), top-2: 0.377 (37.7%), top-3: 0.600 (60.0%), top-4: 0.811 (81.1%), top-5: 1.000 (100.0%)

Overall if guessing randomly:
  top-1: 0.200 (20.0%), top-2: 0.400 (40.0%), top-3: 0.600 (60.0%), top-4: 0.800 (80.0%), top-5: 1.000 (100.0%)
