In [1]:
import os
import json
from json import JSONDecodeError
import numpy as np
from tqdm import tqdm

from collections import defaultdict

from util.constants import Topic

from transformers import YolosFeatureExtractor, YolosForObjectDetection
from PIL import Image
from matplotlib import pyplot as plt
import matplotlib.patches as patches
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Models

with open(os.path.join("..", "data", "coco_classes.txt"), "r") as f:
    coco_classes = [c.rstrip("\n") for c in f.readlines()]
coco_classes.insert(0, "unknown")

feature_extractor = YolosFeatureExtractor.from_pretrained("hustvl/yolos-small")
YOLOS_model = YolosForObjectDetection.from_pretrained("hustvl/yolos-small")

def model(img_path):
    img = Image.open(img_path)
    inputs = feature_extractor(images=img, return_tensors="pt")
    outputs = YOLOS_model(**inputs)
    l = []

    # model predicts bounding boxes and corresponding COCO classes
    logits = outputs.logits
    bboxes = outputs.pred_boxes

    probs = F.softmax(logits.detach().clone(), dim=-1)  # [B, 100, 92]
    preds      = probs.argmax(-1) # [B, 100]
    confidence = probs.max(-1)[0] # [B, 100]
    known_indices     = [(preds_img != 91).nonzero()[:,0] for preds_img in preds] # [B, known indices]
    confident_indices = [(conf_img > 0.75).nonzero()[:,0] for conf_img in confidence] # [B, confident indices]
    indices = [list(set(known_idx_img.tolist()).intersection(set(conf_idx_img.tolist()))) for known_idx_img,conf_idx_img in zip(known_indices, confident_indices)] # [B, intersection of indices]
    pred_classes = [[coco_classes[v] for v in preds_img[idx_img]] for preds_img, idx_img in zip(preds, indices)] # [B, predicted classes]

    confidence = (confidence*100).tolist()

    pred_boxes = bboxes.detach().clone()
    cmap = plt.cm.get_cmap("hsv", len(coco_classes))

    for j, patch_idx in enumerate(indices[0]):
        c = pred_classes[0][j]
        conf = confidence[0][patch_idx]
        bbox = pred_boxes[0, patch_idx].detach()
        x, y, W, H = bbox.split(1)
        im_w = img.width
        im_h = img.height
        W *= im_w
        H *= im_h
        x = x*im_w - W*.5
        y = y*im_h - H*.5

        # Create a Rectangle patch
        color = cmap(preds[0][patch_idx].item())
        # color = "b"
        rect = patches.Rectangle((x, y), W, H, linewidth=2,
            edgecolor=color,
            facecolor='none',
        )
        x = {"pred_class": c,
            "conf": conf,
            "bbox": bbox.tolist()}
        l.append(x)
    
    return l
    

Get latents for all videos

In [3]:
# Read data
videos = []
for cat in Topic._member_names_:
    with open(os.path.join("..", "data", "info_videos", F"videos-info_{cat}.json"), "r") as f:
        videos_info = json.load(f)
        videos.extend([vid for channel_vids in videos_info.values() for vid in channel_vids])

In [4]:
RESULTS_DIR = os.path.join("..", "data", "thumbnails-object")

video_results_dir = os.path.join(RESULTS_DIR, "videos")
channel_results_dir = os.path.join(RESULTS_DIR, "channels")
def get_done_list(dir):
    return [nm.replace(".json",'') for nm in os.listdir(dir)]

In [5]:
# Calculate video results
vid2result = {}
done_list = get_done_list(video_results_dir)
for vid in tqdm(videos):
    id = vid["id"]
    if id in done_list:
        continue
    img_path = "../data/thumbnails/"+id+"_high.jpg"
    if os.path.isfile(img_path):
        vid2result[id] = model(img_path)

100%|██████████| 722429/722429 [01:21<00:00, 8840.59it/s] 


In [6]:
# Save video results
for vid_id,result in vid2result.items():
    #print(vid_id)
    #print(result)
    path = os.path.join(video_results_dir, f"{vid_id}.json")
    with open(path, "w") as f:
        json.dump(result, f)

Channel stats

In [7]:
# Read data
with open(os.path.join("..", "data", "vid2channel.json"), "r") as f:
    vid2channel = json.load(f)

In [8]:
# Make list of results per video for each channel
channel_result_list = defaultdict(list)
for vid_id in tqdm(get_done_list(video_results_dir)):
    channel = vid2channel[vid_id]
    filepath = os.path.join(video_results_dir, f"{vid_id}.json")
    try:
        with open(filepath, "r") as f:
            result = json.load(f)
    except JSONDecodeError:
        print(f"couldn't open {vid_id}; deleting file")
        os.remove(filepath)
    channel_result_list[channel].append(result)

100%|██████████| 4/4 [00:00<00:00, 260.62it/s]


## Works till here then it needs the inverted lists

In [10]:
# Calculate channel results
channel_results = {}
for channel,result_list in channel_result_list.items():
    result_list = np.array(result_list)
    print(result_list)
    channel_results[channel] = {
        "mean": result_list.mean(axis=0),
        "std": result_list.std(),
        "len": len(result_list),
    }

[[{'pred_class': 'person', 'conf': 99.86813354492188, 'bbox': [0.16978369653224945, 0.5443723201751709, 162.1193389892578, 240.9904327392578]}
  {'pred_class': 'person', 'conf': 99.9327392578125, 'bbox': [0.5046120882034302, 0.5368502140045166, 155.4878387451172, 243.84986877441406]}
  {'pred_class': 'cell phone', 'conf': 86.10973358154297, 'bbox': [0.4223462641239166, 0.7047587633132935, 43.598812103271484, 67.23351287841797]}
  {'pred_class': 'toothbrush', 'conf': 76.47359466552734, 'bbox': [0.0462351031601429, 0.7307983636856079, 14.49171257019043, 55.052001953125]}
  {'pred_class': 'person', 'conf': 99.68734741210938, 'bbox': [0.8342981338500977, 0.5453698635101318, 158.6527099609375, 241.33522033691406]}]]


TypeError: unsupported operand type(s) for /: 'dict' and 'int'

In [None]:
# Save channel stats
for channel,results in channel_results.items():
    filepath = os.path.join(channel_results_dir, f"{channel}.json")
    with open(filepath, "w") as f:
        json.dump(results, f)

Category stats

In [None]:
# Read data
with open(os.path.join("..", "data", "channel2category.json"), "r") as f:
    channel2cat = json.load(f)

In [None]:
# Make list of results per channel for each category
category_results_list = defaultdict(list)
for channel in tqdm(get_done_list(channel_results_dir)):
    cat = channel2cat[channel]
    filepath = os.path.join(channel_results_dir, f"{channel}.json")
    try:
        with open(filepath, "r") as f:
            results = json.load(f)
    except JSONDecodeError:
        print(f"couldn't open {channel}; deleting file")
        os.remove(filepath)
    category_results_list[cat].append(results)

In [None]:
# Calculate category results
category_results = {}
for cat,stats_list in category_results_list.items():
    mean_list = np.array([channel_stats["mean"] for channel_stats in stats_list])
    std_list = np.array([channel_stats["std"] for channel_stats in stats_list])
    category_results[cat] = {
        "mean": mean_list.mean(axis=0).tolist(),
        "std": std_list.mean(),
        "len": len(mean_list),
    }

In [None]:
# Save category results
for cat,stats in category_results.items():
    filepath = os.path.join("..", "data", "title-latents", "categories", f"{cat}.json")
    with open(filepath, "w") as f:
        json.dump(stats, f)