In [None]:
import gc
import io
import json
import os
from collections import defaultdict
from json import JSONDecodeError

import matplotlib.patches as patches
import numpy as np
import requests
import torch
import torch.nn.functional as F
from matplotlib import pyplot as plt
from PIL import Image
from tqdm import tqdm
from transformers import YolosFeatureExtractor, YolosForObjectDetection

from util.constants import Topic, ThumbnailURL, thumbnail_URL

In [None]:
# Models

# Class-index -> label lookup: one label per line of the file, with
# "unknown" placed at index 0 ahead of them.
with open(os.path.join("..", "data", "coco_classes.txt"), "r") as f:
    coco_classes = ["unknown"] + [line.rstrip("\n") for line in f]

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
# device = torch.device("cpu")
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"using device: {device}")

# Pre-processing object loaded from the same "hustvl/yolos-tiny" checkpoint
# as the detector below.
feature_extractor = YolosFeatureExtractor.from_pretrained("hustvl/yolos-tiny")


def load_model():
    """Load the "hustvl/yolos-tiny" detector and move it onto ``device``."""
    detector = YolosForObjectDetection.from_pretrained("hustvl/yolos-tiny")
    return detector.to(device)


YOLOS_model = load_model()

# Report the model footprint: bytes held by its parameters plus its buffers
# (element count times bytes per element, summed over every tensor).
mem_params = sum(p.nelement() * p.element_size() for p in YOLOS_model.parameters())
mem_bufs = sum(b.nelement() * b.element_size() for b in YOLOS_model.buffers())
print(f"Memory used by model: {mem_params + mem_bufs} bytes")


def model(images, conf_threshold=0.75):
    """Detect objects in a batch of images with the global YOLOS model.

    Relies on the notebook-level ``feature_extractor``, ``YOLOS_model``,
    ``device`` and ``coco_classes``.

    Args:
        images: Sequence of images accepted by ``feature_extractor``.
        conf_threshold: Minimum softmax probability (0-1) a prediction must
            exceed to be kept. Defaults to 0.75.

    Returns:
        A list with one entry per input image, in input order. Each entry is
        a list of dicts with the keys ``"pred_class"`` (label looked up in
        ``coco_classes``), ``"conf"`` (confidence in percent, rounded to one
        decimal) and ``"bbox"`` (the predicted box values rounded to three
        decimals). Detections are ordered by ascending prediction slot.
        An empty ``images`` yields ``[]`` without running the model.
    """
    # Nothing to do for an empty batch; avoids handing an empty list to the
    # feature extractor.
    if len(images) == 0:
        return []

    inputs = feature_extractor(images=images, return_tensors="pt").to(device)
    # Inference only: skip autograd bookkeeping so no graph is kept in memory.
    with torch.no_grad():
        outputs = YOLOS_model(**inputs)

    # model predicts bounding boxes and corresponding COCO classes
    logits = outputs.logits.detach().cpu()
    bboxes = outputs.pred_boxes.detach().cpu()

    probs = F.softmax(logits, dim=-1)      # [B, 100, 92]
    confidence, preds = probs.max(dim=-1)  # both [B, 100]

    # Class index 91 is filtered out as "not a known object".
    no_object_class = 91
    keep = (preds != no_object_class) & (confidence > conf_threshold)  # [B, 100]

    pred_ids = preds.tolist()
    conf_pct = (confidence * 100).tolist()

    results = []
    for i in range(len(images)):
        detections = []
        # nonzero() lists the kept slots in ascending order, so the output
        # order is deterministic.
        for slot in keep[i].nonzero()[:, 0].tolist():
            detections.append({
                "pred_class": coco_classes[pred_ids[i][slot]],
                # plain Python floats keep the result JSON-friendly
                "conf": round(conf_pct[i][slot], 1),
                "bbox": [round(v, 3) for v in bboxes[i, slot].tolist()],
            })
        results.append(detections)

    return results

def resize(img, new_width=100):
    """Scale ``img`` to ``new_width`` pixels wide, preserving aspect ratio.

    Args:
        img: Image exposing ``size`` as ``(width, height)`` and a ``resize``
            method.
        new_width: Target width in pixels. Defaults to 100.

    Returns:
        The result of ``img.resize`` for the new ``(width, height)``.
    """
    width, height = img.size
    scale = new_width / float(width)
    # Clamp to 1 so a very wide image cannot produce a zero-pixel height.
    new_height = max(1, int(float(height) * scale))
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.
    return img.resize((new_width, new_height), Image.LANCZOS)

Get objects for all videos

In [None]:
# Read data: for every member name of Topic, load its videos-info file and
# flatten the per-channel video lists into one `videos` list.
videos = []
for cat in Topic._member_names_:
    info_path = os.path.join("..", "data", "info_videos", F"videos-info_{cat}.json")
    with open(info_path, "r") as f:
        videos_info = json.load(f)
    for channel_vids in videos_info.values():
        videos.extend(channel_vids)

In [None]:
# Root folder for this notebook's outputs, with one sub-folder for per-video
# results and one for per-channel results.
RESULTS_DIR = os.path.join("..", "data", "thumbnail-objects")

video_results_dir, channel_results_dir = (
    os.path.join(RESULTS_DIR, sub) for sub in ("videos", "channels")
)
def get_done_list(dir):
    """Return the IDs that already have a ``<id>.json`` result file in ``dir``.

    Args:
        dir: Directory to scan.

    Returns:
        A list of file names with the trailing ``.json`` removed. Entries
        that do not end in ``.json`` are ignored, and a missing directory
        yields an empty list.
    """
    if not os.path.isdir(dir):
        return []
    done = []
    for nm in os.listdir(dir):
        # splitext strips only the final extension, so an ID that itself
        # contains ".json" stays intact.
        stem, ext = os.path.splitext(nm)
        if ext == ".json":
            done.append(stem)
    return done

In [None]:
# Drop the reference to the detector so its memory can be reclaimed.
# Guarded so re-running the cell after the model is already gone does not
# raise NameError.
try:
    del YOLOS_model
except NameError:
    pass
# Memory currently allocated by torch on the CUDA device, in MB (bytes / 1e6).
torch.cuda.memory_allocated() / 1e6

In [None]:
# Toggle cell: drop the detector if it is currently defined, otherwise load
# it again.
try:
    del YOLOS_model
except NameError:
    # Only "the name does not exist" should trigger a reload; any other
    # error (or a keyboard interrupt) is allowed to surface.
    YOLOS_model = load_model()
# Memory currently allocated by torch on the CUDA device, in MB (bytes / 1e6).
torch.cuda.memory_allocated() / 1e6

In [None]:
# Run Python's garbage collector first so objects that are no longer
# referenced are actually released ...
gc.collect()
# ... then ask torch to hand back CUDA memory it is caching but not using.
torch.cuda.empty_cache()
# Memory still allocated by torch on the CUDA device, in MB (bytes / 1e6);
# shown as the cell output.
torch.cuda.memory_allocated() / 1e6

In [None]:
# Run the code in batches
#
# Thumbnails are downloaded on the fly for every video that does not yet have
# a result file; each video's detections are written to
# <video_results_dir>/<id>.json.
batch_size = 4
# Ceiling division so a trailing partial batch is processed as well.
batch_num = (len(videos) + batch_size - 1) // batch_size

quality = ThumbnailURL.high
request_timeout_s = 10  # seconds; keeps one stalled download from hanging the run

os.makedirs(video_results_dir, exist_ok=True)
# Set for O(1) membership checks; updated as results are written.
done_ids = set(get_done_list(video_results_dir))

for batch in tqdm(range(batch_num)):

    vid_batch = videos[batch*batch_size:(batch+1)*batch_size]

    ids = [vid["id"] for vid in vid_batch if vid["id"] not in done_ids]

    if not ids:
        continue

    images = []
    fetched_ids = []
    for vid_id in ids:
        url = thumbnail_URL(vid_id, quality)
        try:
            response = requests.get(url, timeout=request_timeout_s)
            response.raise_for_status()
            # Decode immediately so a corrupt or non-image payload is caught
            # here instead of crashing inside the model call.
            image = Image.open(io.BytesIO(response.content)).convert("RGB")
        except (requests.RequestException, OSError) as err:
            print(f"skipping {vid_id}: {err}")
            continue
        images.append(image)
        fetched_ids.append(vid_id)

    # Every download in this batch failed; nothing to run.
    if not images:
        continue

    results = model(images)

    for vid_id, result in zip(fetched_ids, results):
        path = os.path.join(video_results_dir, f"{vid_id}.json")
        # Serialize before opening the file so a failure cannot leave a
        # truncated JSON file behind.
        payload = json.dumps(result)
        with open(path, "w") as f:
            f.write(payload)
        done_ids.add(vid_id)

Channel stats

In [None]:
# Load the lookup that maps a video ID to its channel.
vid2channel_path = os.path.join("..", "data", "vid2channel.json")
with open(vid2channel_path, "r") as f:
    vid2channel = json.load(f)

In [None]:
# Make list of results per video for each channel
channel_result_list = defaultdict(list)
for vid_id in tqdm(get_done_list(video_results_dir)):
    channel = vid2channel[vid_id]
    filepath = os.path.join(video_results_dir, f"{vid_id}.json")
    try:
        with open(filepath, "r") as f:
            result = json.load(f)
    except JSONDecodeError:
        print(f"couldn't open {vid_id}; deleting file")
        os.remove(filepath)
        # Skip this video: without the `continue`, the previous video's
        # `result` would be appended again (or a NameError raised if this is
        # the first file).
        continue
    channel_result_list[channel].append(result)

## Works up to here; the cells below still need the inverted lists

In [None]:
# Calculate channel results
# NOTE(review): the per-video results saved by the batch loop are lists of
# detection dicts rather than numeric vectors, so they presumably have to be
# converted to a numeric form before mean/std are meaningful; TODO confirm.
channel_results = {}
for channel, result_list in channel_result_list.items():
    result_array = np.array(result_list)
    channel_results[channel] = {
        # .tolist() / float() keep the stats serializable with json.dump,
        # matching how the category stats are stored.
        "mean": result_array.mean(axis=0).tolist(),
        "std": float(result_array.std()),
        "len": len(result_array),
    }

In [None]:
# Save channel stats
# Create the output folder on first use so a fresh checkout does not fail.
os.makedirs(channel_results_dir, exist_ok=True)
for channel, results in channel_results.items():
    filepath = os.path.join(channel_results_dir, f"{channel}.json")
    # Serialize first: if the stats cannot be encoded, no truncated file is
    # left behind for later cells to trip over.
    payload = json.dumps(results)
    with open(filepath, "w") as f:
        f.write(payload)

Category stats

In [None]:
# Load the lookup that maps a channel to its category.
channel2cat_path = os.path.join("..", "data", "channel2category.json")
with open(channel2cat_path, "r") as f:
    channel2cat = json.load(f)

In [None]:
# Make list of results per channel for each category
category_results_list = defaultdict(list)
for channel in tqdm(get_done_list(channel_results_dir)):
    cat = channel2cat[channel]
    filepath = os.path.join(channel_results_dir, f"{channel}.json")
    try:
        with open(filepath, "r") as f:
            results = json.load(f)
    except JSONDecodeError:
        print(f"couldn't open {channel}; deleting file")
        os.remove(filepath)
        # Skip this channel: without the `continue`, the previous channel's
        # `results` would be appended under this category.
        continue
    category_results_list[cat].append(results)

In [None]:
# Aggregate the per-channel stats into one entry per category: the mean of
# the channel means (along axis 0), the mean of the channel stds, and the
# number of channels.
category_results = {}
for cat, stats_list in category_results_list.items():
    channel_means = np.array([entry["mean"] for entry in stats_list])
    channel_stds = np.array([entry["std"] for entry in stats_list])
    category_results[cat] = dict(
        mean=channel_means.mean(axis=0).tolist(),
        std=channel_stds.mean(),
        len=len(channel_means),
    )

In [None]:
# Save category results
# Write next to this notebook's video/channel results. The previous target,
# "../data/title-latents/categories", lies outside RESULTS_DIR and would
# overwrite any files of the same name there.
category_results_dir = os.path.join(RESULTS_DIR, "categories")
os.makedirs(category_results_dir, exist_ok=True)
for cat, stats in category_results.items():
    filepath = os.path.join(category_results_dir, f"{cat}.json")
    # Serialize first so an encoding failure cannot leave a truncated file.
    payload = json.dumps(stats)
    with open(filepath, "w") as f:
        f.write(payload)