In [None]:
## This notebook uploads precomputed embeddings to a Pinecone index, then queries the index with them and writes the top matches to a results file

import os
import json
from dotenv import load_dotenv
import torch

## Load Config
# Both config files are JSON. Read them as UTF-8 explicitly so the parsed
# content does not depend on the platform's default text encoding.
with open('config/videos.json', encoding='utf-8') as config_file:
    videos = json.load(config_file)
with open('config/name_to_url.json', encoding='utf-8') as config_file:
    # Indexed by embedding name further down to build each vector's metadata.
    name_to_url = json.load(config_file)

# Pull variables from the local .env file into the environment
# (PINECONE_KEY is read from os.environ later in the notebook).
load_dotenv(dotenv_path=".env")

In [None]:
from pinecone import Pinecone

# Create the Pinecone client. PINECONE_KEY must be present in the environment;
# a missing key raises KeyError on this line.
pc = Pinecone(api_key=os.environ["PINECONE_KEY"])
# Handle to the "index1" index, used by the upload and query cells below.
index = pc.Index("index1")

In [None]:
# Run selector: embeddings are read from data/outputs/<oneshot>/embeddings.
oneshot = "hashing"
path_to_embeddings = f'data/outputs/{oneshot}/embeddings'
# Selects which embedding files get uploaded (matched against the file name);
# presumably the pooling mode used to build the embeddings — TODO confirm.
mode = "max"

In [None]:
# Prepare embeddings for upload
#
# Files are expected to be named '<mode><sep><name>.pt' with a one-character
# separator (the previous code stripped a fixed 4-character prefix, which only
# works for a 3-character mode such as "max").
# NOTE(review): confirm this layout against the script that writes the files.

name_start = len(mode) + 1  # skip '<mode><sep>'
vectors = []
# Sorted so the upload order (and the log below) is reproducible across runs;
# os.listdir returns entries in arbitrary order.
for embedding_file in sorted(os.listdir(path_to_embeddings)):
    # Match the mode as a prefix: a substring test would also pick up a file of
    # a different mode whose <name> merely contains the mode string.
    if not (embedding_file.startswith(mode) and embedding_file.endswith('.pt')):
        continue

    name = embedding_file[name_start:-len('.pt')]
    # Raises KeyError if the embedding has no entry in name_to_url.
    metadata = {"name": name, "url": name_to_url[name]}
    # map_location='cpu' lets tensors that were saved from a GPU load on a
    # machine without CUDA and avoids allocating GPU memory at all, so no CUDA
    # cache cleanup is needed afterwards.
    # NOTE: torch.load unpickles the file — only load embeddings you trust.
    embedding = torch.load(f'{path_to_embeddings}/{embedding_file}', map_location='cpu')
    # detach() is a no-op for plain tensors and keeps .numpy() from failing on
    # tensors that were saved with requires_grad=True.
    values = embedding.detach().numpy().tolist()
    vectors.append({"values": values, "id": name, "metadata": metadata})
    print(f'Loaded {name}, metadata: {metadata}')

In [None]:
# Upload
#
# Pinecone limits the size of a single upsert request, so send the vectors in
# fixed-size batches instead of one unbounded call. This also skips the request
# entirely when there is nothing to upload.
upsert_batch_size = 100
upserted_count = 0
for batch_start in range(0, len(vectors), upsert_batch_size):
    batch = vectors[batch_start:batch_start + upsert_batch_size]
    index.upsert(vectors=batch)
    upserted_count += len(batch)
print(f'Upserted {upserted_count} vectors')

In [None]:
# Query the index with every embedding of the chosen query mode and write the
# top matches for each query to a text report.
query_mode = "max"

results_dir = f'data/outputs/{oneshot}/results'
# open(..., 'w') does not create parent directories, so make sure it exists.
os.makedirs(results_dir, exist_ok=True)

# Files are expected to be named '<query_mode><sep><name>.pt' with a
# one-character separator (the previous code stripped a fixed 4-character
# prefix, which only works for a 3-character mode).
# NOTE(review): confirm this layout against the script that writes the files.
query_name_start = len(query_mode) + 1

# UTF-8 so metadata containing non-ASCII text can always be written.
with open(f'{results_dir}/{mode}-v_{query_mode}-q.txt', 'w', encoding='utf-8') as f:
    # Sorted so the report has a stable order across runs.
    for embedding_file in sorted(os.listdir(path_to_embeddings)):
        # Prefix match: a substring test would also select files of another
        # mode whose <name> happens to contain the query mode string.
        if not (embedding_file.startswith(query_mode) and embedding_file.endswith('.pt')):
            continue
        name = embedding_file[query_name_start:-len('.pt')]
        # map_location='cpu' keeps this working on machines without CUDA.
        # NOTE: torch.load unpickles the file — only load embeddings you trust.
        query = (
            torch.load(f'{path_to_embeddings}/{embedding_file}', map_location='cpu')
            .detach()
            .numpy()
            .tolist()
        )
        # NOTE(review): include_values=True returns every match's vector, which
        # this cell never reads; consider False if nothing else needs it.
        response = index.query(vector=query, top_k=5, include_values=True, include_metadata=True)

        f.write(f'Querying {name}:\n')
        for rank, match in enumerate(response["matches"], start=1):
            # The old line carried a dangling "Distance:," label with no value;
            # the response is only read for metadata and score here.
            f.write(f'    Rank: {rank}, Metadata: {match["metadata"]}, Score: {match["score"]}\n')

