In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import json
import torch
import numpy as np
import random
from tqdm.notebook import tqdm
# import os
import inflect
%config Completer.use_jedi = False

In [2]:
def get_accuracy(caption_embeddings, class_embeddings, gt_label, topk=5):
    similarity_matrix = util.cos_sim(caption_embeddings, class_embeddings)
    pseudoLabel = similarity_matrix.topk(topk, 1).indices    
    gt_label = torch.Tensor(gt_label).view(-1,1).repeat(1,topk)
    matched_labels = (torch.Tensor(pseudoLabel) == torch.Tensor(gt_label))
    top1_acc = matched_labels[:,0].sum()/len(gt_label)
    topk_acc = matched_labels.any(1).sum()/len(gt_label)
    return top1_acc, topk_acc, similarity_matrix.argmax(1)

In [3]:
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1').cuda()

In [6]:
# source = pd.read_csv('../data/ego4d/ego_train.txt', sep=";", header=None, names=['id','caption','category','label'])

ego4d_json = json.load(open("../metadata/ego4d_cooking.json"))['categories']

label_to_idx = {c['category_name']:c['category_id'] for c in ego4d_json}
idx_to_label = {v:k for k,v in label_to_idx.items()}

In [8]:
# label_to_idx = dict(zip(source.category, source.label))
# idx_to_label = dict(zip(source.label, source.category))

labels = [idx_to_label[idx] for idx in idx_to_label ]

class_embeddings = model.encode(labels)

In [9]:
for l in labels:
    print("\item \\textit\{{}\}".format(l))

ValueError: Single '}' encountered in format string

## embedding match

In [11]:
for source, domain in zip(["exo","ego"],["ego", "exo"]):
    data = pd.read_csv('../data/ego4d/{}_train.txt'.format(domain), sep=";", header=None, names=['id','caption','category','label'])

    ## embedding match
    captions = data.caption.values
    labels = data.label.values
    segids = data.id.values

    caption_embeddings = model.encode(captions)
    
    top1, top5, pseudo = get_accuracy(caption_embeddings, class_embeddings, labels)
    
    print("{}:{:.2f}/{:.2f}".format(domain, top1*100, top5*100))
    
    with open("../hard_labels/ego4d_cooking_{}_{}_embedMatchPL.txt".format(source, domain), "w") as fh:
        write_str = ""
        for fid, pl in zip(segids, pseudo):
            write_str += "{} {}\n".format(fid, pl)
        fh.write(write_str)

ego:18.05/53.66
exo:18.85/53.25


## ngram match

In [12]:
from nltk.util import ngrams
from itertools import chain

def generate_ngrams(text, n, sep=" "):
    """
    Function to generate all ngrams for a given 'n' from a string.

    :param text: The input string from which to generate ngrams.
    :param n: The size of the ngram.
    :return: A list of ngrams as strings.
    """
    if isinstance(n, list):
        ngram_list = []
        for ni in n:
            ngram_list += generate_ngrams(text, ni, sep)
        return ngram_list
    # Split the text into words
    words = text.split(sep)

    # Generate ngrams
    ngram_list = [' '.join(gram) for gram in ngrams(words, n)]

    return ngram_list

def preprocess(text):

    ngram_tags = generate_ngrams(text, [1,2,3])
#     ngram_tags = tags
    
    all_ngrams = ngram_tags
    return list(set(all_ngrams))

def get_accuracy(similarity_matrix, gt_label, topk=5):
    pseudoLabel = similarity_matrix.topk(topk, 1).indices  
    gt_label = torch.Tensor(gt_label).view(-1,1).repeat(1,topk)
    matched_labels = (torch.Tensor(pseudoLabel) == torch.Tensor(gt_label))
    top1_acc = matched_labels[:,0].sum()/len(gt_label)
    topk_acc = matched_labels.any(1).sum()/len(gt_label)
    return top1_acc, topk_acc

In [13]:
for source, did in zip(["exo","ego"],["ego", "exo"]):
    
    data = pd.read_csv('../data/ego4d/{}_train.txt'.format(did), sep=";", header=None, names=['id','caption','category','label'])

    narrations = data.caption.values
    labels = data.label.values
    segids = data.id.values
    
    all_tags = [preprocess(n) for n in narrations]
    tag_lens = [len(t) for t in all_tags]

    MAX_TAG_LEN=500#max(tag_lens)

    padded_tags = []
    mask = []
    for p in all_tags:
        pad_len = max(0, MAX_TAG_LEN-len(p))
        padded_tags.append(p[:MAX_TAG_LEN] + ['EOS']*pad_len)
        mask.append([1]*min(MAX_TAG_LEN, len(p)) + [0]*pad_len)
    mask = torch.Tensor(mask)

    flattened_tags = list(chain.from_iterable(padded_tags))

    tag_embedding = model.encode(flattened_tags, batch_size=256)

    similarity = util.cos_sim(tag_embedding, class_embeddings)

    similarity_reshaped = similarity.reshape(*mask.shape, -1)

    mask = mask[...,None]

    masked_similarity = similarity_reshaped * mask

    pseudo_labels = masked_similarity.max(1).values
    
    top1, top5 = get_accuracy(pseudo_labels, labels)
    
    print("{}:{}".format(did, top1))
    
    pseudo = pseudo_labels.argmax(1).cpu().numpy()
    
    write_str = ""
    with open("../hard_labels/ego4d_cooking_{}_{}_tagMatchPL.txt".format(source, did), "w") as fh:
        write_str = ""
        for fid, pl in zip(segids, pseudo):
            write_str += "{} {}\n".format(fid, pl)
        fh.write(write_str)

ego:0.19487804174423218
exo:0.20136381685733795


## kNN labeling

In [None]:
def get_similarity_acc(source_embed, target_embed, source_label, target_label, within=False, topks=[1,5]):
    
    topk = max(topks)
        
    if within:
        similarity_matrix = util.cos_sim(source_embed, source_embed)
        mostSimilar = similarity_matrix.topk(topk+1, 1).indices
        mostSimilar = mostSimilar[:,1:]
    else:
        similarity_matrix = util.cos_sim(source_embed, target_embed)
        mostSimilar = similarity_matrix.topk(topk, 1).indices
    
    similarLabels = torch.Tensor(target_label)[mostSimilar.long().reshape(-1)].reshape(-1, topk)
    source_label = torch.Tensor(source_label).view(-1,1).repeat(1,topk)
    
    matched_labels = (torch.Tensor(similarLabels) == torch.Tensor(source_label))
                      
    top1_acc = matched_labels[:,0].sum()/len(source_label)
    topk_acc = matched_labels.any(1).sum()/len(source_label)
        
    return top1_acc, topk_acc

def get_similarity_acc_knn(source_embed, target_embed, source_label, target_label, within=False, K=3):
    
    topk = K
    
    similarity_matrix = util.cos_sim(source_embed, target_embed)
    mostSimilar = similarity_matrix.topk(topk, 1).indices
    
    similarLabels = torch.Tensor(target_label)[mostSimilar.long().reshape(-1)].reshape(-1, topk)
    similarLabels = torch.mode(similarLabels, dim=-1, keepdim=True).values
    source_label = torch.Tensor(source_label).view(-1,1)
    
    matched_labels = (torch.Tensor(similarLabels) == torch.Tensor(source_label))
                      
    top1_acc = matched_labels[:,0].sum()/len(source_label)
#     topk_acc = matched_labels.any(1).sum()/len(source_label)
        
    return top1_acc, similarLabels.long()

In [None]:
for source, target in zip(["exo","ego"],["ego", "exo"]):
    source_data = pd.read_csv('../data/ego4d/{}_train.txt'.format(source), sep=";", header=None, names=['id','caption','category','label'])
    source_captions = source_data.caption.values
    source_labels = source_data.label.values
    segids = source_data.id.values
    source_embed = model.encode(source_captions)

    target_data = pd.read_csv('../data/ego4d/{}_train.txt'.format(target), sep=";", header=None, names=['id','caption','category','label'])
    target_captions = target_data.caption.values
    target_labels = target_data.label.values
    target_embed = model.encode(target_captions)
    
    top1, pseudo = get_similarity_acc_knn(source_embed, target_embed, source_labels, target_labels, K=3)
    
    print("{}->{}:{}".format(source, target, top1))
    
    write_str = ""
    with open("../soft_labels/ego4d_cooking_{}_{}_knnPL.txt".format(target, source), "w") as fh:
        write_str = ""
        for fid, pl in zip(segids, pseudo):
            write_str += "{} {}\n".format(fid, pl.item())
        fh.write(write_str)

## Compute accuracy

In [None]:
def percls_accuracy(all_pred, all_label, num_class=22):
    """Computes per class accuracy"""
    num_class = len(set(all_label)) if num_class == 0 else num_class
    all_pred = np.asarray(all_pred)
    all_label = np.asarray(all_label)

    cls_acc = np.zeros([num_class])
    for i in range(num_class):
        idx = (all_label == i)
        if idx.sum() > 0:
            cls_acc[i] = (all_pred[idx] == all_label[idx]).mean() * 100.0

    return cls_acc

In [None]:
for source, target in zip(["exo","ego"],["ego", "exo"]):
    for pseudo in ["embedMatch", "tagMatch", "knn", "text"]:
        
        gt_file = pd.read_csv('../data/ego4d/{}_train.txt'.format(target), sep=";", header=None, names=['id','caption','category','label'])
        gt_id_to_label = dict(zip(gt_file.id.values, gt_file.label.values))

        pl_file = pd.read_csv('../soft_labels/ego4d_cooking_{}_{}_{}PL.txt'.format(source, target, pseudo), sep=" ", header=None, names=['id','label'])
        pl_id_to_label = dict(zip(pl_file.id.values, pl_file.label.values))

        all_ids = gt_id_to_label.keys()
        labels = [gt_id_to_label[idx] for idx in all_ids]
        preds = [pl_id_to_label[idx] for idx in all_ids]

        print("{}:{}->{} = {}".format(pseudo, source, target, percls_accuracy(preds, labels).mean()))
    print()

In [None]:
ego4d = json.load(open("../metadata/ego4d_cooking.json"))

In [None]:
ego4d.keys()

In [None]:
ego4d['categories']

In [None]:
ego4d['categories']

In [None]:
ego4d['ego_train'].keys()

In [None]:
ego4d['ego_val']['clips'][0]

In [None]:
ego4d['ego_val']['annotations'][0]

In [None]:
ego4d['ego_val']['metadata'][0]

In [None]:
ego4d['ego_val']['descriptions'][0]