In [1]:
import sys
import os
import math
import random
import bisect
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import urllib
import subprocess
import re
import tempfile
import itertools
import torch
import spacy
import amrlib
import penman

from typing import List, Tuple
from operator import itemgetter 
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer, BertForSequenceClassification
# import qgrid

In [2]:
BASE_DIR = os.path.abspath(os.getcwd()+'/../..')  # /home/gil/dev/NEBULA2/
os.chdir(os.getcwd()+'/../..')

In [3]:
from nebula_api.nebula_enrichment_api import *
from experts.common.RemoteAPIUtility import RemoteAPIUtility
from nebula_api.vlmapi import VLM_API
from nebula_api.atomic2020.comet_enrichment_api import *
from nebula_api.canonisation_api import CANON_API
import nebula_api.playground_api as pg_api

In [4]:
nre = NRE_API()
api = RemoteAPIUtility()
vlm = VLM_API()
# mdmmt = mdmmt_api.MDMMT_API()
# comet = Comet("/app/NEBULA2/nebula_api/atomic2020/comet-atomic_2020_BART")
ascore = CANON_API()
# stog = amrlib.load_stog_model(model_dir="/app/NEBULA2/models/model_stog")
# gtos = amrlib.load_gtos_model(model_dir="/app/NEBULA2/models/model_gtos")
# model_name = "Alireza1044/albert-base-v2-cola" 


# Download cola model
# cola_model = AutoModelForSequenceClassification.from_pretrained(model_name)
# tokenizer = AutoTokenizer.from_pretrained(model_name)

INFO:tensorflow:Restoring parameters from /app/NEBULA2/nebula_api/mdmmt_api/ckpts/vggish_model.ckpt


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predic

Available class names: ['clip_vit', 'clip_rn', 'mdmmt_max', 'mdmmt_mean', 'mdmmt_legacy']


In [5]:
def flatten(lst): return [x for l in lst for x in l]

def compute_batch_scores(video_emb: torch.Tensor, texts: List[str], normalize=True, **kwargs) -> List[float]:    
    emb_batch = vlm.encode_text(texts, **kwargs)
    if type(emb_batch) == list:
        emb_batch = torch.stack(emb_batch,axis=0)
    if normalize:
        video_emb = video_emb / video_emb.norm(2)
        # print("normalized video norm: {}".format(video_emb.norm(2)))
        n = (emb_batch * emb_batch).sum(axis=1).sqrt()
        emb_batch = emb_batch / n.unsqueeze(1).expand_as(emb_batch)
        # print("normalized text norms:")
        # for emb in emb_batch:
        #     print(emb.norm(2))                        
    return (video_emb.unsqueeze(0).expand_as(emb_batch)*emb_batch).sum(dim=1).cpu().numpy()


def compute_concat_score(image_emb: torch.Tensor, texts: List[str], join_on=',') -> float:
    combined_text = ""
    for t in [x.strip() for x in texts]:
        if t[-1]=='.':
            t = t[:-1]       
        t+=join_on
        t+=' '
        combined_text+=t
    print("Combined: "+combined_text)
    return torch.matmul(image_emb,mdmmt.encode_text(combined_text.strip()) )       

In [6]:
def transform_concept(c):
    exp = re.compile(r"^([a-zA-z]+)-?(\d*)$")
    r = exp.match(c)
    return r.group(1) if r else c

class ConceptManager:
    def __init__(self):
        pass
    def ground_concept(concept):
        return transform_concept(concept)

In [7]:
# class SimilarityManager:
#     def __init__(self):
#         self.nlp = spacy.load('en_core_web_lg')

#     def similarity(self, c1, c2):
#         if type(c2) is not list:
#             c2 = [c2]   
#         a = self.nlp(c1)
#         targets = self.nlp(' '.join(c2))
#         return [a.similarity(x) for x in targets]
    
class SimilarityManager:
    def __init__(self):
        self.nlp = spacy.load('en_core_web_lg')

    def similarity(self, src, target):
        rc = []
        s1 = self.nlp(src)
        s2 = self.nlp(target)
        for w in s1:
            if w.pos_ not in ['NOUN', 'ADJ', 'ADV', 'VERB', 'PROPN']:
                continue
            rc.append(max([w.similarity(x) for x in s2]))
        return np.mean(rc)
        
smanager = SimilarityManager()


In [8]:
smanager.similarity("flowers in vase","a man stands in the room near some flowers in a vase, and a cat")

1.0

In [33]:
softmax = lambda x: np.exp(x)/sum(np.exp(x))

class SubsetOptimization:
    def __init__(self, video_emb, experts: List, candidates_strings: List[str], coverage_matrix = None):
        self.video_emb = video_emb
        self.initial_temp = 10
        self.final_temp = .05
        self.alpha = 0.01
        self.theta = 0.5
        self.experts = experts
        self.candidates_strings = candidates_strings
        print("Computing batch similarity...")
        self.candidates_similarity = compute_batch_scores(self.video_emb, self.candidates_strings)
        print("Done")
        self.opt_results = []
        self.smanager = SimilarityManager()

        if coverage_matrix is not None:
            self.coverage_matrix = coverage_matrix
        else:
            self.coverage_matrix = np.zeros([len(self.experts),len(self.candidates_strings)])
            self.coverage_matrix[:] = np.nan
            for i in range(len(experts)):
                for j in range(len(candidates_strings)):
                    self.coverage_matrix[i][j]=self.concept_similarity(self.experts[i],self.candidates_strings[j])
        self.max_size = int(len(self.experts)*1.5)

    def concept_similarity(self, concept, sent):        
        # return max(self.smanager.similarity(concept,sent))
        return self.smanager.similarity(concept,sent)

    def get_coverage(self,i,j):        
        if np.isnan(self.coverage_matrix[i][j]):
            self.coverage_matrix[i][j] = self.concept_similarity(self.experts[i],self.candidates_strings[j])
        return self.coverage_matrix[i][j]

    def get_expert_coverage(self,state):
        return self.coverage_matrix[:,state].max(axis=1)

    def get_state_coverage(self,state) -> float:
        # print("State coverage for {}:".format(state))
        # print(self.get_expert_coverage(state))
        return np.mean(self.get_expert_coverage(state))

    # def get_state_coverage(self, state: List[int]) -> float:
    #     experts_coverage = [max([self.get_coverage(i,j) for j in state]) for i in range(len(self.experts))]    # A list of partial coverege        
    #     return sum(experts_coverage) / len(self.experts)

    def get_cost(self, state: List[int]) -> float:
        if not state:
            return 0
        print("get_cost: {}".format(state))
        coverage_score = self.get_state_coverage(state)   
        print("get_cost: coverage_score: {}".format(coverage_score))
        similarity_score = self.candidates_similarity[state].mean().item()
        print("get_cost: similarity_score: {}".format(similarity_score))
        return -(coverage_score + self.theta*similarity_score)


    def prob_to_remove(self, state):
        cover = self.get_state_coverage(state)
        return np.power(cover,3)
        
    # state here is assumed (and guaranteed on return) to be -sorted-
    def get_candidate(self, state: List[int]) -> List[int]:
        def compute_state_arrays(s):
            print("Computing arrays for state: ")
            print(s)
            s_score = self.candidates_similarity[s]
            s_coverage = self.coverage_matrix.mean(axis=0)[s]
            s_max_coverage = self.coverage_matrix.max(axis=0)[s]
            s_fitscore = s_coverage+self.theta*s_score

            return (s_score,s_coverage,s_max_coverage,s_fitscore)

        if not state:
            print("Empty state")
            return [random.randint(0,len(self.candidates_strings)-1)]
            
        rc = state.copy()
        s = np.array(state)
        s_score, s_coverage, s_max_coverage, s_fitscore = compute_state_arrays(s)
               
        if len(state) == self.max_size:
            print("Maximum state size, removing")
            idx = np.argmin(s_fitscore)
            del rc[idx]
            return rc
            
        remove_sentence = random.random()<self.prob_to_remove(state)      
        print("coverage of {} is {}, remove?{}".format(state,self.get_state_coverage(state),remove_sentence))
        if remove_sentence:             # We decide to remove a sentence from the set
            print("Removing")
            probs = softmax(-s_fitscore)
            idx = np.random.multinomial(1,probs).argmax()
            del rc[idx]                   
        else:                           # Add a sentence from the outside
            print("Adding")
            anti_state = []
            for i in range(len(self.candidates_strings)):
                if not i in state:
                    anti_state.append(i)
            s1 = np.array(anti_state)
            s1_score, s1_coverage, s1_max_coverage, s1_fitscore = compute_state_arrays(s1)
            # Pick an expert to try and cover
            probs = softmax(self.get_expert_coverage(s)*10)         # Coverage is in (0,1), so we use low temprature
            expert_to_cover = np.random.multinomial(1,probs).argmax()
            probs = softmax(self.coverage_matrix[expert_to_cover][s1]*10)
            idx_to_add = np.random.multinomial(1,probs).argmax()
            bisect.insort(rc,anti_state[idx_to_add])
            
        return rc

    def temp_schedule(self,i):
        schedule = [(2000,1), (5000,0.5), (10000,0.2), (15000,0.1), (20000,self.final_temp)]
        if i<schedule[0][0]:
            return schedule[0][1]
        if i>=schedule[-1][0]:
            return schedule[-1][1]
        for j in range(len(schedule)):
            if i<schedule[j+1][0]:
                break
        start = schedule[j][0]
        end = schedule[j+1][0]
        start_val = schedule[j][1]
        end_val = schedule[j+1][1]

        return ((i-start)/(end-start))*(end_val-start_val)+start_val         

    def get_scored_permutations(self, k):
        n = len(self.candidates)
        return [(x,self.get_cost(list(x))) for x in itertools.permutations(range(n),k)]
        
    def simulated_annealing(self, initial_state):
        self.opt_results = []
        current_temp = self.initial_temp
        i = 0

       # Start by initializing the current state with the initial state
        current_state = initial_state

        while current_temp > self.final_temp:
            next_cand = self.get_candidate(current_state)

            print("current cost: {} ({}). Candidate cost: {} ({})".format(self.get_cost(current_state),current_state,self.get_cost(next_cand),next_cand))

            # Check if next_cand is best so far
            cost_diff = self.get_cost(current_state) - self.get_cost(next_cand)

            # if the new solution is better, accept it
            if cost_diff > 0:
                current_state = next_cand
            # if the new solution is not better, accept it with a probability of e^(-cost/temp)
            else:
                print("chance to move: {}".format(math.exp(cost_diff / current_temp)))
                if random.uniform(0, 1) < math.exp(cost_diff / current_temp):
                    current_state = next_cand
            # decrement the temperature
            current_temp = self.temp_schedule(i)
            self.opt_results.append(-self.get_cost(current_state))
            i += 1
            if i % 1000 == 0:
                print("i: {}".format(i))

        return current_state



In [10]:
mid, elem = ('Movies/114206337', 1)
emb_video = vlm.encode_video(mid,elem)

http://ec2-18-159-140-240.eu-central-1.compute.amazonaws.com:7000/static/development/1015_27_Dresses_00_38_02_757-00_38_08_213.mp4
Movie info: {'arango_id': 'Movies/114206337', 'description': '1015_27_Dresses_00_38_02_757-00_38_08_213', 'fps': 23, 'width': 1920, 'height': 1080, 'last frame': 300, 'movie_id': 'f5ebaf4686c547a0bd8bc3859933a091', 'mdfs': [[2, 36, 71], [75, 100, 125], [129, 129, 129]], 'scene_elements': [[0, 73], [73, 127], [127, 131]]}
fn path: /tmp/video_file.mp4


In [11]:
with open('/app/gpt_output.pickle', 'rb') as f:
    gpt_outputs = pickle.load(f)
with open('/app/gpt_inputs.pickle', 'rb') as f:
    gpt_inputs = pickle.load(f)
experts = ['lady',
 'blonde',
 'white shirt',
 'woman',
 'jacket',
 'pants',
 'pony-tail',
 'pick',
 'leave',
 'bag',
 'refrigerator',
 'black and white tiles',
 'apartment',
 'flowers in a vase']

In [12]:
processed_inputs = flatten([[y,y] for (x,y) in gpt_inputs])
processed_outputs = flatten(gpt_outputs)

In [14]:
sents = list(set(processed_outputs))
# mat = None

In [34]:
optim = SubsetOptimization(emb_video, experts, sents, coverage_matrix=mat)

Computing batch similarity...
Done


In [111]:
optim.simulated_annealing([])

Empty state
get_cost: [206]
get_cost: coverage_score: 0.6526122483469191
get_cost: similarity_score: 0.2793859541416168
current cost: 0 ([]). Candidate cost: -0.7923052254177275 ([206])
get_cost: [206]
get_cost: coverage_score: 0.6526122483469191
get_cost: similarity_score: 0.2793859541416168
get_cost: [206]
get_cost: coverage_score: 0.6526122483469191
get_cost: similarity_score: 0.2793859541416168
Computing arrays for state: 
[206]
coverage of [206] is 0.6526122483469191, remove?True
Removing
get_cost: [206]
get_cost: coverage_score: 0.6526122483469191
get_cost: similarity_score: 0.2793859541416168
current cost: -0.7923052254177275 ([206]). Candidate cost: 0 ([])
get_cost: [206]
get_cost: coverage_score: 0.6526122483469191
get_cost: similarity_score: 0.2793859541416168
chance to move: 0.9237536713090237
get_cost: [206]
get_cost: coverage_score: 0.6526122483469191
get_cost: similarity_score: 0.2793859541416168
Computing arrays for state: 
[206]
coverage of [206] is 0.6526122483469191, 

[64, 357, 1100, 1268, 1301, 1963, 2612]

In [16]:
mat = optim.coverage_matrix.copy()

In [31]:
optim.temp_schedule(20000)

20000

In [100]:
list(zip(optim.experts, optim.get_expert_coverage([733, 812, 1158, 1676, 1803, 1811, 1994, 2164])))

[('lady', 1.0),
 ('blonde', 1.0),
 ('white shirt', 1.0),
 ('woman', 1.0),
 ('jacket', 1.0),
 ('pants', 1.0),
 ('pony-tail', 0.3745517432689667),
 ('pick', 0.7622023820877075),
 ('leave', 0.6976824402809143),
 ('bag', 1.0),
 ('refrigerator', 0.545698881149292),
 ('black and white tiles', 0.8004481593767802),
 ('apartment', 1.0),
 ('flowers in a vase', 0.45866620540618896)]

In [104]:
mat[13,:].copy().argsort()

array([ 193,  844, 1360, ...,  268,  151,  958])

In [93]:
res.argsort()[-10:], [733, 812, 1158, 1676, 1803, 1811, 1994, 2164]

(array([ 913,  235,  181, 1740,  182, 1753,  326, 2417, 2424, 1405]),
 [733, 812, 1158, 1676, 1803, 1811, 1994, 2164])

In [107]:
optim.candidates_similarity[151]

0.33707297