In [None]:
import bisect
import itertools
import math
import os
import random
import re
import shlex
import subprocess
import sys
import tempfile
import urllib
import urllib.request
from operator import itemgetter
from typing import List, Tuple

import amrlib
import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import penman
import seaborn as sns
import spacy
import torch
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer, BertForSequenceClassification
# import qgrid

In [None]:
# Resolve the repository root (two levels above the notebook directory) only
# once: without the guard, re-running this cell would climb two more
# directories from the already-changed working directory.
if 'BASE_DIR' not in globals():
    BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))  # /home/gil/dev/NEBULA2/
os.chdir(BASE_DIR)
# Read the key from the environment instead of committing it to the notebook;
# falls back to the previous empty default when the variable is unset.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
openai.api_key = OPENAI_API_KEY

In [None]:
from nebula_api.nebula_enrichment_api import *
from experts.common.RemoteAPIUtility import RemoteAPIUtility
from nebula_api.vlmapi import VLM_API
from nebula_api.atomic2020.comet_enrichment_api import *
from nebula_api.canonisation_api import CANON_API
# from nlp_tools.light_house_generator import LightHouseGenerator

In [None]:
import nebula_api.playground_api as pg_api

In [None]:
# Shared service clients used by the cells below:
#   nre - queried for groundings (get_groundings_from_db)
#   api - database access (api.db.aql) and movie info
#   vlm - text / video embedding (encode_text, encode_video)
# NOTE(review): NRE_API is not imported by name; presumably it comes from one
# of the star imports above - confirm.
nre = NRE_API()
api = RemoteAPIUtility()
vlm = VLM_API()


In [None]:
# Candidate movie ids in "Movies/<key>" form (the same form download_video_file
# matches against doc._id).
# NOTE(review): this list is not referenced by any other visible cell.
movies = ['Movies/114206952',
'Movies/114207205',
'Movies/114207398',
'Movies/114207499',
'Movies/114207361',
'Movies/114207740',
'Movies/114207908',
'Movies/114208744',
'Movies/114206724',
'Movies/114206548',
'Movies/114206264']

In [None]:
from IPython.display import Javascript
from IPython.display import HTML, display
import base64


def download_video_file(movie, fname='/tmp/video_file.mp4'):
    """Download the video that belongs to a movie document into ``fname``.

    Any existing file at ``fname`` is deleted first. The ``Movies`` collection
    is queried for documents whose ``_id`` equals ``movie``; for each match the
    document's ``url_path`` (with ``.avi`` replaced by ``.mp4``) is fetched from
    the media server and written to ``fname``.

    Args:
        movie: document id, e.g. ``'Movies/114206952'``.
        fname: local destination path.

    Returns:
        ``fname``. Note that the file will not exist if no document matched.
    """
    if os.path.exists(fname):
        os.remove(fname)
    # Pass the id as a bind variable rather than formatting it into the AQL
    # text, so quotes or AQL syntax inside `movie` cannot alter the query.
    query = 'FOR doc IN Movies FILTER doc._id == @movie_id RETURN doc'
    cursor = api.db.aql.execute(query, bind_vars={'movie_id': movie})
    url_prefix = "http://ec2-18-159-140-240.eu-central-1.compute.amazonaws.com:7000/"
    for doc in cursor:
        url_link = (url_prefix + doc['url_path']).replace(".avi", ".mp4")
        print(url_link)
        # Requires `import urllib.request`; a bare `import urllib` does not
        # guarantee that the `request` submodule is loaded.
        urllib.request.urlretrieve(url_link, fname)
    return fname



def read_video_segm(abspath, t_beg, t_end):
    """Cut ``[t_beg, t_end)`` out of a video with ffmpeg and return it as bytes.

    The segment is rescaled to a height of 112 pixels (even width) and written
    to stdout as a fragmented MP4, which is captured and returned.

    Args:
        abspath: path of the source video.
        t_beg: segment start, in seconds.
        t_end: segment end, in seconds.

    Returns:
        The encoded MP4 segment as ``bytes``.

    Raises:
        AssertionError: if ffmpeg exits with a non-zero status.
    """
    # The command runs through the shell, so the path must be quoted: otherwise
    # spaces split it into several arguments and shell metacharacters in a
    # file name would be executed.
    src = shlex.quote(str(abspath))
    cmd = f'ffmpeg -y -ss {t_beg} -i {src} -max_muxing_queue_size 9999  -loglevel error -f mp4 -vf scale="(floor(112/ih * iw/2))*2:112"  -c:a copy  -movflags frag_keyframe+empty_moov -t {t_end - t_beg} pipe:1 -nostats -hide_banner -nostdin'
    p = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE)
    assert p.returncode == 0, cmd
    return p.stdout

video_id_cnt = 0
class VideoElem:
    """Inline HTML5 player (autoplay, loop, muted) for a segment of a video file.

    Creating an instance cuts ``[t_start, t_end)`` out of ``fname`` with
    ``read_video_segm``, embeds it base64-encoded in a ``<video>`` element and
    displays it. Every instance receives a unique DOM id taken from the
    module-level ``video_id_cnt`` counter; ``hide``/``show``/``remove`` act on
    that element through jQuery.
    """

    def __init__(self, fname, t_start=0, t_end=999):
        global video_id_cnt
        with open(fname, 'rb') as f:
            # The handle itself is not read; ffmpeg opens the file by path.
            segment = read_video_segm(fname, t_start, t_end)
            data = base64.standard_b64encode(segment)
        video_id_cnt = video_id_cnt + 1
        self.video_id_cnt = video_id_cnt
        player = HTML(f"""
            <video id="video_{self.video_id_cnt}" autoplay loop muted>
                <source src="data:video/mp4;base64,{data.decode('ascii')}" type="video/mp4">
            </video>        
        """)
        display(player)

    def _call_jquery(self, method):
        # Emit `$("#video_<id>").<method>()` into the notebook front end.
        display(Javascript(f'$("#video_{self.video_id_cnt}").{method}()'))

    def hide(self):
        self._call_jquery('hide')

    def show(self):
        self._call_jquery('show')

    def remove(self):
        self._call_jquery('remove')
        
def mdmmt_video_encode(start_f, stop_f, path='/tmp/video_file.mp4', freq=24):
    """Embed the part of a video between two frame numbers with MDMMT.

    Frame numbers are converted to whole seconds by integer division with
    ``freq``. A stage that falls inside a single second is widened to one full
    second before encoding.

    NOTE(review): ``mdmmt`` is not defined in the visible cells; it presumably
    comes from one of the star imports - confirm.

    Args:
        start_f: first frame of the stage.
        stop_f: last frame of the stage.
        path: path of the video file.
        freq: frames per second used for the frame -> second conversion.

    Returns:
        Whatever ``mdmmt.encode_video`` returns, or ``None`` when the stage is
        shorter than one second (e.g. ``stop_f`` lies before ``start_f``).
    """
    t_start = start_f // freq
    t_end = stop_f // freq
    if t_start == t_end:
        # Widen backwards when possible. Within the first second that would
        # give t_start == -1 (a negative timestamp), so widen forwards instead.
        if t_start > 0:
            t_start -= 1
        else:
            t_end += 1
    print("Start/stop", t_start, " ", t_end)
    if (t_end - t_start) < 1:
        print("Stage too short")
        return None
    return mdmmt.encode_video(
        mdmmt.vggish_model,  # audio modality
        mdmmt.vmz_model,  # video modality
        mdmmt.clip_model,  # image modality
        mdmmt.model_vid,  # aggregator
        path, t_start, t_end)

In [None]:
def frame_to_concepts(frame)-> List:
    """Collect the distinct concept names mentioned by a frame's experts.

    The ``tracker_description``, ``step_description`` and ``simulated_expert``
    entries of ``frame`` are merged, a trailing run of digits is removed from
    each name (``person12`` -> ``person``) and duplicates are dropped.

    Returns:
        The unique concept names as a list, in no particular order.
    """
    # NOTE(review): the `A-z` range also matches `[ \ ] ^ _` and the backtick,
    # so names containing `_` are stripped too - likely a typo for `A-Z`.
    indexed_name = re.compile(r"^([a-zA-z]+)(\d*)$")

    def transform_concept(c):
        found = indexed_name.match(c)
        if found is None:
            return c
        return found.group(1)

    raw_names = (
        set(frame['tracker_description'])
        | set(frame['step_description'])
        | set(frame['simulated_expert'])
    )
    return list({transform_concept(name) for name in raw_names})

def kgbart_fusion(frames) -> Tuple[List[str], List[str]]:
    """Generate one sentence per frame from its concepts with KGBART.

    The concepts of every frame (see ``frame_to_concepts``) are written, one
    frame per line, to a temporary input file; the KGBART decoding script is
    run on it in a subprocess and the lines of its output file are returned.
    The exit status of the script is not checked: if it fails, the returned
    list of sentences is simply empty.

    Args:
        frames: iterable of frame dicts accepted by ``frame_to_concepts``.

    Returns:
        ``(all_concepts, sentences)`` - the comma-joined concepts of every
        frame and the raw output lines produced by KGBART.
    """
    KGBART_MAIN = BASE_DIR+'/kgbart/KGBART/KGBART_training/decode_seq2seq.py'
    KGBART_CC_DIR = BASE_DIR+'/kgbart/downloaded/commongen_dataset'
    KGBART_MODEL_DIR = BASE_DIR+'/kgbart/output/best_model/model.best.bin'

    h, outname = tempfile.mkstemp(text=True)
    os.close(h)
    h, fname = tempfile.mkstemp(text=True)
    os.close(h)
    try:
        options = {
            'data_dir': KGBART_CC_DIR,
            'output_dir': os.path.dirname(outname),
            'input_file': fname,
            'model_recover_path': KGBART_MODEL_DIR,
            'output_file': os.path.basename(outname),
            'split': 'dev',
            'beam_size': 5,
            'forbid_duplicate_ngrams': True
        }
        # Write the expert tokens of every frame to the input file.
        all_concepts = []
        with open(fname, 'w') as f:
            for frame in frames:
                concepts = frame_to_concepts(frame)
                all_concepts.append(', '.join(concepts))
                f.write(' '.join(concepts)+'\n')

        # Quote every value so that paths containing spaces or shell
        # metacharacters reach the script as a single argument.
        args = ' '.join('--{} {}'.format(k, shlex.quote(str(v))) for (k, v) in options.items())
        cmdline = 'python ' + shlex.quote(KGBART_MAIN) + ' ' + args + ' >/dev/null 2>&1'
        subprocess.run(cmdline, shell=True)
        with open(outname, 'r') as f:
            rc = f.readlines()
    finally:
        # Remove the temporary files even when something above raised.
        for tmp_path in (outname, fname):
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
    return all_concepts, rc

In [None]:
def flatten(lst):
    """Concatenate the items of every iterable in ``lst`` into one flat list."""
    return list(itertools.chain.from_iterable(lst))

def compute_batch_scores(video_emb: torch.Tensor, texts: List[str], normalize=True, **kwargs) -> np.ndarray:
    """Score a batch of texts against one video embedding by dot product.

    Args:
        video_emb: embedding of the video.
            (assumes a 1-D tensor of the same size as a text embedding - TODO confirm)
        texts: sentences to embed with ``vlm.encode_text``.
        normalize: if true, L2-normalise the video and the text embeddings
            first, which turns the dot product into a cosine similarity.
        **kwargs: forwarded unchanged to ``vlm.encode_text``.

    Returns:
        A NumPy array with one score per text, in the order of ``texts``.
    """
    emb_batch = vlm.encode_text(texts, **kwargs)
    # The encoder may hand back a sequence of per-text tensors; accept tuples
    # as well as lists and stack them into one (n_texts, dim) batch.
    if isinstance(emb_batch, (list, tuple)):
        emb_batch = torch.stack(list(emb_batch), dim=0)
    if normalize:
        video_emb = video_emb / video_emb.norm(2)
        text_norms = (emb_batch * emb_batch).sum(dim=1).sqrt()
        emb_batch = emb_batch / text_norms.unsqueeze(1)
    return (video_emb.unsqueeze(0).expand_as(emb_batch) * emb_batch).sum(dim=1).cpu().numpy()


def compute_concat_score(image_emb: torch.Tensor, texts: List[str], join_on=',') -> torch.Tensor:
    """Score the concatenation of several texts against an embedding.

    Each text is stripped, loses one trailing ``.`` and is followed by
    ``join_on`` plus a space; the pieces are concatenated, embedded with
    ``mdmmt.encode_text`` and multiplied with ``image_emb``. Empty or
    whitespace-only texts are skipped.

    NOTE(review): this uses ``mdmmt`` whereas ``compute_batch_scores`` uses
    ``vlm``; ``mdmmt`` is not defined in the visible cells - confirm.

    Args:
        image_emb: embedding to compare with.
        texts: sentences to combine.
        join_on: separator appended after every sentence.

    Returns:
        The result of ``torch.matmul(image_emb, text_embedding)``.
    """
    pieces = []
    for t in (x.strip() for x in texts):
        if not t:
            # Previously `t[-1]` raised IndexError for an empty text.
            continue
        if t.endswith('.'):
            t = t[:-1]
        pieces.append(t + join_on + ' ')
    combined_text = ''.join(pieces)
    print("Combined: "+combined_text)
    # Only the trailing space is stripped; the final separator is kept.
    return torch.matmul(image_emb, mdmmt.encode_text(combined_text.strip()))

In [None]:
def transform_concept(c):
    """Strip a numeric sense/index suffix from a concept name.

    ``run-01`` -> ``run`` and ``person12`` -> ``person``. A name that is not
    "letters, optional hyphen, optional digits" is returned unchanged.
    """
    # NOTE(review): the `A-z` range also matches `[ \ ] ^ _` and the backtick -
    # likely a typo for `A-Z`, left as is because it affects `_` names.
    found = re.match(r"^([a-zA-z]+)-?(\d*)$", c)
    return c if found is None else found.group(1)

class ConceptManager:
    """Namespace for concept-normalisation helpers."""

    def __init__(self):
        pass

    @staticmethod
    def ground_concept(concept):
        """Return the base form of ``concept`` (see ``transform_concept``).

        Declared static so it works both as ``ConceptManager.ground_concept(c)``
        and on an instance; without the decorator the instance call failed
        because ``concept`` received the instance itself.
        """
        return transform_concept(concept)

In [None]:
class SimilarityManager:
    """Word-vector similarity between concepts, backed by a spaCy pipeline."""

    def __init__(self):
        self.nlp = spacy.load('en_core_web_lg')

    def similarity(self, c1, c2):
        """Return the similarity of ``c1`` to each concept in ``c2``.

        Args:
            c1: source concept (a string).
            c2: a target concept or a list of target concepts.

        Returns:
            A list with exactly one similarity per entry of ``c2``, in order
            (empty when ``c2`` is an empty list).
        """
        if not isinstance(c2, list):
            c2 = [c2]
        a = self.nlp(c1)
        # Process each target on its own. Joining them into one string and
        # iterating its tokens yields one score per *token*, which no longer
        # lines up with `c2` as soon as a target is split into several tokens.
        return [a.similarity(target) for target in self.nlp.pipe(c2)]


# Module-level similarity helper; constructing it loads the spaCy model.
# NOTE(review): SubsetOptimization builds its own SimilarityManager, so this
# global looks unused by the visible cells - confirm before removing.
smanager = SimilarityManager()


In [None]:
def softmax(x):
    """Softmax of ``x`` along its first axis.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but prevents ``exp`` from overflowing to ``inf``
    (and the ratio from becoming ``nan``) for large inputs.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return np.exp(x)
    weights = np.exp(x - np.max(x, axis=0))
    return weights / weights.sum(axis=0)

class SubsetOptimization:
    """Simulated-annealing search for a good subset of candidate sentences.

    A *state* is a sorted list of indices into ``candidates_strings``. Its
    cost combines two terms (lower is better):

    * coverage - for every expert concept, the best similarity between that
      concept and any AMR instance of a sentence in the state, averaged over
      the experts;
    * similarity - the mean video/text score of the sentences in the state,
      weighted by ``theta``.
    """

    def __init__(self, video_emb, experts: List, candidates_strings: List[str],
                 stog_model_dir: str = "/app/NEBULA2/models/model_stog"):
        """Parse the candidates to AMR and precompute scores and coverage.

        Args:
            video_emb: video embedding handed to ``compute_batch_scores``.
            experts: expert concepts (strings) that the subset should cover.
            candidates_strings: candidate sentences.
            stog_model_dir: directory of the amrlib sentence-to-graph model.
        """
        self.stog = amrlib.load_stog_model(model_dir=stog_model_dir)
        self.video_emb = video_emb
        # Annealing schedule: linear cooling from initial_temp down to
        # final_temp in steps of alpha.
        self.initial_temp = 10
        self.final_temp = .05
        self.alpha = 0.01
        self.theta = 0.5          # weight of the similarity term in the cost
        self.experts = experts
        self.candidates_strings = candidates_strings
        self.candidates_amr_strings = self.stog.parse_sents(self.candidates_strings)
        self.candidates = self.candidates_amr_strings
        self.candidates_amrs = [penman.decode(x) for x in self.candidates_amr_strings]
        self.candidates_similarity = compute_batch_scores(self.video_emb, self.candidates_strings)
        self.opt_results = []
        self.smanager = SimilarityManager()

        # coverage_matrix[i][j]: how well candidate j covers expert i.
        self.coverage_matrix = np.full([len(self.experts), len(self.candidates)], np.nan)
        for i in range(len(experts)):
            for j in range(len(candidates_strings)):
                self.coverage_matrix[i][j] = self.concept_amr_similarity(self.experts[i], self.candidates_amrs[j])
        self.max_size = int(len(self.experts)*1.5)

    def concept_amr_similarity(self, concept, amr):
        """Best similarity between ``concept`` and any instance of ``amr``.

        Returns 0.0 for an AMR without instances (``max`` of an empty list
        used to raise ``ValueError``).
        """
        insts = [ConceptManager.ground_concept(x.target) for x in amr.instances()]
        sims = self.smanager.similarity(concept, insts)
        return max(sims, default=0.0)

    def get_coverage(self, i, j):
        """Coverage of expert ``i`` by candidate ``j``, computed on demand."""
        if np.isnan(self.coverage_matrix[i][j]):
            self.coverage_matrix[i][j] = self.concept_amr_similarity(self.experts[i], self.candidates_amrs[j])
        return self.coverage_matrix[i][j]

    def get_expert_coverage(self, state):
        """Per-expert coverage: the best candidate in ``state`` for each expert."""
        return self.coverage_matrix[:, state].max(axis=1)

    def get_state_coverage(self, state) -> float:
        """Mean expert coverage achieved by ``state``."""
        expert_coverage = self.get_expert_coverage(state)
        print("State coverage for {}:".format(state))
        print(expert_coverage)
        return np.mean(expert_coverage)

    def get_cost(self, state: List[int]) -> float:
        """Cost of ``state`` (lower is better); 0 for the empty state."""
        if not state:
            return 0
        coverage_score = self.get_state_coverage(state)
        similarity_score = self.candidates_similarity[state].mean().item()
        return -(coverage_score + self.theta*similarity_score)

    # state here is assumed (and guaranteed on return) to be -sorted-
    def get_candidate(self, state: List[int]) -> List[int]:
        """Propose a neighbour of ``state`` by removing or adding one sentence.

        * empty state: a single random sentence;
        * state at (or above) ``max_size``: the worst-fitting sentence is
          dropped;
        * otherwise a sentence is removed with probability equal to the
          current coverage, else one is added. When no sentence is left
          outside the state, removal is forced.

        ``state`` itself is never modified.
        """
        if not state:
            print("Empty state")
            return [random.randint(0, len(self.candidates_strings)-1)]

        rc = state.copy()
        s = np.array(state)
        print("Computing arrays for state: ")
        print(s)
        # Per-sentence fitness: mean coverage over the experts plus the
        # weighted video similarity.
        s_fitscore = self.coverage_matrix.mean(axis=0)[s] + self.theta*self.candidates_similarity[s]

        if len(state) >= self.max_size:
            print("Maximum state size, removing")
            del rc[np.argmin(s_fitscore)]
            return rc

        state_coverage = self.get_state_coverage(state)
        in_state = set(state)
        anti_state = [i for i in range(len(self.candidates_strings)) if i not in in_state]

        remove_sentence = random.random() < state_coverage
        print("coverage of {} is {}, remove?{}".format(state, state_coverage, remove_sentence))
        if not anti_state:
            # Nothing left to add; indexing with an empty anti-state used to
            # crash here, so fall back to removing a sentence.
            remove_sentence = True

        if remove_sentence:             # We decide to remove a sentence from the set
            print("Removing")
            probs = softmax(-s_fitscore)
            idx = np.random.multinomial(1, probs).argmax()
            del rc[idx]
        else:                           # Add a sentence from the outside
            print("Adding")
            s1 = np.array(anti_state)
            # Pick an expert to try and cover.
            # NOTE(review): this weighting favours experts that are already
            # well covered; a negated coverage may have been intended - confirm.
            probs = softmax(self.get_expert_coverage(s)*10)         # Coverage is in (0,1), so we use low temperature
            expert_to_cover = np.random.multinomial(1, probs).argmax()
            probs = softmax(self.coverage_matrix[expert_to_cover][s1]*10)
            idx_to_add = np.random.multinomial(1, probs).argmax()
            bisect.insort(rc, anti_state[idx_to_add])

        return rc

    def get_scored_permutations(self, k):
        """Return ``(permutation, cost)`` for every ordered k-subset of candidates."""
        n = len(self.candidates)
        return [(x, self.get_cost(list(x))) for x in itertools.permutations(range(n), k)]

    def simulated_annealing(self, initial_state):
        """Run the annealing loop from ``initial_state`` and return the final state.

        After every step the (negated) cost of the current state is appended
        to ``self.opt_results``.
        """
        self.opt_results = []
        current_temp = self.initial_temp

        # Start by initializing the current state with the initial state.
        # Costs are cached: get_cost is deterministic, so there is no need to
        # recompute it several times per step.
        current_state = initial_state
        current_cost = self.get_cost(current_state)

        while current_temp > self.final_temp:
            next_cand = self.get_candidate(current_state)
            cand_cost = self.get_cost(next_cand)

            print("current cost: {} ({}). Candidate cost: {} ({})".format(current_cost, current_state, cand_cost, next_cand))

            cost_diff = current_cost - cand_cost

            if cost_diff > 0:
                # The new solution is better: always accept it.
                accept = True
            else:
                # Otherwise accept it with probability e^(cost_diff / temp).
                chance = math.exp(cost_diff / current_temp)
                print("chance to move: {}".format(chance))
                accept = random.uniform(0, 1) < chance
            if accept:
                current_state, current_cost = next_cand, cand_cost
            # decrement the temperature
            current_temp -= self.alpha
            self.opt_results.append(-current_cost)

        return current_state



In [None]:
'''
Get a list of 1-item dictionaries, return a list of the values
'''

def rearrange_concepts(concepts, max_concepts=7):
    """Return the first ``max_concepts`` entries of ``concepts``.

    Args:
        concepts: sequence of concepts; its order is kept.
        max_concepts: maximum number of entries to keep (7 by default, the
            previously hard-coded limit).
    """
    return concepts[:max_concepts]

def permute_sentence(pen, concepts):
    """Generate variants of an AMR graph by swapping its instance concepts.

    For every ``:instance`` triple the entity class of its concept is looked
    up through ``ascore``; the alternatives for that slot are the (truncated,
    see ``rearrange_concepts``) concepts listed for the class in ``concepts``
    plus the original concept. One graph is produced for every combination of
    alternatives across all slots.

    NOTE(review): ``ascore`` is not defined in the visible cells - confirm
    where it comes from.

    Args:
        pen: graph exposing ``triples`` (and ``epidata``).
        concepts: mapping from entity class to a list of concepts.

    Returns:
        A list of graphs, one per combination.
    """
    def replace_instance(g: penman.Graph, changes: List[tuple[int,str]]) -> penman.Graph :
        # Copy the graph, then overwrite the target of each listed triple.
        clone = penman.Graph(triples=g.triples, epidata=g.epidata)
        for position, new_target in changes:
            fields = list(clone.triples[position])
            fields[2] = new_target
            clone.triples[position] = tuple(fields)
        return clone

    concepts = {cls: rearrange_concepts(members) for cls, members in concepts.items()}
    slots = []           # (triple index, triple, entity class) per instance
    alternatives = []    # candidate concepts for each slot
    choice_ranges = []   # index range over the alternatives of each slot
    for position, triple in enumerate(pen.triples):
        if triple[1] != ':instance':
            continue
        entity_class = ascore.get_class_of_entity(transform_concept(triple[2]))
        # The list is shared between slots of the same class on purpose of
        # fidelity: a concept appended here is visible to later slots.
        options = concepts.get(entity_class, [])
        if triple[2] not in options:
            options.append(triple[2])
        slots.append((position, triple, entity_class))
        choice_ranges.append(range(len(options)))
        alternatives.append(options)

    return [
        replace_instance(pen, [(slots[k][0], alternatives[k][pick]) for k, pick in enumerate(combo)])
        for combo in itertools.product(*choice_ranges)
    ]

In [None]:
def rename_variables(pen, prefix='n_'):
    """Return a copy of ``pen`` in which every variable carries ``prefix``.

    Sources and targets that are variables of the graph are renamed;
    constants and roles are left untouched. The top of the graph is renamed
    along with the rest, so an explicitly set top is preserved.

    Args:
        pen: graph exposing ``triples``, ``variables()`` and ``top``.
        prefix: string prepended to every variable name.

    Returns:
        A new ``penman.Graph`` with the renamed triples.
    """
    # Compute the variable set once instead of twice per triple.
    variables = pen.variables()
    new_triples = []
    for t in pen.triples:
        triple = list(t)
        if triple[0] in variables:
            triple[0] = prefix+triple[0]
        if triple[2] in variables:
            triple[2] = prefix+triple[2]
        new_triples.append(tuple(triple))
    top = pen.top
    if top in variables:
        top = prefix+top
    return penman.Graph(triples=new_triples, top=top)
    

def combine_place(base_amr, place_amr):
    """Attach ``place_amr`` to ``base_amr`` as the ``:location`` of its top.

    The variables of the place graph are renamed first (see
    ``rename_variables``) so they cannot clash with those of the base graph.
    Neither input graph is modified.

    Returns:
        A new ``penman.Graph`` holding the triples of both graphs plus the
        linking ``:location`` triple, with the same top as ``base_amr``.
    """
    place_copy = rename_variables(place_amr)
    combined_triples = list(base_amr.triples) + list(place_copy.triples)
    combined_triples.append((base_amr.top, ':location', place_copy.top))
    # Pass the top explicitly: by default the graph would take the source of
    # its first triple, which is not necessarily the top of the base graph.
    return penman.Graph(triples=combined_triples, top=base_amr.top)


In [None]:
def gpt_combine(base: str, action: str, place: str, engine="text-davinci-001",
                temperature=0.6, max_tokens=128):
    """Ask an OpenAI completion model to merge base, action and place.

    A fixed few-shot prompt is completed with the three inputs and sent to
    ``openai.Completion.create``.

    Args:
        base: base description of the scene.
        action: action to merge into the description.
        place: place to merge into the description.
        engine: name of the OpenAI engine.
        temperature: sampling temperature (0.6 by default, the previously
            hard-coded value).
        max_tokens: completion length limit (128 by default, the previously
            hard-coded value).

    Returns:
        The raw response of ``openai.Completion.create``; the combined
        sentence is read by callers from ``response.choices[0].text``.
    """
    # The few-shot examples are part of the model input: keep them verbatim.
    prompt_template = """Combine base, action, and place into one sentence.

Base: A man sitting on a bench in front of a building
Action: sit and think to himself
Place: at a train station in india
Combined: A man sits and thinks to himself at a train station in India

Base: A woman in a dress is standing in a field
Action: make her way to an empty field
Place: beside a meadow
Combined: A woman in a dress is walking to an empty field, besides a meadow

Base: A boat is in the water with a red flag
Action: chase the other boat
Place: aboard a navy ship
Combined: A small navy boat with a red flag speeds and chases another boat

Base: A dark image of a creature with a dark background
Action: shatter the reflection
Place: in a ruined courtyard
Combined: A dark image of a creature with a dark background is shattered in a ruined courtyard

Base: A woman standing in a doorway in a living room
Action: leave for home on her own
Place: just inside someone's front door of their house in the living room
Combined: A woman standing just inside the front door of someone's house is leaving home on her own

Base: A man in a suit and tie standing in front of a wall
Action: Take out a passport
Place: outside of a terminal baggage claim
Combined: A man in a suit and a tie takes out his passport outside of a terminal's baggage claim

Base: A woman pushing a suitcase with a man standing behind her
Action: set the suitcase by the door
Place: in a 19th century house
Combined: A woman sets her suitcase by the door in a 19th century house, with a man standing behind her

Base: A woman walking down a street in the dark
Action: try to evade a car
Place: on a narrow street or alley
Combined: A woman in a narrow street or alley tries to evade a car

Base: A woman walking down a street in the dark
Action: trip while she is trying to hurry
Place: on a narrow street or alley
Combined: A woman trips while she is trying to hurry in a narrow street or alley

Base: A car is driving down a dark street at night
Action: drive away quickly from the scene
Place: outside on a street at night that is filled with police cars
Combined:  A car outside on a street filled with police cars is quickly driving away from the scene

Base: A woman with a red tie is driving a car
Action: hang on tightly to the dashboard
Place: in a police car
Combined: A woman with a red tie in a police car hangs on tightly to the dashboard

Base: A man standing in front of a doorway at night
Action: turns on the light
Place: just outside of someone's home off of the porch
Combined: A man turns on the light just outside of someone's home off of the porch

Base: A man standing in front of a doorway at night
Action: call out to see if anyone is home
Place: just outside of someone's home off of the porch
Combined: A man calls out to see if anyone is home, just outside of someone's home off of the porch

Base: A young man practices karate
Action: leaps over the hedge to the next yard
Place: outside someone's home on a porch
Combined: A young man  leaps over the hedge to the next yard outside someone's home on a porch

Base: Two men standing next to each other in front of a forest
Action: stare off into the night sky
Place: near a cornfield outside someone's house
Combined: Two man stare off into the night sky near a cornfield outside someone's house

Base: A man with sunglasses and a yellow jacket standing next to a man with a yellow jacket
Action: scan the horizon
Place: at a gas station in the desert
Combined: At a gas station in the desert, a man with sunglasses and a yellow jacket scans the horizon

Base: A man in a suit and tie holding a knife
Action: present the notes to his superiors
Place: government capitol
Combined: A man in a suit and tie presents the notes to his superiors in the government capitol

Base: {}
Action: {}
Place: {}
Combined:"""

    prompt = prompt_template.format(base, action, place)
    response = openai.Completion.create(engine=engine, prompt=prompt,
                                        temperature=temperature, max_tokens=max_tokens)
    return response

def compute_gpt_lighthouses(mid, scene_element, max_lighthouses=1000, **kwargs):
    """Build GPT-combined sentences for one scene element of a movie.

    The groundings stored for ``(mid, scene_element)`` are fetched and every
    base is paired with each of the first five places and first five actions
    (base outermost, action innermost). Each triple is merged by
    ``gpt_combine``; generation stops once ``max_lighthouses`` results exist.

    Args:
        mid: movie id.
        scene_element: index of the scene element.
        max_lighthouses: upper bound on the number of results.
        **kwargs: forwarded to ``gpt_combine`` (e.g. ``engine``).

    Returns:
        A list of ``(generated_text, (base, action, place))`` tuples.
    """
    groundings = nre.get_groundings_from_db(mid, scene_element)
    combinations = itertools.product(
        groundings['base'],
        groundings['places'][:5],
        groundings['actions'][:5],
    )
    lighthouses = []
    for base, place, action in combinations:
        completion = gpt_combine(base, action, place, **kwargs)
        lighthouses.append((completion.choices[0].text, (base, action, place)))
        if len(lighthouses) >= max_lighthouses:
            break
    return lighthouses



In [None]:
# Select the movie and scene element to experiment with. The commented-out
# ids are alternative movies kept for quick switching.
mid = 'Movies/114206337' # 'Movies/114208338'
# mid = 'Movies/114208338' # 'Movies/114208338'
# mid = 'Movies/114207550' # 'Movies/114208338'
elem = 0
# Fetch the movie info, the video embedding of the scene element and the
# groundings stored for it.
movie_info = api.get_movie_info(mid)
emb_image = vlm.encode_video(mid,elem,class_name='mdmmt_mean')
data = nre.get_groundings_from_db(mid,elem)


In [None]:
# Unpack the grounding lists for inspection; compute_gpt_lighthouses reads the
# same three keys from its own get_groundings_from_db call.
base = data['base']
places = data['places']
# experts = data['experts']
actions = data['actions']


In [None]:
# engines are ada/babbage/curie/davinci
# Generate lighthouses with the babbage engine; each (base, place, action)
# combination costs one gpt_combine call.
rc_babbage = compute_gpt_lighthouses(mid,elem,engine="text-babbage-001")

In [None]:
# Same generation with the ada engine, for comparison with rc_babbage.
rc_ada = compute_gpt_lighthouses(mid,elem,engine="text-ada-001")

In [None]:
# Display the babbage results: (generated_text, (base, action, place)) tuples.
rc_babbage

In [None]:
# Display only the generated sentences of the ada results.
[x[0] for x in rc_ada]