In [None]:
import sys
import os
import math
import random
import bisect
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import urllib
import subprocess
import re
import tempfile
import itertools
import torch
import spacy
import amrlib
import penman

from typing import List, Tuple
from operator import itemgetter 
# import qgrid

In [None]:
# Resolve the directory two levels above the current working directory and make it the
# working directory (presumably the repository root, so the project imports below resolve).
# NOTE(review): this cell is not idempotent -- every re-run climbs two more levels.
BASE_DIR = os.path.abspath(os.getcwd()+'/../..')  # /home/gil/dev/NEBULA2/
os.chdir(os.getcwd()+'/../..')

In [None]:
from nebula_api.nebula_enrichment_api import *
from experts.common.RemoteAPIUtility import RemoteAPIUtility
from nebula_api.mdmmt_api import mdmmt_api
from nebula_api.atomic2020.comet_enrichment_api import *
from nebula_api.canonisation_api import CANON_API
from nlp_tools.light_house_generator import LightHouseGenerator

In [None]:
import nebula_api.playground_api as pg_api

In [None]:
# Instantiate the service clients and models shared by the rest of the notebook.
# NOTE(review): model locations are hard-coded under /app/NEBULA2 -- presumably a container path; confirm.
nre = NRE_API()
api = RemoteAPIUtility()
mdmmt = mdmmt_api.MDMMT_API()
comet = Comet("/app/NEBULA2/nebula_api/atomic2020/comet-atomic_2020_BART")
ascore = CANON_API()
# amrlib models: `stog` is used below to parse sentences into AMR strings,
# `gtos` to generate sentences back from encoded graphs.
stog = amrlib.load_stog_model(model_dir="/app/NEBULA2/models/model_stog")
gtos = amrlib.load_gtos_model(model_dir="/app/NEBULA2/models/model_gtos")
# NOTE(review): `lh_gen` is left commented out, yet generate_concepts, generate_candidates and
# several later cells call it; they raise NameError unless this line is restored.
# lh_gen = LightHouseGenerator(comet,stog,gtos)

In [None]:
# Numeric keys of the movie documents used in this notebook; `movies` holds the
# corresponding "Movies/<key>" ids (the form matched against `doc._id` when downloading).
_MOVIE_KEYS = (
    114206952,
    114207205,
    114207398,
    114207499,
    114207361,
    114207740,
    114207908,
    114208744,
    114206724,
    114206548,
    114206264,
)
movies = ['Movies/{}'.format(key) for key in _MOVIE_KEYS]

In [None]:
from IPython.display import Javascript
from IPython.display import HTML, display
import base64


def download_video_file(movie, fname='/tmp/video_file.mp4',
                        url_prefix="http://ec2-18-159-140-240.eu-central-1.compute.amazonaws.com:7000/"):
    """Download the video of a movie document to a local file.

    Any existing file at `fname` is removed first. The `Movies` collection is queried for the
    document whose `_id` equals `movie`; for each returned document, `url_prefix` plus its
    `url_path` (with ".avi" replaced by ".mp4") is fetched into `fname`.

    Args:
        movie: document id, e.g. "Movies/114207205".
        fname: destination path of the downloaded file.
        url_prefix: base URL prepended to the document's `url_path`.

    Returns:
        `fname`. If no document matched, nothing is downloaded and the file does not exist.
    """
    # `import urllib` alone does not load the `request` submodule; import it explicitly.
    import urllib.request

    if os.path.exists(fname):
        os.remove(fname)
    query = 'FOR doc IN Movies FILTER doc._id == "{}" RETURN doc'.format(movie)
    url_link = ''
    for doc in api.db.aql.execute(query):
        url_link = (url_prefix + doc['url_path']).replace(".avi", ".mp4")
        print(url_link)
        urllib.request.urlretrieve(url_link, fname)
    if not url_link:
        # Keep the original best-effort contract (still return fname) but make the miss visible.
        print("No movie document found for {}; nothing downloaded".format(movie))
    return fname



def read_video_segm(abspath, t_beg, t_end):
    """Cut a segment out of a video with ffmpeg and return it as fragmented-MP4 bytes.

    The segment starts at `t_beg`, lasts `t_end - t_beg`, and is scaled to a height of 112
    with an even width. ffmpeg writes to stdout, which is captured and returned.

    Args:
        abspath: path of the input video.
        t_beg: segment start, passed to ffmpeg `-ss`.
        t_end: segment end; the duration passed to `-t` is `t_end - t_beg`.

    Returns:
        The bytes ffmpeg wrote to stdout.

    Raises:
        AssertionError: if ffmpeg exits with a non-zero status.
    """
    # An argument list (no shell) keeps paths with spaces or shell metacharacters intact.
    cmd = [
        'ffmpeg', '-y',
        '-ss', str(t_beg),
        '-i', str(abspath),
        '-max_muxing_queue_size', '9999',
        '-loglevel', 'error',
        '-f', 'mp4',
        '-vf', 'scale=(floor(112/ih * iw/2))*2:112',
        '-c:a', 'copy',
        '-movflags', 'frag_keyframe+empty_moov',
        '-t', str(t_end - t_beg),
        'pipe:1',
        '-nostats', '-hide_banner', '-nostdin',
    ]
    p = subprocess.run(cmd, stdout=subprocess.PIPE)
    assert p.returncode == 0, ' '.join(cmd)
    return p.stdout

# Module-level counter used to give every displayed <video> element a unique DOM id.
video_id_cnt = 0    
class VideoElem:
    """Inline video preview displayed in the notebook output.

    Creating an instance cuts the [t_start, t_end] segment of `fname` with read_video_segm,
    embeds it base64-encoded in an HTML <video> tag and displays it immediately.
    hide/show/remove emit JavaScript that targets that tag by its id.
    NOTE(review): the emitted JavaScript uses `$(...)`, so it presumably needs jQuery in the page.
    """

    def __init__(self, fname, t_start=0, t_end=999):
        """Encode the segment of `fname` between `t_start` and `t_end` and display it."""
        # The handle `f` is never read (ffmpeg reads the file by path); opening it only
        # makes the constructor fail early when `fname` cannot be opened.
        with open(fname, 'rb') as f:
            #data = base64.standard_b64encode(f.read())
            buf = read_video_segm(fname, t_start, t_end)
            data = base64.standard_b64encode(buf)
        # Reserve the next id from the module-level counter.
        global video_id_cnt
        video_id_cnt += 1
        self.video_id_cnt = video_id_cnt
        elem = HTML(f"""
            <video id="video_{self.video_id_cnt}" autoplay loop muted>
                <source src="data:video/mp4;base64,{data.decode('ascii')}" type="video/mp4">
            </video>        
        """)
        display(elem)
    
    def hide(self):
        """Emit JavaScript that hides this instance's <video> element."""
        js = f'$("#video_{self.video_id_cnt}").hide()'
        display(Javascript(js))
        
    def show(self):
        """Emit JavaScript that shows this instance's <video> element."""
        js = f'$("#video_{self.video_id_cnt}").show()'
        display(Javascript(js))

    def remove(self):
        """Emit JavaScript that removes this instance's <video> element from the page."""
        js = f'$("#video_{self.video_id_cnt}").remove()'
        display(Javascript(js))
        
def mdmmt_video_encode(start_f, stop_f, path='/tmp/video_file.mp4', freq=24):
    """Encode the [start_f, stop_f] frame range of a video with the MDMMT model.

    Frame numbers are converted to whole seconds by integer division with `freq`.
    A range that falls inside a single second is widened to one full second.

    Args:
        start_f: first frame of the range.
        stop_f: last frame of the range.
        path: path of the video file.
        freq: frames per second used for the frame-to-second conversion.

    Returns:
        Whatever `mdmmt.encode_video` returns for the segment, or None when the range is
        shorter than one second even after widening (i.e. stop precedes start).
    """
    t_start = start_f // freq
    t_end = stop_f // freq
    if t_start == t_end:
        # Widen backwards when possible; at second 0 widen forwards instead so the
        # start time never becomes negative.
        if t_start > 0:
            t_start -= 1
        else:
            t_end += 1
    print("Start/stop", t_start, " ", t_end)
    if (t_end - t_start) < 1:
        print("Stage too short")
        return None
    return mdmmt.encode_video(
        mdmmt.vggish_model,  # audio modality
        mdmmt.vmz_model,  # video modality
        mdmmt.clip_model,  # image modality
        mdmmt.model_vid,  # aggregator
        path, t_start, t_end)

In [None]:
def frame_to_concepts(frame)-> List:
    """Collect the distinct concept names mentioned by a frame's experts.

    The strings under `tracker_description`, `step_description` and `simulated_expert` are
    merged, a trailing numeric suffix is stripped from purely alphabetic names
    (e.g. "car2" -> "car"), and duplicates are removed.

    Args:
        frame: mapping with the three keys above, each holding an iterable of strings.

    Returns:
        Sorted list of unique concept names (sorted so the output is reproducible).
    """
    # Explicit [A-Za-z_]: the former class `a-zA-z` also matched [ \ ] ^ and ` by accident.
    # The underscore is kept on purpose so names like "car_seat1" are still normalised.
    pattern = re.compile(r"^([A-Za-z_]+)(\d*)$")

    def transform_concept(c):
        m = pattern.match(c)
        return m.group(1) if m else c

    pre_concepts = (set(frame['tracker_description'])
                    | set(frame['step_description'])
                    | set(frame['simulated_expert']))
    return sorted({transform_concept(c) for c in pre_concepts})

def kgbart_fusion(frames) -> Tuple[List[str], List[str]]:
    """Run the KGBART decoder over the concepts of each frame.

    For every frame the concepts from frame_to_concepts are written, space-separated, as one
    line of a temporary input file. The KGBART `decode_seq2seq.py` script is then run as a
    subprocess (its output is discarded) and the lines of its output file are read back.
    Both temporary files are removed afterwards, also when an error occurs.

    Args:
        frames: iterable of frame mappings accepted by frame_to_concepts.

    Returns:
        (all_concepts, rc): the comma-joined concept string of each frame, and the raw lines
        of the KGBART output file (empty if the script wrote nothing, e.g. because it failed;
        the exit status is not checked).
    """
    KGBART_MAIN = BASE_DIR+'/kgbart/KGBART/KGBART_training/decode_seq2seq.py'
    KGBART_CC_DIR = BASE_DIR+'/kgbart/downloaded/commongen_dataset'
    KGBART_MODEL_DIR = BASE_DIR+'/kgbart/output/best_model/model.best.bin'

    h, outname = tempfile.mkstemp(text=True)
    os.close(h)
    h, fname = tempfile.mkstemp(text=True)
    os.close(h)
    try:
        options = {
            'data_dir': KGBART_CC_DIR,
            'output_dir': os.path.dirname(outname),
            'input_file': fname,
            'model_recover_path': KGBART_MODEL_DIR,
            'output_file': os.path.basename(outname),
            'split': 'dev',
            'beam_size': 5,
            'forbid_duplicate_ngrams': True
        }

        # Write the expert tokens of every frame to the input file, one frame per line.
        all_concepts = []
        with open(fname, 'w') as f:
            for frame in frames:
                concepts = frame_to_concepts(frame)
                all_concepts.append(', '.join(concepts))
                f.write(' '.join(concepts)+'\n')

        # Argument list instead of a shell string: paths with spaces survive unquoted.
        cmd = ['python', KGBART_MAIN]
        for k, v in options.items():
            cmd += ['--{}'.format(k), str(v)]
        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        with open(outname, 'r') as f:
            rc = f.readlines()
    finally:
        for tmp_path in (outname, fname):
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
    return all_concepts, rc

In [None]:
def flatten(lst):
    """Concatenate the items of every sub-iterable of `lst` into one flat list."""
    flat = []
    for sub in lst:
        flat.extend(sub)
    return flat

def compute_batch_scores(image_emb: torch.Tensor, texts: List[str]) -> np.ndarray:
    """Score each text against a video embedding.

    Every text is encoded with `mdmmt.batch_encode_text`; the score of a text is the sum of
    the element-wise product (dot product) of its embedding with `image_emb`.

    Args:
        image_emb: embedding to compare against; must be expandable to the shape of the
            text-embedding batch (assumes a 1-D vector of the embedding size -- TODO confirm).
        texts: the texts to score.

    Returns:
        NumPy array with one score per text.
    """
    emb_batch = mdmmt.batch_encode_text(texts)
    # Use the argument: the previous code read the notebook global `emb_image` instead,
    # silently ignoring the embedding that callers passed in.
    return (image_emb.expand_as(emb_batch) * emb_batch).sum(dim=1).cpu().numpy()


def compute_concat_score(image_emb: torch.Tensor, texts: List[str], join_on=',') -> torch.Tensor:
    """Score the concatenation of several texts against a video embedding.

    Each text is stripped, loses one trailing '.', and gets `join_on` plus a space appended;
    the pieces are concatenated, stripped and encoded with `mdmmt.encode_text`. Empty or
    whitespace-only texts are skipped (they used to raise IndexError).

    Args:
        image_emb: embedding to compare against.
        texts: texts to combine.
        join_on: separator appended after every text (so the combined text ends with it).

    Returns:
        The `torch.matmul` product of `image_emb` and the combined-text embedding.
    """
    parts = []
    for t in (x.strip() for x in texts):
        if not t:
            continue
        if t.endswith('.'):
            t = t[:-1]
        parts.append(t + join_on + ' ')
    combined_text = ''.join(parts)
    print("Combined: "+combined_text)
    return torch.matmul(image_emb, mdmmt.encode_text(combined_text.strip()))

In [None]:
# Alphabetic name (underscore allowed), optional '-', optional numeric suffix.
# Explicit [A-Za-z_]: the former class `a-zA-z` also matched [ \ ] ^ and ` by accident.
_CONCEPT_RE = re.compile(r"^([A-Za-z_]+)-?(\d*)$")


def transform_concept(c):
    """Strip a numeric suffix from a concept name.

    "bite-01" -> "bite", "person3" -> "person". Strings that do not consist of an
    alphabetic name followed by an optional '-' and digits are returned unchanged.
    """
    m = _CONCEPT_RE.match(c)
    return m.group(1) if m else c

class ConceptManager:
    """Namespace for concept normalisation helpers."""

    def __init__(self):
        pass

    @staticmethod
    def ground_concept(concept):
        """Return `concept` normalised by transform_concept.

        Declared static so it works both as ``ConceptManager.ground_concept(x)`` (the form
        used in this notebook) and on an instance, which previously raised TypeError.
        """
        return transform_concept(concept)

In [None]:
class SimilarityManager:
    """spaCy-based similarity between a concept and one or more target strings."""

    def __init__(self):
        self.nlp = spacy.load('en_core_web_lg')

    def similarity(self, c1, c2):
        """Return the similarity of `c1` to each target in `c2`.

        Args:
            c1: the concept text.
            c2: a single target string or an iterable of target strings.

        Returns:
            List with exactly one similarity per target, in input order (empty for no targets).
        """
        targets = [c2] if isinstance(c2, str) else list(c2)
        a = self.nlp(c1)
        # One Doc per target. Joining the targets into one text and iterating its tokens
        # (the previous approach) misaligned the scores whenever a target was split into
        # several tokens, e.g. multi-word or hyphenated targets.
        return [a.similarity(doc) for doc in self.nlp.pipe(targets)]


# Shared module-level instance (loads the spaCy model once for ad-hoc use in the notebook).
# NOTE(review): SubsetOptimization builds its own SimilarityManager rather than using this one.
smanager = SimilarityManager()


In [None]:
def softmax(x):
    """Numerically stable softmax along the first axis.

    Subtracting the maximum before exponentiating leaves the result mathematically unchanged
    but avoids overflow (inf/inf = NaN) for large inputs.

    Args:
        x: array-like of scores.

    Returns:
        NumPy array of the same shape whose entries along axis 0 sum to 1
        (an empty input yields an empty array).
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return x
    exps = np.exp(x - x.max(axis=0))
    return exps / exps.sum(axis=0)

class SubsetOptimization:
    """Choose a subset of candidate sentences by simulated annealing.

    A state is a sorted list of candidate indices. Its cost (lower is better) is
    ``-(coverage + theta * similarity)``, where coverage is the mean over experts of the best
    concept-to-sentence similarity inside the subset, and similarity is the mean
    video/text score of the subset.
    """

    def __init__(self, video_emb, experts: List, candidates_strings: List[str], verbose: bool = True):
        """Parse the candidates to AMR and precompute similarity and coverage tables.

        Args:
            video_emb: video embedding passed to compute_batch_scores.
            experts: concept strings that the chosen sentences should cover.
            candidates_strings: candidate sentences.
            verbose: print progress/debug lines (default True, the historical behaviour).
        """
        self.verbose = verbose
        self.stog = amrlib.load_stog_model(model_dir="/app/NEBULA2/models/model_stog")
        self.video_emb = video_emb
        # Annealing schedule: the temperature goes from initial_temp down to final_temp
        # in linear steps of alpha.
        self.initial_temp = 10
        self.final_temp = .05
        self.alpha = 0.01
        # Weight of the video/text similarity relative to the expert coverage.
        self.theta = 0.5
        self.experts = experts
        self.candidates_strings = candidates_strings
        self.candidates_amr_strings = self.stog.parse_sents(self.candidates_strings)
        self.candidates = self.candidates_amr_strings
        self.candidates_amrs = [penman.decode(x) for x in self.candidates_amr_strings]
        self.candidates_similarity = compute_batch_scores(self.video_emb, self.candidates_strings)
        self.opt_results = []
        self.smanager = SimilarityManager()

        # coverage_matrix[i][j]: how well candidate j covers expert i.
        self.coverage_matrix = np.full([len(self.experts), len(self.candidates)], np.nan)
        for i in range(len(experts)):
            for j in range(len(candidates_strings)):
                self.coverage_matrix[i][j] = self.concept_amr_similarity(self.experts[i], self.candidates_amrs[j])
        # Upper bound on the number of sentences in a state.
        self.max_size = int(len(self.experts)*1.5)

    def _log(self, *args):
        """Print only when verbose output is enabled."""
        if getattr(self, 'verbose', True):
            print(*args)

    def concept_amr_similarity(self, concept, amr):
        """Best similarity between `concept` and any instance of the AMR graph (0.0 if none)."""
        insts = [ConceptManager.ground_concept(x.target) for x in amr.instances()]
        sims = self.smanager.similarity(concept, insts)
        return max(sims, default=0.0)

    def get_coverage(self, i, j):
        """Coverage of expert `i` by candidate `j`, computed on demand if still NaN."""
        if np.isnan(self.coverage_matrix[i][j]):
            self.coverage_matrix[i][j] = self.concept_amr_similarity(self.experts[i], self.candidates_amrs[j])
        return self.coverage_matrix[i][j]

    def get_expert_coverage(self, state):
        """Per-expert best coverage achieved by the candidates in `state`."""
        return self.coverage_matrix[:, state].max(axis=1)

    def get_state_coverage(self, state) -> float:
        """Mean over experts of the per-expert best coverage of `state`."""
        expert_coverage = self.get_expert_coverage(state)
        self._log("State coverage for {}:".format(state))
        self._log(expert_coverage)
        return np.mean(expert_coverage)

    def get_cost(self, state: List[int]) -> float:
        """Cost of a state (lower is better); the empty state costs 0."""
        if not state:
            return 0
        coverage_score = self.get_state_coverage(state)
        similarity_score = self.candidates_similarity[state].mean().item()
        return -(coverage_score + self.theta*similarity_score)

    def _fit_scores(self, indices):
        """Per-candidate fitness: mean coverage over experts plus theta * video similarity."""
        self._log("Computing arrays for state: ")
        self._log(indices)
        return self.coverage_matrix.mean(axis=0)[indices] + self.theta*self.candidates_similarity[indices]

    # state here is assumed (and guaranteed on return) to be -sorted-
    def get_candidate(self, state: List[int]) -> List[int]:
        """Propose a neighbour of `state` by removing or adding one sentence.

        * empty state: a single random candidate;
        * state at (or above) max_size: drop the sentence with the lowest fitness;
        * otherwise: remove with probability equal to the state coverage (always, when no
          candidate is left outside the state), else add a sentence sampled for a sampled expert.
        """
        n_candidates = len(self.candidates_strings)
        if not state:
            self._log("Empty state")
            return [random.randint(0, n_candidates - 1)]

        rc = state.copy()
        s = np.array(state)
        s_fitscore = self._fit_scores(s)

        if len(state) >= self.max_size:
            self._log("Maximum state size, removing")
            del rc[int(np.argmin(s_fitscore))]
            return rc

        coverage = self.get_state_coverage(state)
        remove_sentence = random.random() < coverage
        in_state = set(state)
        anti_state = [i for i in range(n_candidates) if i not in in_state]
        if not anti_state:
            # Every candidate is already selected, so adding is impossible
            # (sampling from an empty pool used to crash here).
            remove_sentence = True
        self._log("coverage of {} is {}, remove?{}".format(state, coverage, remove_sentence))

        if remove_sentence:             # We decide to remove a sentence from the set
            self._log("Removing")
            probs = softmax(-s_fitscore)    # weaker sentences are more likely to go
            idx = np.random.multinomial(1, probs).argmax()
            del rc[idx]
        else:                           # Add a sentence from the outside
            self._log("Adding")
            s1 = np.array(anti_state)
            # Pick an expert to try and cover
            # NOTE(review): this favours experts that are already well covered; if the intent
            # is to target poorly covered experts the sign should be negated -- confirm.
            probs = softmax(self.get_expert_coverage(s)*10)         # Coverage is in (0,1), so we use low temperature
            expert_to_cover = np.random.multinomial(1, probs).argmax()
            probs = softmax(self.coverage_matrix[expert_to_cover][s1]*10)
            idx_to_add = np.random.multinomial(1, probs).argmax()
            bisect.insort(rc, anti_state[idx_to_add])

        return rc

    def get_scored_permutations(self, k):
        """Return (permutation, cost) for every length-`k` permutation of candidate indices."""
        n = len(self.candidates)
        return [(x, self.get_cost(list(x))) for x in itertools.permutations(range(n), k)]

    def simulated_annealing(self, initial_state):
        """Run the annealing loop from `initial_state` and return the final state.

        After every step ``-cost`` of the current state is appended to `self.opt_results`
        (reset at the start of each run).
        """
        self.opt_results = []
        current_temp = self.initial_temp

        # Start by initializing the current state with the initial state
        current_state = initial_state
        current_cost = self.get_cost(current_state)

        while current_temp > self.final_temp:
            next_cand = self.get_candidate(current_state)
            cand_cost = self.get_cost(next_cand)
            self._log("current cost: {} ({}). Candidate cost: {} ({})".format(
                current_cost, current_state, cand_cost, next_cand))

            # Positive difference means the candidate is cheaper, i.e. better
            cost_diff = current_cost - cand_cost

            if cost_diff > 0:
                accept = True
            else:
                # otherwise accept it with probability e^(cost_diff/temp)
                chance = math.exp(cost_diff / current_temp)
                self._log("chance to move: {}".format(chance))
                accept = random.uniform(0, 1) < chance
            if accept:
                current_state, current_cost = next_cand, cand_cost

            # decrement the temperature
            current_temp -= self.alpha
            self.opt_results.append(-current_cost)

        return current_state



In [None]:
def generate_concepts(mid, scene_elem, use_db=False):
    """Return the grounding inputs for one scene element.

    With `use_db` the stored groundings are fetched through `nre.get_groundings_from_db`
    and returned as-is. Otherwise the notebook-level `events` and `places` are decomposed
    with `lh_gen` (`mid` and `scene_elem` are not used on this path) and a tuple
    (concepts, attributes, persons, triplets, verbs) of flat lists is returned.
    """
    if not use_db:
        decomposed = lh_gen.decompose_lighthouse(events=events, actions=[], places=places)
        concepts, attributes, persons, triplets, verbs = decomposed
        flat_concepts = flatten(concepts.values())
        flat_attributes = flatten(attributes.values())
        flat_triplets = flatten(triplets.values())
        flat_persons = flatten(persons.values())
        return flat_concepts, flat_attributes, flat_persons, flat_triplets, verbs['verbs']

    return nre.get_groundings_from_db(mid, scene_elem)

# get sets of concepts and triplets and return set of amrs/sentences

def generate_candidates(concepts, attributes, persons, triplets, verbs):
    """Generate candidates from the grounding lists via `lh_gen.generate_from_concepts`.

    The notebook-level `places` and a trailing None are passed along with the arguments.
    """
    generated = lh_gen.generate_from_concepts(
        concepts, attributes, persons, triplets, verbs, places, None)
    return generated
    
# def fusion_pipeline(mid: str, scene_elem: int, **kwargs):
    
    

In [None]:
def rearrange_concepts(concepts):
    """Get a list of 1-item dictionaries, return a list of the values.

    For each dictionary the first value (in insertion order) is taken.
    """
    values = []
    for item in concepts:
        values.append(list(item.values())[0])
    return values

def permute_sentence(sentence, concepts, max_candidates=None):
    """Build AMR variants of a sentence by swapping its instances for same-class concepts.

    Every run of digits in the sentence is replaced by "man", the result is parsed to AMR,
    and each `:instance` triple is classified with `ascore.get_class_of_entity`. The
    alternatives of an instance are the concepts of its class plus the instance itself;
    one graph is produced for every combination of alternatives.

    Args:
        sentence: the sentence to permute.
        concepts: mapping class name -> list of 1-item dicts (see rearrange_concepts).
        max_candidates: optional cap on the number of graphs produced. The number of
            combinations grows multiplicatively with the instances; None means no cap.

    Returns:
        (pen, rc): the decoded AMR graph of the sentence and the list of variant graphs.
    """
    # typing.Tuple rather than tuple[...]: the builtin form needs Python 3.9+.
    def replace_instance(g: penman.Graph, changes: List[Tuple[int, str]]) -> penman.Graph:
        """Copy `g`, replacing the target of triple `i` by `val` for each (i, val)."""
        amr_copy = penman.Graph(triples=g.triples, epidata=g.epidata)
        for (i, val) in changes:
            b = list(amr_copy.triples[i])
            b[2] = val
            amr_copy.triples[i] = tuple(b)
        return amr_copy

    concepts = {k: rearrange_concepts(v) for (k, v) in concepts.items()}
    s = re.sub('[0-9]+', 'man', sentence.strip())
    print("Original Sentence: {}".format(s))
    [amr] = stog.parse_sents([s])
    pen = penman.decode(amr)

    insts_list = []     # (triple index, entity class) of every instance triple
    dims = []           # index range over the alternatives of each instance
    for i, triplet in enumerate(pen.triples):
        if triplet[1] != ':instance':
            continue
        entity_class = ascore.get_class_of_entity(transform_concept(triplet[2]))
        if entity_class == 'none':
            # This instance has no class, so it gets its own special class (edge case).
            entity_class = 'none{}'.format(i)
            concepts[entity_class] = []
        # setdefault: a class missing from the provided concepts used to raise KeyError.
        options = concepts.setdefault(entity_class, [])
        if triplet[2] not in options:
            options.append(triplet[2])
        insts_list.append((i, entity_class))
        dims.append(range(len(options)))

    prods = itertools.product(*dims)
    if max_candidates is not None:
        prods = itertools.islice(prods, max_candidates)

    rc = []
    for cand in prods:
        changes = [(insts_list[k][0], concepts[insts_list[k][1]][d]) for (k, d) in enumerate(cand)]
        rc.append(replace_instance(pen, changes))

    return pen, rc

In [None]:
# Pick one movie and load everything the later cells need for its first scene element.
mid = movies[1]
events, places = comet.get_playground_data(mid, 0)
# Downloads the movie to the default temp path and displays an inline preview.
video = VideoElem(download_video_file(mid))
movie_info = api.get_movie_info(mid)
# NOTE(review): the unpacking assumes a scene element is a (start_frame, stop_frame) pair -- confirm.
emb_image = mdmmt_video_encode(*movie_info['scene_elements'][0])
movie_info


In [None]:
# Hand-written candidate sentences used as input to SubsetOptimization below.
# The commented line is the alternative of taking the ten best-scored texts from `z4`,
# which is only defined in a later cell.
# sentences = [x[0] for x in z4[-10:]]
sentences = ['woman in the alleyway',
             'woman alley outside apartments',
             'woman enters the narrow street',
             'woman need find a dark street corner',
             'woman the dark street corner',
             'woman need to find the alley way',
             'woman sees the friend across the street',
             'woman need to find a narrow street',
             'woman narrow street',
             'woman  the alleyway']


In [None]:
# Stored groundings of the first scene element of a different, hard-coded movie.
data = nre.get_groundings_from_db("Movies/114206816",0)
data.keys()

In [None]:
# Permute one event sentence using the concepts stored for it.
# NOTE(review): assumes `s` is a key of data['concepts'] -- KeyError otherwise.
s = '1 seem like she cannot believe what she is seeing'
pen, rc = permute_sentence(s,data['concepts'][s])

In [None]:
# Turn (at most the first 10000 of) the permuted graphs back into sentences.
bla = gtos.generate([penman.encode(x) for x in rc[:10000]])
# penman.encode(pen)

In [None]:
# bla[0] is taken as the list of generated sentences.
scores = compute_batch_scores(emb_image, bla[0])

In [None]:
# NOTE(review): hidden state -- at this point `rc` is the list of graphs returned by
# permute_sentence, so rc['triplets'] raises TypeError; this presumably expects the
# groundings dict that a later cell assigns to `rc`.
s = list(rc['triplets'].keys())[1]

In [None]:
# Mask every run of digits in `s` with '___'.
re.sub('[0-9]+','___',s)

In [None]:
# Look up the entity class of a list of common verbs (rebinds `rc` to the result).
rc = [(x,ascore.get_class_of_entity(x)) for x in ["stand", "sit", "take", "give", "look", "see", "smile", "eat", "hold", "drink", "put", "think", "comprehend", "speak", "talk", "raise", "pick", "announce", "laugh", "run", "walk"]]
print('-------------------------------------------------------')
print(rc)


In [None]:
# Build the optimiser for four hand-picked expert concepts over the candidate sentences.
opt = SubsetOptimization(emb_image, ["woman", "friend", "dark", "hurry"], sentences)

In [None]:
# Run simulated annealing from the empty state; `rc` becomes the final list of indices.
# rc = opt.get_scored_permutations(3)
rc = opt.simulated_annealing([])

In [None]:
# Plot the (negated) cost recorded after every annealing step.
plt.figure(figsize=(15, 8))
sns.lineplot(x=(range(len(opt.opt_results))),y=opt.opt_results)

In [None]:
# Rebinds `rc` once more, now to the stored groundings of the selected movie.
rc = nre.get_groundings_from_db(mid, 0)

In [None]:
rc['verbs']

In [None]:
# Decompose the events/places of the selected scene into grounding lists.
# NOTE(review): `lh_gen` is only defined in a commented-out line of the setup cell, so this
# raises NameError on a fresh kernel.
concepts, attributes, persons, triplets, verbs = lh_gen.decompose_lighthouse(events=events, actions=[],
                                                                             places=places)

concepts = flatten(concepts.values())
attributes = flatten(attributes.values())
triplets = flatten(triplets.values())
persons = flatten(persons.values())

In [None]:
# Hard-coded sample groundings; running this cell overwrites the values computed above.
concepts = [x.strip() for x in [' suitcase', ' car', ' car seat']]
attributes = [x.strip() for x in [' hurriedly', ' rushed', ' hurried']]
persons = [x.strip() for x in [' gets hit by a car', ' to get in the car', ' to get to the car']]
triplets = [x.strip() for x in [' PersonX gets hit by a car',
 ' PersonX steps off of the curb',
 ' PersonX is running late for work']]
verbs = {}
verbs['verbs'] = [x.strip() for x in ['get', 'accept', 'acquire']]
places = [x.strip() for x in ['on a narrow street or alley',
 'on a narrow street',
 'outside in a large alley']]

In [None]:
# Generate from the first three entries of every grounding list (also needs `lh_gen`).
best_sent = lh_gen.generate_from_concepts(concepts[:3], attributes[:3], persons[:3], triplets[:3], verbs['verbs'][:3],
                                                    places[:3], None)


In [None]:
# Compare every triplet event with its COMET groundings: score the original text and each
# grounding against the video embedding and record the best grounding and its gain.
cands = comet.get_groundings(events,places,type="triplet")
for c in cands.keys():
    # Drop empty / whitespace-only groundings.
    cands[c] = [x for x in cands[c] if x.strip()]
# compute_batch_scores(emb_image,["man is wearing a wide-brimmed hat", "man is wearing a wide - brimmed floral hat", "man wants to take off their hat", "man with a hat"])
# compute_batch_scores(emb_image,["woman hurriedly steps off of the curb into the street carrying her luggage to the car", "woman hurriedly steps out of the street", "woman hurriedly steps out of the street carrying her luggage to the car"])
diffs = []          # best grounding score minus original score, per event
orig_c = []         # original event texts
best_c = []         # best-scoring grounding per event
all_output = []     # all groundings per event
all_scores = []     # score arrays per event
inp_scores = []     # score of the original text per event
for c in cands.keys():
    final_cands = [re.sub('PersonX', 'woman', x.strip()) for x in cands[c]]
    if not final_cands:
        # Nothing to compare against; max()/argmax() of an empty array would raise.
        continue
    final_c = re.sub('PersonX', 'woman', c.strip())
    orig_score = compute_batch_scores(emb_image, [final_c])
    rc = compute_batch_scores(emb_image, final_cands)
    # compute_batch_scores returns a NumPy array, so index it directly (no .cpu()).
    ind = int(np.argmax(rc))
    all_output.append(final_cands)
    all_scores.append(rc)
    orig_c.append(final_c)
    inp_scores.append(orig_score)
    best_c.append(final_cands[ind])
    diffs.append(rc.max()-orig_score[0])

In [None]:
# One "original -> best grounding : score gain" line per event.
['{} -> {} : {}'.format(x,y,z.item()) for (x,y,z) in zip(orig_c,best_c,diffs)]

In [None]:
# Flatten all groundings with their scores, de-duplicate, and sort by ascending score
# (so the best-scored texts are at the end of z4).
z1 = [x for l in all_output for x in l]
# The per-event scores are NumPy arrays, so concatenate with NumPy (torch.concat rejects them).
z2 = np.concatenate(all_scores)
z3 = list(zip(z1,[round(x,4) for x in z2.tolist()]))
z3 = list(set(z3))
z4 = sorted(z3,key=lambda x: x[1])


In [None]:
# Already a NumPy array (compute_batch_scores converts it), so display it directly.
opt.candidates_similarity

In [None]:
# Show the candidate sentences of the highest-valued entry of `rc`.
# NOTE(review): hidden state -- this expects `rc` to be the (permutation, cost) list from
# opt.get_scored_permutations, which is commented out above; with the other values bound
# to `rc` in this notebook it fails.
itemgetter(*list(reversed(sorted(rc,key = lambda x: x[1])))[0][0])(opt.candidates_strings)

In [None]:
# Parse three sample sentences to AMR and decode the second one for manual editing.
[b, c, d] = stog.parse_sents(['He comes from New York', 'man sits in a chair waiting for someone', "he drives carefully"])
a = penman.decode(c)

In [None]:
# Replace the target of the third-from-last triple with "love" (rebinds `b`).
b = list(a.triples[-3])
b[2] = "love"
a.triples[-3] = tuple(b)

In [None]:
# Rebuild a graph from the edited triples, reusing the original epidata.
a1 = penman.Graph(triples=a.triples,
      epidata=a.epidata)

In [None]:
a1.triples

In [None]:
# Index of the instance triple of variable 'c'.
# NOTE(review): relies on the private penman method `_filter_triples`.
a.triples.index(a._filter_triples('c', ':instance', None)[0])

In [None]:
print(c)

In [None]:
penman.encode(a)

In [None]:
# Generate a sentence from the edited graph.
gtos.generate([penman.encode(a)])

In [None]:
# concept_amr_similarity is a method of SubsetOptimization (there is no module-level
# function of that name), so call it on the optimiser instance built above.
opt.concept_amr_similarity("male",a)

In [None]:
# Scratch: load the spaCy model directly and inspect token slicing.
nlp = spacy.load('en_core_web_lg')
z = nlp('dog cat milk')

In [None]:
z[1:]

In [None]:
# NOTE(review): rebinds `gtos` to a model loaded without the explicit model_dir used in the setup cell.
gtos = amrlib.load_gtos_model()

In [None]:
# Round trip: sentences -> AMR -> generated sentences (rebinds `z`).
z = stog.parse_sents(['The woman looks at a rabid dog as it bites an old, well-dressed man',
                     'I was late to the airport and almost missed my flight'])
gtos.generate(z)[0]


In [None]:
ascore.get_concept_from_entity('friendship')

In [None]:
ConceptManager.ground_concept('bite-01')

In [None]:
# NOTE(review): imports placed mid-notebook; they belong with the imports in the first cell.
from nltk.corpus import wordnet
from nltk.stem.wordnet import WordNetLemmatizer


In [None]:
# NOTE(review): `get_giltest` is not defined or imported anywhere in this notebook -- NameError.
get_giltest('love')

In [None]:
WordNetLemmatizer().lemmatize('chairs', 'n')

In [None]:
all_movies = comet.get_playground_movies()

In [None]:
# Fetch (or create) the normalized video data of every playground movie and print its length.
for m in all_movies:
    df = pg_api.get_or_create_normalized_video(m)
    print(len(df))
In [None]:
# Standalone variant of the setup plus video encoding (duplicates definitions from earlier
# cells so that it can run on its own in a fresh kernel).
import sys
import os
if 'BASE_DIR' not in globals():
    # Only relocate once: if the setup cell already ran, another chdir would climb two
    # more directory levels above the repository root.
    BASE_DIR = os.path.abspath(os.getcwd()+'/../..')  # /home/gil/dev/NEBULA2/
    os.chdir(os.getcwd()+'/../..')
from nebula_api.mdmmt_api import mdmmt_api
mdmmt = mdmmt_api.MDMMT_API()
def mdmmt_video_encode(start_f, stop_f, path='/tmp/video_file.mp4', freq=24):
    """Encode the [start_f, stop_f] frame range of a video with the MDMMT model.

    Frame numbers are converted to whole seconds by integer division with `freq`.
    A range that falls inside a single second is widened to one full second.

    Returns:
        Whatever `mdmmt.encode_video` returns for the segment, or None when the range is
        shorter than one second even after widening (i.e. stop precedes start).
    """
    t_start = start_f//freq
    t_end = stop_f//freq
    if t_start == t_end:
        # Widen backwards when possible; at second 0 widen forwards instead so the
        # start time never becomes negative.
        if t_start > 0:
            t_start = t_start - 1
        else:
            t_end = t_end + 1
    print("Start/stop", t_start, " ", t_end)
    if (t_end - t_start) >= 1:
        vemb = mdmmt.encode_video(
            mdmmt.vggish_model,  # audio modality
            mdmmt.vmz_model,  # video modality
            mdmmt.clip_model,  # image modality
            mdmmt.model_vid,  # aggregator
            path, t_start, t_end)
        return(vemb)
    else:
        print("Stage too short")
        return(None)
# Embedding of the first two seconds (frames 0-48 at the default 24 fps).
emb_image = mdmmt_video_encode(0,48)