In [1]:
# ! pip install nltk
# ! pip install stanza

In [2]:
import stanza
import os
# Working directory of the kernel; later cells build their data paths relative to it.
HOME = os.getcwd()
print(HOME)

from stanza.pipeline.core import DownloadMethod
# Shared English Stanza pipeline used by the rest of the notebook (GPU requested via use_gpu=True).
NLP = stanza.Pipeline('en', download_method=DownloadMethod.REUSE_RESOURCES, use_gpu=True) # REUSE_RESOURCES: avoid downloading the models every time
# NLP is a pipeline object: calling it on raw text returns the parsed document (see `doc = NLP(final_str)` below)

  from .autonotebook import tqdm as notebook_tqdm


c:\Users\bouab\DEV\see-and-tell


2023-05-06 19:03:30 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| constituency | wsj       |
| depparse     | combined  |
| sentiment    | sstplus   |
| ner          | ontonotes |

2023-05-06 19:03:31 INFO: Using device: cuda
2023-05-06 19:03:31 INFO: Loading: tokenize
2023-05-06 19:03:35 INFO: Loading: pos
2023-05-06 19:03:35 INFO: Loading: lemma
2023-05-06 19:03:35 INFO: Loading: constituency
2023-05-06 19:03:36 INFO: Loading: depparse
2023-05-06 19:03:36 INFO: Loading: sentiment
2023-05-06 19:03:37 INFO: Loading: ner
2023-05-06 19:03:38 INFO: Done loading processors!


In [3]:
# Three sample captions, glued into a single text so that one pipeline call parses all of them.
sentences = [
    'The woman in the garden is reading a book next to her husband',
    'A man in a blue shirt is talking to another man',
    "A young girl is having breakfast with her mother",
]
final_str = ". ".join(sentences)
# Run the full Stanza pipeline over the combined text.
doc = NLP(final_str)

In [4]:
# From here on `sentences` holds the parsed Stanza sentences of `doc`, no longer the raw strings.
sentences = doc.sentences
# Constituency parse tree of the first sentence.
tree = sentences[0].constituency
# Children of the tree's first child (presumably the clause node right below ROOT — TODO confirm).
c = tree.children[0].children

In [5]:
# The first two constituents of a clause are, in general, a noun phrase (NP) and a verb phrase (VP).
# Take the first component, expected to be the NP.
np1 = c[0]
# Exploratory call on a one-element list of trees; `res` is not used by the cells that follow.
# NOTE(review): the exact return value of get_compound_constituents is not shown here — check the Stanza docs.
res = stanza.models.constituency.parse_tree.Tree.get_compound_constituents([np1])

In [6]:
# Exploration aid: print the API documentation of Stanza's constituency Tree class.
help(stanza.models.constituency.parse_tree.Tree)

Help on class Tree in module stanza.models.constituency.parse_tree:

class Tree(stanza.models.common.stanza_object.StanzaObject)
 |  Tree(label=None, children=None)
 |  
 |  A data structure to represent a parse tree
 |  
 |  Method resolution order:
 |      Tree
 |      stanza.models.common.stanza_object.StanzaObject
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, other)
 |      Return self==value.
 |  
 |  __format__(self, spec)
 |      Turn the tree into a string representing the tree
 |      
 |      Note that this is not a recursive traversal
 |      Otherwise, a tree too deep might blow up the call stack
 |      
 |      There is a type specific format:
 |        O       -> one line PTB format, which is the default anyway
 |        L       -> open and close brackets are labeled, spaces in the tokens are replaced with _
 |        P       -> pretty print over multiple lines
 |        V       -> surround lines with <s>...</s>, don't print ROOT, and turn 

In [7]:
# Goal of this section: identify expressions that could refer to humans.
from nltk.corpus import wordnet as wn
# Reference WordNet noun synsets: the functions below compare candidate words against them.
PEOPLE = wn.synset('people.n.01')
PERSON = wn.synset('person.n.01')

def words_tags(tree):
    """Return the ``(tag, word)`` pair of every preterminal of ``tree``.

    Each preterminal yielded by ``tree.yield_preterminals()`` contributes its
    own label (the tag) and the label of its first child (the word), in the
    order in which the preterminals are yielded.
    """
    pairs = []
    for preterminal in tree.yield_preterminals():
        leaf = preterminal.children[0]
        pairs.append((preterminal.label, leaf.label))
    return pairs

# (tag, word) pairs of the second top-level constituent, then of that constituent's second child.
print(words_tags(c[1]))
print(words_tags(c[1].children[1]))

[('VBZ', 'is'), ('VBG', 'reading'), ('DT', 'a'), ('NN', 'book'), ('RB', 'next'), ('IN', 'to'), ('PRP$', 'her'), ('NN', 'husband')]
[('VBG', 'reading'), ('DT', 'a'), ('NN', 'book'), ('RB', 'next'), ('IN', 'to'), ('PRP$', 'her'), ('NN', 'husband')]


In [8]:
def person_word(word:str, threshold: float=0.2, synset_reference=None) -> bool:
    """Tell whether at least one WordNet sense of ``word`` is close to a reference synset.

    Args:
        word: the (ideally lemmatized) word to look up in WordNet.
        threshold: minimum path similarity for a sense to count as a match.
        synset_reference: synset to compare against; defaults to ``PERSON``,
            so the same function can also be used with ``PEOPLE``.

    Returns:
        True as soon as one sense of ``word`` reaches ``threshold`` against the
        reference synset, False otherwise (including when WordNet knows no
        sense of ``word``).
    """
    if synset_reference is None:
        synset_reference = PERSON # this way we can use PEOPLE as well with the same function

    for meaning in wn.synsets(word):
        # compare against the requested reference, not unconditionally against PERSON
        similarity = meaning.path_similarity(synset_reference)
        # path_similarity is documented to return None when no connecting path exists
        if similarity is not None and similarity >= threshold:
            return True

    return False


In [9]:
# Inspect every WordNet sense of "mother": print its definition, then its path similarity to PERSON.
for meaning in wn.synsets('mother'):
    print(meaning.definition())
    print(meaning.path_similarity(PERSON))


a woman who has given birth to a child (also used as a term of address to your mother)
0.14285714285714285
a stringy slimy substance consisting of yeast cells and bacteria; forms during fermentation and is added to cider or wine to produce vinegar
0.125
a term of address for an elderly woman
0.2
a term of address for a mother superior
0.16666666666666666
a condition that is the inspiration for an activity or situation
0.09090909090909091
care for like a mother
0.1
make children
0.125


In [10]:
def lemmatize(word: str , tag: str):
    """Return the shortest WordNet lemma of ``word``, or ``word`` itself if none is found.

    Args:
        word: the surface form to lemmatize.
        tag: part-of-speech tag; the lower-cased first letter is used as the
            WordNet part of speech (e.g. 'NN' -> 'n', 'VBG' -> 'v').

    Returns:
        The shortest candidate produced by ``wn._morphy`` (a private NLTK
        helper), or ``word`` unchanged when there is no candidate.

    If the lookup with the tag-derived part of speech fails (empty tag, or a
    first letter the WordNet lookup rejects), the word is treated as a noun.
    """
    def _shortest_lemma(pos):
        lemmas = wn._morphy(word, pos)
        return min(lemmas, key=len) if lemmas else word

    try:
        return _shortest_lemma(tag.lower()[0])
    except Exception:
        # `except Exception` (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        # assume it is a noun
        return _shortest_lemma('n')
    

In [11]:
def num_person_words(noun_phrase: list[tuple[str, str]], known_person_words: set):
    """Collect the lemmas of a noun phrase that may denote a person.

    Args:
        noun_phrase: ``(tag, word)`` pairs of the phrase.
        known_person_words: lemmas already accepted as person words; it is
            updated in place with the lemmas found here.

    Returns:
        A pair ``(person_lemmas, known_person_words)``: the accepted lemmas in
        phrase order, and the same (now updated) set that was passed in.
    """
    person_lists = []
    for pair in noun_phrase:
        tag, word = pair[0], pair[1]
        # every word is lemmatized, but only singular common nouns ('NN') can qualify
        lemma = lemmatize(word, tag)
        if tag != 'NN':
            continue
        if (lemma in known_person_words
                or person_word(lemma, synset_reference=PERSON)
                or person_word(lemma, synset_reference=PEOPLE)):
            person_lists.append(lemma)

    # remember the accepted lemmas so later phrases can skip the WordNet lookup
    known_person_words.update(person_lists)
    return person_lists, known_person_words

In [12]:
def get_NP_components(root, person_np: set=None):
    """Return the largest noun phrases under ``root`` that contain exactly one person word.

    Args:
        root: a constituency tree node exposing ``label`` and ``children``.
        person_np: set of already known person words, shared across the
            recursion and passed on to ``num_person_words``; a fresh set is
            used when omitted.

    Returns:
        A list of noun phrases, each given as the ``(tag, word)`` pairs
        produced by ``words_tags``.
    """
    if person_np is None:
        person_np = set()

    if root.label == 'NP':
        noun_phrase = words_tags(root)
        person_hits, person_np = num_person_words(noun_phrase, person_np)

        # exactly one person word: this phrase is the answer for this subtree
        if len(person_hits) == 1:
            return [noun_phrase]
        # no person word at all: nothing to report and no need to descend
        if len(person_hits) < 2:
            return []
        # two or more person words: fall through and split the phrase via its children

    collected = []
    for child in root.children:
        collected += get_NP_components(child, person_np)
    return collected

In [13]:
def convert_to_text(l: list[tuple[str, str]], filter):
    """Join the words of a tagged phrase into one lower-cased string.

    Args:
        l: ``(tag, word)`` pairs.
        filter: when truthy, keep only the words tagged 'NN' or 'JJ';
            otherwise keep every word.

    Returns:
        The kept words separated by single spaces, stripped and lower-cased.
    """
    kept_tags = ('NN', 'JJ')
    words = []
    for pair in l:
        if filter and pair[0] not in kept_tags:
            continue
        words.append(pair[1])
    return " ".join(words).strip().lower()


def extract_NP_text(text: str, nlp_object=None, plain_text:bool=True, filter:bool=True):
    """Extract, sentence by sentence, the noun phrases that refer to a single person.

    Args:
        text: raw text to parse.
        nlp_object: callable pipeline returning a document with ``sentences``;
            the notebook-level ``NLP`` pipeline is used when omitted.
        plain_text: when True, each noun phrase is rendered with
            ``convert_to_text``; when False, the raw ``(tag, word)`` lists are
            returned.
        filter: forwarded to ``convert_to_text`` (only relevant for plain text).

    Returns:
        One list per sentence, holding that sentence's noun phrases.
    """
    pipeline = NLP if nlp_object is None else nlp_object
    parsed = pipeline(text)

    # the set of known person words is shared by all sentences of the text
    person_words = set()
    np_components = [
        get_NP_components(sentence.constituency.children[0], person_words)
        for sentence in parsed.sentences
    ]

    if not plain_text:
        return np_components

    return [[convert_to_text(t, filter=filter) for t in component] for component in np_components]


In [14]:
# Demo: four sample captions, joined into one text and reduced to their person noun phrases.
sentences = [
    'The woman in the garden is reading a book next to her husband',
    'A man in a blue shirt is talking to another man',
    "A young girl is having breakfast with her mother",
    "One boy and one man were hitting an old woman",
]
final_str = ". ".join(sentences)
print(final_str)
# plain-text output, keeping only the nouns and adjectives of each phrase
results = extract_NP_text(final_str, plain_text=True, filter=True)
print(results)

The woman in the garden is reading a book next to her husband. A man in a blue shirt is talking to another man. A young girl is having breakfast with her mother. One boy and one man were hitting an old woman
[['woman garden', 'husband'], ['man blue shirt', 'man'], ['young girl', 'mother'], ['boy', 'man', 'old woman']]


In [15]:
# for res, s in zip(results, sentences):
#     print(s)
#     print("Person Noun Phrases")
#     for np in res:
#         print(" ".join([w[1] for w in np]))

#     print("#" * 100)


# Face recognition

In [16]:
from pathlib import Path
HOME = os.getcwd()
import json
# Face embeddings used by the recognition step below; the top-level keys of this JSON
# object are later passed on as the candidate class names.
# The encoding is given explicitly: open() otherwise uses a locale-dependent default.
with open(os.path.join(HOME,'src','TBBT_embeddings_16_256.json'), encoding='utf-8') as f:
    bbt_embeddings = json.load(f)


In [17]:
from src.experimental.exp import get_caption
from src.face.face_recognition import recognize_faces
def get_result_image(image_path):
    """Caption one image and recognise the faces it contains.

    The caption comes from ``get_caption``; the faces come from
    ``recognize_faces``, restricted to the classes present in the notebook-level
    ``bbt_embeddings``. Both results are printed and then returned.

    Args:
        image_path: path of the image to process.

    Returns:
        A pair ``(caption, faces)``.
    """
    caption = get_caption(image_path)
    # every key of the embeddings is a candidate class for the recogniser
    candidate_classes = list(bbt_embeddings.keys())
    faces = recognize_faces(
        image_path,
        embeddings=bbt_embeddings,
        possible_classes=candidate_classes,
        display=False,
    )
    print(caption)
    print(faces)
    return caption, faces
import os

# Directory holding the frames to process.
f = os.path.join(HOME, 'src', 'frames')
# os.listdir returns entries in arbitrary order; sort them so that the index-based
# selections made further down pick the same files on every run.
frames = sorted(os.listdir(f))

In [18]:
# images = [os.path.join(f, frames[i]) for i in range()]

In [21]:
f2 = os.path.join(HOME, 'src', 'frames')
# Keep only files named "<number>.<extension>" whose name does not start with '0', ordered
# by that number. Using os.path.splitext instead of a fixed 4-character cut supports any
# extension length, and the isdecimal() check skips stray files (e.g. thumbnails caches)
# that would otherwise make int() raise ValueError.
frames2 = sorted(
    (name for name in os.listdir(f2)
     if os.path.splitext(name)[0].isdecimal() and not name.startswith('0')),
    key=lambda name: int(os.path.splitext(name)[0]),
)

# Frames at sorted positions 1 to 4; slicing does not raise IndexError when fewer frames exist.
images = [os.path.join(f2, name) for name in frames2[1:5]]

In [None]:

# Caption the selected frames in a single call.
# NOTE(review): get_caption receives a list plus a positional flag here, but a single path in
# get_result_image — confirm that its signature supports both forms and what the flag means.
preds = get_caption(images, False)

In [None]:
import pickle
# stop
# Caption and face-recognise every second frame among the first 40, i.e. at most 20 images.
# Slicing (rather than indexing with range(0, 40, 2)) avoids an IndexError when the
# folder holds fewer frames.
results = [get_result_image(os.path.join(f, name)) for name in frames[0:40:2]]

In [None]:
# Transpose the list of (caption, faces) pairs in `results` into two parallel lists.
# Unpacking fails with ValueError when `results` is empty.
captions, preds = list(map(list, zip(*results)))
# Optional: persist both lists with pickle (kept disabled).
# with open( "captions.p", "wb" ) as f:
# 	pickle.dump(captions, f)

# with open( "preds.p", "wb" ) as f:
# 	pickle.dump(preds, f)


In [None]:
from collections import Counter
import numpy as np

def char_names_predictions(face_predictions: list[list[list[str, np.array]]]):
    """Keep only the character name of every face prediction.

    Args:
        face_predictions: one list per image; each prediction is a sequence
            whose first element is the character name.

    Returns:
        One list of character names per image, in the original order.
    """
    char_names = []
    for image_predictions in face_predictions:
        names = []
        for prediction in image_predictions:
            names.append(prediction[0])
        char_names.append(names)
    return char_names


In [None]:

def build_captions_class_matrix(captions: list[list[str]], face_predictions: list[list[str]]):
    """Count how often each caption word co-occurs with each predicted class.

    Captions and predictions are paired image by image; an image is used only
    when it has as many noun phrases as predicted faces, in which case the
    i-th noun phrase is paired with the i-th prediction.

    Args:
        captions: for every image, the list of its noun-phrase strings.
        face_predictions: for every image, the list of predicted class names.

    Returns:
        A triple ``(class_np_counter, np_class_counter, np_components)``:
        ``class_np_counter`` maps each class to a Counter of the words seen
        with it (a word counts once per noun phrase), ``np_class_counter`` maps
        each word to the set of classes it was seen with, and
        ``np_components`` is the set of all words encountered.
    """
    class_np_counter = Counter() # to save the frequency of each word with each class
    np_components = set()
    np_class_counter = {} # to save which classes each word was seen with

    # walk both lists in parallel (image by image)
    for cap_list, pred_list in zip(captions, face_predictions):
        if len(cap_list) != len(pred_list):
            continue
        for cap, pred in zip(cap_list, pred_list):
            words = cap.split(" ")
            # register every word of this noun phrase
            np_components.update(words)
            if pred not in class_np_counter:
                class_np_counter[pred] = Counter() # a dictionary for each of the classes
            # a word is counted at most once per noun phrase
            class_np_counter[pred].update(set(words))
            # remember that each of these words was seen with this class
            for word in words:
                np_class_counter.setdefault(word, set()).add(pred)

    return class_np_counter, np_class_counter, np_components


In [None]:


def find_decided_captions(captions: list[list[str]], face_predictions: list[list[str]]):
    """Count caption words for the images whose assignment is unambiguous.

    An image is "decided" when it has exactly one noun phrase and exactly one
    predicted face: that noun phrase can only describe that face.

    Args:
        captions: for every image, the list of its noun-phrase strings.
        face_predictions: for every image, the list of predicted class names.

    Returns:
        A Counter mapping each class to a Counter of the words of its decided
        noun phrases (a word counts once per noun phrase). The result is empty
        when no image is decided.
    """
    decisive_class_np_counter = Counter()
    for cap_list, pred_list in zip(captions, face_predictions):
        if not (len(cap_list) == len(pred_list) == 1):
            continue
        caption, pred = cap_list[0], pred_list[0]
        if pred not in decisive_class_np_counter:
            decisive_class_np_counter[pred] = Counter()
        decisive_class_np_counter[pred].update(set(caption.split(" ")))
    return decisive_class_np_counter


In [None]:

def np_class_score(np: str, class_prediction: str, class_np_counter: Counter, np_class_counter: Counter, decided_class_np: Counter):
    """Score how well a noun phrase matches a predicted class.

    Each word of the phrase gets ``log(numerator) - log(denominator) + 1``
    where the numerator is 1 + the number of times the word was seen with the
    class + the number of times it was seen with the class in a decided
    caption, and the denominator is 1 + the number of distinct classes the
    word was seen with. The phrase score is the mean of its word scores.

    Args:
        np: the noun phrase, words separated by single spaces. The parameter
            name shadows numpy inside this function, so the computation relies
            on ``math`` and plain Python only.
        class_prediction: the class (character name) to score against.
        class_np_counter: class -> Counter of words seen with that class.
        np_class_counter: word -> set of classes the word was seen with.
        decided_class_np: class -> Counter of words seen in decided captions.

    Returns:
        The mean word score as a float. Classes and words that were never
        seen simply contribute zero counts.
    """
    import math

    # per-class word counts; an unseen class behaves like an empty counter
    class_counts = class_np_counter.get(class_prediction) or {}
    decided_counts = decided_class_np.get(class_prediction) or {}

    def word_class_score(word: str):
        # numerator: 1 + occurrences of the word with the class + occurrences in decisive predictions
        numerator = 1 + class_counts.get(word, 0) + decided_counts.get(word, 0)
        # denominator: 1 + the number of unique classes the word was associated with
        denominator = 1 + len(np_class_counter.get(word, ()))

        return math.log(numerator) - math.log(denominator) + 1

    words = np.split(" ")
    return sum(word_class_score(w) for w in words) / len(words)

    

In [None]:
import pandas as pd

def map_np_char_name(nps: list[str], face_predictions: list[str], class_np_counter: Counter, np_class_counter: Counter, decided_class_np:Counter):
    """Greedily assign each predicted face to its best-scoring noun phrase.

    Every (noun phrase, face) pair is scored with ``np_class_score``. The
    best-scoring pair still available is fixed, both of its members are
    removed from the candidates, and the process repeats until either the
    noun phrases or the faces run out. Ties go to the earlier face, then to
    the earlier noun phrase.

    Args:
        nps: noun-phrase strings of one caption.
        face_predictions: class names predicted for the same image.
        class_np_counter, np_class_counter, decided_class_np: statistics
            forwarded unchanged to ``np_class_score``.

    Returns:
        A dict mapping face prediction -> noun phrase. If the same class name
        appears several times in ``face_predictions``, the later assignment
        overwrites the earlier one.
    """
    # Score by position rather than by value, so repeated noun phrases stay distinct.
    scores = {
        (row, col): np_class_score(noun_phrase, face_pred, class_np_counter, np_class_counter, decided_class_np)
        for row, noun_phrase in enumerate(nps)
        for col, face_pred in enumerate(face_predictions)
    }

    free_rows = list(range(len(nps)))
    free_cols = list(range(len(face_predictions)))
    mapping = {}
    while free_rows and free_cols:
        # best remaining (noun phrase, face) pair
        best_row, best_col = max(
            ((row, col) for col in free_cols for row in free_rows),
            key=scores.__getitem__,
        )
        # map the best face prediction to the best noun phrase
        mapping[face_predictions[best_col]] = nps[best_row]
        # neither of them can be assigned again
        free_rows.remove(best_row)
        free_cols.remove(best_col)

    return mapping

In [None]:
def replace_with_char_names(captions: list[str], face_predictions: list[list[list[str, np.array]]]):
    """Rewrite captions so that person noun phrases are replaced by character names.

    For every caption the person noun phrases are extracted and matched with
    the faces recognised in the corresponding image. When a caption has as
    many person noun phrases as recognised faces, each noun phrase is replaced
    (first occurrence, case-insensitive) by the character name assigned to it;
    otherwise the caption is returned unchanged.

    Args:
        captions: one caption per image.
        face_predictions: one list of face predictions per image, in the
            format accepted by ``char_names_predictions``.

    Returns:
        A list of caption strings with the same length and order as
        ``captions``.
    """
    import re

    # first off extract plain names
    face_names = char_names_predictions(face_predictions)

    # Parse each caption on its own so that captions, noun phrases and predictions stay
    # aligned even if the sentence splitter cuts a caption into several sentences.
    tagged_nps = []
    for caption in captions:
        per_sentence = extract_NP_text(caption, plain_text=False)
        tagged_nps.append([phrase for sentence in per_sentence for phrase in sentence])

    # full text of each noun phrase (what gets replaced inside the caption)
    plain_text_nps = [[convert_to_text(phrase, filter=False) for phrase in phrases] for phrases in tagged_nps]
    # filtered text of each noun phrase (what the statistics are computed on)
    filtered_text_nps = [[convert_to_text(phrase, filter=True) for phrase in phrases] for phrases in tagged_nps]

    # co-occurrence statistics between noun-phrase words and predicted classes
    class_np_counter, np_class_counter, _ = build_captions_class_matrix(filtered_text_nps, face_names)
    # statistics restricted to the unambiguous captions
    decisive_class_np_counter = find_decided_captions(filtered_text_nps, face_names)

    final_captions = []
    for caption_index, caption in enumerate(captions):
        np_list = filtered_text_nps[caption_index]
        plain_list = plain_text_nps[caption_index]
        pred_list = face_names[caption_index] if caption_index < len(face_names) else []

        # without a one-to-one correspondence there is nothing reliable to replace
        if not np_list or len(np_list) != len(pred_list):
            final_captions.append(caption)
            continue

        mapping = map_np_char_name(np_list, pred_list, class_np_counter, np_class_counter, decisive_class_np_counter)
        renamed = caption
        used_indices = set()
        # mapping goes from face prediction to (filtered) noun phrase
        for face_pred, phrase in mapping.items():
            # first not-yet-used position of this phrase, so repeated phrases map to distinct spans
            np_index = next(
                (i for i, candidate in enumerate(np_list) if candidate == phrase and i not in used_indices),
                None,
            )
            if np_index is None or not plain_list[np_index]:
                continue
            used_indices.add(np_index)
            pattern = re.compile(re.escape(plain_list[np_index]), flags=re.IGNORECASE)
            # a function replacement keeps the name literal (no backslash processing)
            renamed = pattern.sub(lambda _match, name=face_pred: name, renamed, count=1)
        final_captions.append(renamed)

    return final_captions

In [None]:
# NOTE(review): this cell reads HOME/frames whereas an earlier cell reads HOME/src/frames — confirm which folder is intended.
f2 = os.path.join(HOME, 'frames')
# Keep only files named "<number>.<extension>" whose name does not start with '0', ordered
# by that number. os.path.splitext supports any extension length, and the isdecimal()
# check skips stray files that would otherwise make int() raise ValueError.
frames2 = sorted(
    (name for name in os.listdir(f2)
     if os.path.splitext(name)[0].isdecimal() and not name.startswith('0')),
    key=lambda name: int(os.path.splitext(name)[0]),
)

In [None]:
import pickle
# Caption and face-recognise the frames at sorted positions 1 to 24; slicing (rather than
# indexing with range(1, 25)) avoids an IndexError when the folder holds fewer frames.
results = [get_result_image(os.path.join(f2, name)) for name in frames2[1:25]]