In [1]:
#| default_exp demo

# Demo

## Visualization Code

In [2]:
#| export
import spacy
import numpy as np
from spacy import displacy
from typing import List, Dict
import matplotlib.pyplot as plt
import gradio as gr


def index_to_hex_color(idx: int, total: int, cmap: str = "viridis") -> str:
    """
    Pick a hex color for position `idx` out of `total` evenly spaced samples of a colormap.

    Parameters:
        idx (int): Index of the color
        total (int): Total number of colors
        cmap (str): Colormap name

    Returns:
        hex_color (str): Hex color code of the form "#rrggbb"

    """
    # With a single color there is no range to spread over; dividing by
    # (total - 1) would raise ZeroDivisionError, so sample the start of the colormap.
    if total > 1:
        normalized_idx = idx / float(total - 1)
    else:
        normalized_idx = 0.0

    # The colormap call yields an RGBA tuple of floats; alpha is discarded.
    r, g, b, _ = plt.get_cmap(cmap)(normalized_idx)
    hex_color = "#{:02x}{:02x}{:02x}".format(int(255 * r), int(255 * g), int(255 * b))

    return hex_color

def flatten_matches(target_items, response_units, match_matrix):
    """
    Collapse a match matrix into two parallel lists of matched items.

    Each row of `match_matrix` is paired by position with an entry of
    `target_items`. Rows with no truthy entry are skipped; for every other
    row, the response unit at the row's argmax column is selected.

    Returns:
        (matched_target_items, matched_response_units): two lists of equal length.
    """
    pairs = [
        (target_items[row_idx], response_units[np.argmax(row)])
        for row_idx, row in enumerate(match_matrix)
        if np.any(row)
    ]

    matched_target_items = [target for target, _ in pairs]
    matched_response_units = [unit for _, unit in pairs]

    return matched_target_items, matched_response_units

# Cache of loaded spaCy pipelines keyed by model name, so repeated renders
# (e.g. one per button click in the demos) do not reload the model from disk.
_SPACY_PIPELINES = {}


def _get_spacy_pipeline(name: str = "en_core_web_sm"):
    """Load the named spaCy pipeline on first use and reuse it afterwards."""
    if name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[name] = spacy.load(name)
    return _SPACY_PIPELINES[name]


def render_matched_response_spans(
        response: str, target_items, response_units, match_matrix, title):
    """
    Renders matched response units as spans in a response.

    Parameters:
        response (str): Full response text to annotate.
        target_items: Items matched against the response; each is either a dict
            with a "text" key or a value used directly as the span label.
        response_units: Units whose `unit["spans"][0]` gives the (start, end)
            character offsets of the unit within `response`.
        match_matrix: One row per target item; see `flatten_matches`.
        title: Title stored in `doc.user_data["title"]` for displaCy to show.

    Returns:
        html (str): HTML code for rendering the response with matched response units as spans.

    Raises:
        ValueError: If spaCy cannot build a span from a unit's character offsets.
    """

    matched_target_items, matched_response_units = flatten_matches(target_items, response_units, match_matrix)

    nlp = _get_spacy_pipeline("en_core_web_sm")
    doc = nlp(response)

    # Nothing matched: fall back to the plain entity rendering of the response.
    if len(matched_response_units) == 0:
        return displacy.render(doc, style="ent")

    # create spacy span objects for each matched response unit
    spans = []
    for target_item, unit in zip(matched_target_items, matched_response_units):
        span_label = target_item["text"] if isinstance(target_item, dict) else target_item

        # Only the first (start, end) pair of each unit is rendered.
        start_idx, end_idx = unit["spans"][0][0], unit["spans"][0][1]
        span = doc.char_span(start_idx=start_idx, end_idx=end_idx, label=span_label)

        # char_span returns None when it cannot build a span from the offsets;
        # fail before the invalid entry is added to the list.
        if span is None:
            raise ValueError(f"Could not create span for {unit['spans'][0]}")
        spans.append(span)

    # Generate colors based on the colormap
    colors = {span.label_: index_to_hex_color(idx, len(spans)) for idx, span in enumerate(spans)}
    doc.spans["sc"] = spans
    doc.user_data["title"] = title

    return displacy.render(doc, style="span", page=True, options={"colors": colors})

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os
from response_sequencer.datasets import SensesDataset
from response_sequencer.segmenting import MultiSentenceFragmentsSegmenter, SimpleSentenceSegmenter, SentenceSegmenter
from response_sequencer.scoring import SentenceTransformerScorer, CrossEncoderScorer, ContextualizedEmbeddingScorer
from response_sequencer.matching import MaximumScoreMatcher, OptimalSingleAssignmentMatcher
from response_sequencer.sequencing import PipelineSequencer

In [4]:
# Configuration for the senses demo. The names bound here (`dataset`, `sequencer`, ...)
# are notebook globals that the render functions defined below read at call time.
section_tag = 'base' # unique identifier for this variation of notebook parameters
output_dir = '../data/'

# Both file names are derived from `section_tag` and resolved under `output_dir`.
dataset = SensesDataset(
    os.path.join(output_dir, f'{section_tag}_senses.h5'), 
    os.path.join(output_dir, f'{section_tag}_sense_pool.txt'))

# Pipeline: segment the response -> score candidates -> match to targets.
segmenter = MultiSentenceFragmentsSegmenter(max_sentences=2, min_tokens=2)
# Alternative scorer, left disabled:
# scorer = SentenceTransformerScorer("all-mpnet-base-v2")
scorer = ContextualizedEmbeddingScorer(
    model_name='sentence-transformers/all-mpnet-base-v2', layer_depth=1)
matcher = OptimalSingleAssignmentMatcher(scorer)
sequencer = PipelineSequencer(segmenter, matcher)


In [5]:
def render_human_sense_annotations_for_trial(trial_index):
    """
    Render the matches stored in the dataset for one trial, titled "Human Annotation".

    Reads the notebook-global `dataset`; returns whatever
    `render_matched_response_spans` returns for that trial.
    """
    trial = dataset[trial_index]
    return render_matched_response_spans(
        response=trial['response_transcript'],
        target_items=trial['target_items'],
        response_units=trial['response_units'],
        match_matrix=trial['matches'],
        title="Human Annotation",
    )

def render_machine_sense_annotations_for_trial(trial_index):
    """
    Re-match one trial with the notebook-global `sequencer` and render the result,
    titled "Machine Annotation".

    Only target items that have a match in the stored annotations are passed to
    the sequencer, so both renderings cover the same set of targets.
    """
    trial = dataset[trial_index]
    transcript = trial['response_transcript']

    # Keep just the targets with a stored match; the matched units are not needed here.
    matched_target_items = flatten_matches(
        trial['target_items'], trial['response_units'], trial['matches'])[0]

    sequenced = sequencer(transcript, matched_target_items)

    return render_matched_response_spans(
        response=transcript,
        target_items=matched_target_items,
        response_units=sequenced['response_units'],
        match_matrix=sequenced['matches'],
        title="Machine Annotation",
    )

def render_response_transcript(trial_index):
    """Return the 'response_transcript' entry of the given trial from the notebook-global `dataset`."""
    return dataset[trial_index]['response_transcript']

In [6]:
# Spot-check a single trial: show its transcript, then render the stored
# annotation and the sequencer's annotation for the same trial.
trial_index = 1500

display(dataset[trial_index]['response_transcript'])
render_human_sense_annotations_for_trial(trial_index)
render_machine_sense_annotations_for_trial(trial_index)

"Grill. Okay. Um, think of the literal grill, uh, something that you might cook your food on. It's like a positive association with your friends and family and gathering for the summer. And, um, then in direct opposition, I think of someone that is like the phrasing of like you're in my grill. Like my space is being invaded, please step away. Um, that is, is definitely more of a, like a negative association. And then I would think grill of a car, um, so in front of a car and, um, grilling in terms of like questioning, like I'm really gonna grill someone, I'm gonna ask them a lot of questions. Um, so which feels kind of similar to like being in your grill, if, if you were a car and you had a grill, I guess that's maybe what this, what that phrasing is from. I don't know. Um, but, uh, but yeah, like say like, you're really gonna be coming on strong with someone with an intent that to like, I don't know, get an answer from them."

In [7]:
import gradio as gr

In [8]:
#| slow
def senses_demo(share=False):
    """
    Build and launch a Gradio app for browsing senses-dataset trials.

    The app has a trial-index slider and a "Run" button. Clicking the button
    shows the transcript plus the human and machine annotation renderings for
    the selected trial, side by side.

    Parameters:
        share (bool): Forwarded to `demo.launch(share=...)`.
    """

    with gr.Blocks() as demo:
        gr.Markdown("# Senses Dataset Demo\nSelect a trial and run to see how target units were matched to the response. If a run doesn't produce an output, there may be no transcript for this trial.")
        # The last valid index is len(dataset) - 1 (assuming 0-based indexing); using
        # len(dataset) as the maximum let the slider select an out-of-range trial.
        last_trial_index = max(len(dataset) - 1, 0)
        slider = gr.Slider(minimum=0, maximum=last_trial_index, step=1, label="Trial Index")
        out0 = gr.Markdown()
        with gr.Row():
            out1 = gr.HTML()
            out2 = gr.HTML()
        btn = gr.Button("Run")
        # One click handler per output component; all read the same slider value.
        btn.click(fn=render_human_sense_annotations_for_trial, inputs=slider, outputs=out1)
        btn.click(fn=render_machine_sense_annotations_for_trial, inputs=slider, outputs=out2)
        btn.click(fn=render_response_transcript, inputs=slider, outputs=out0)

    demo.launch(share=share)

# share=True: the recorded output below lists a public gradio.live URL in
# addition to the local one, so the demo is reachable from outside this machine.
senses_demo(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://24af3fabccc3219f4f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


## Narrative Recall Demo

In [None]:
import os
from response_sequencer.datasets import SBS_NarrativeDataset
from response_sequencer.segmenting import MultiSentenceFragmentsSegmenter, SimpleSentenceSegmenter, SentenceSegmenter
from response_sequencer.scoring import SentenceTransformerScorer, CrossEncoderScorer, ContextualizedEmbeddingScorer
from response_sequencer.matching import MaximumScoreMatcher, OptimalSingleAssignmentMatcher
from response_sequencer.sequencing import PipelineSequencer

In [None]:
# Configuration for the narrative recall demo.
# NOTE: this cell rebinds `dataset`, `segmenter`, `scorer`, `matcher` and `sequencer`;
# functions defined earlier that read those globals will use these objects from here on.

# The default is a machine-specific absolute path. Set the SBS_NARRATIVE_DATA_DIR
# environment variable to point at the data on another machine.
data_directory = os.environ.get(
    'SBS_NARRATIVE_DATA_DIR', 'C:/Users/gunnj/compmempy/data/narrative')
dataset = SBS_NarrativeDataset(data_directory)

# Pipeline: segment the response -> score candidates -> match to targets.
segmenter = MultiSentenceFragmentsSegmenter(max_sentences=1, min_tokens=2)
# Alternative scorer, left disabled:
# scorer = SentenceTransformerScorer("all-mpnet-base-v2")
scorer = ContextualizedEmbeddingScorer(
    model_name='sentence-transformers/all-mpnet-base-v2', layer_depth=1)
matcher = MaximumScoreMatcher(scorer)
sequencer = PipelineSequencer(segmenter, matcher)


In [None]:
def render_human_sbs_narrative_annotations_for_trial(trial_index):
    """
    Render the matches stored in the dataset for one trial, titled "Human Annotation".

    Reads the notebook-global `dataset`; returns whatever
    `render_matched_response_spans` returns for that trial.
    """
    trial = dataset[trial_index]
    return render_matched_response_spans(
        response=trial['response_transcript'],
        target_items=trial['target_items'],
        response_units=trial['response_units'],
        match_matrix=trial['matches'],
        title="Human Annotation",
    )

def render_machine_sbs_narrative_annotations_for_trial(trial_index):
    """
    Re-match one trial with the notebook-global `sequencer` and render the result,
    titled "Machine Annotation".

    Only target items that have a match in the stored annotations are passed to
    the sequencer, together with the trial's 'target_context' entry.
    """
    trial = dataset[trial_index]
    transcript = trial['response_transcript']

    # Keep just the targets with a stored match; the matched units are not needed here.
    matched_target_items = flatten_matches(
        trial['target_items'], trial['response_units'], trial['matches'])[0]

    sequenced = sequencer(transcript, matched_target_items, trial['target_context'])

    return render_matched_response_spans(
        response=transcript,
        target_items=matched_target_items,
        response_units=sequenced['response_units'],
        match_matrix=sequenced['matches'],
        title="Machine Annotation",
    )

def render_response_transcript(trial_index):
    """Return the 'response_transcript' entry of the given trial from the notebook-global `dataset`."""
    return dataset[trial_index]['response_transcript']

In [None]:
# Spot-check a single trial: show its transcript, then render the stored
# annotation and the sequencer's annotation for the same trial.
trial_index = 0

display(dataset[trial_index]['response_transcript'])
render_human_sbs_narrative_annotations_for_trial(trial_index)
render_machine_sbs_narrative_annotations_for_trial(trial_index)

'It was a hot, sunny day and Rachel and her friend Kaylie decided it would be a good day to go to the beach.  They headed out to Old Orchard beach in Maine.  The sun looked like a hot, yellow pellet in the sky.   There were people laying under beach umbrellas wearing wide brimmed hats. Rachael decided that she would like to lay down in the sun before going swimming.  Kaylie dedcided to go for a swim.  A little girl in a light purple bathing suit with two floaties darted out in front of her followed closely by a boy in green shorts.  Kaylie smiled to herself remembering long past beach days with her brothers.  She saw 4 teenagers flying paragliders.  She thought about this as she walked over to lay down next to Rachel, thinking about what a perfect day this was turning out to be.'

In [None]:
#| slow

def narratives_demo(share=False):
    """
    Build and launch a Gradio app for browsing SBS narrative dataset trials.

    The app has a trial-index slider and a "Run" button. Clicking the button
    shows the transcript plus the human and machine annotation renderings for
    the selected trial, side by side.

    Parameters:
        share (bool): Forwarded to `demo.launch(share=...)`.
    """

    with gr.Blocks() as demo:
        gr.Markdown("# SBS Narrative Dataset Demo\nSelect a trial and run to see how target units were matched to the response. If a run doesn't produce an output, there may be no transcript for this trial.")
        # The last valid index is len(dataset) - 1 (assuming 0-based indexing); using
        # len(dataset) as the maximum let the slider select an out-of-range trial.
        last_trial_index = max(len(dataset) - 1, 0)
        slider = gr.Slider(minimum=0, maximum=last_trial_index, step=1, label="Trial Index")
        out0 = gr.Markdown()
        with gr.Row():
            out1 = gr.HTML()
            out2 = gr.HTML()
        btn = gr.Button("Run")
        # One click handler per output component; all read the same slider value.
        btn.click(fn=render_human_sbs_narrative_annotations_for_trial, inputs=slider, outputs=out1)
        btn.click(fn=render_machine_sbs_narrative_annotations_for_trial, inputs=slider, outputs=out2)
        btn.click(fn=render_response_transcript, inputs=slider, outputs=out0)

    demo.launch(share=share)

# The positional True is the `share` argument: the recorded output below lists
# a public gradio.live URL in addition to the local one.
narratives_demo(True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://170e746d67e43c918e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


In [None]:
#| hide
# Run nbdev's export step so cells marked `#| export` are written out to the library module.
import nbdev; nbdev.nbdev_export()