## Load data

In [1]:
import pymysql
from sqlalchemy import create_engine

# SQLAlchemy URL: MySQL via the PyMySQL driver, user `fnbrasil`, host localhost,
# port 3307, database `fnbr_db`. The URL carries no password.
conn_str = 'mysql+pymysql://fnbrasil@localhost:3307/fnbr_db'

# Shared engine used by every `pd.read_sql` call below. `pool_recycle=3600` is
# SQLAlchemy's setting for replacing pooled connections older than 3600 seconds.
engine = create_engine(conn_str, pool_recycle=3600)

In [2]:
import pandas as pd

# Frames joined with their `entry` rows (name + description), restricted to
# idLanguage = 2.
# NOTE(review): the prompts printed further down are in English, so language
# id 2 is presumably English - confirm against the language table.
# The result is indexed by idFrame, and `drop=False` keeps idFrame as a regular
# column too, so rows can be fetched with `.loc[idFrame]` and still expose the id.
df_frm = pd.read_sql('''
    select f.idFrame, e.name, e.description
    from frame f
    join entry e on e.entry = f.entry
    where idLanguage = 2;
''', engine).set_index("idFrame", drop=False)

In [3]:
# Frame elements joined with their `entry` rows (name + description), restricted
# to idLanguage = 2. Each row keeps the owning frame id (`idFrame`) and the
# `coreType` code that later cells filter on ("cty_core", "cty_core-unexpressed").
# Indexed by idFrameElement, which is also kept as a column (`drop=False`).
df_fe = pd.read_sql('''
    select fe.idFrameElement, fe.coreType, e.name, e.description, fe.idFrame
    from frameelement fe
    join entry e on e.entry = fe.entry
    where idLanguage = 2;
''', engine).set_index("idFrameElement", drop=False)

In [4]:
# Lexical units with their sense description and owning frame id. The join with
# `lemma` exists only for the language filter.
# NOTE(review): `idLanguage` is unqualified; presumably it is a column of
# `lemma` - confirm.
# Indexed by idLU, which is also kept as a column (`drop=False`).
df_lu = pd.read_sql('''
    select lu.idLU, lu.name, lu.senseDescription, lu.idFrame 
    from lu
    join lemma le on le.idLemma = lu.idLemma 
    where idLanguage = 2;
''', engine).set_index("idLU", drop=False)

In [5]:
# One row per entity relation whose two endpoints are both frames (inner joins
# on `frame.idEntity` drop every other relation). Columns: the relation type's
# `entry` string plus the frame ids of endpoint 1 and endpoint 2.
# No language filter is applied and the default integer index is kept.
df_rel = pd.read_sql('''
    select r.entry, f1.idFrame as 'idFrame1', f2.idFrame as 'idFrame2'
    from entityrelation e
    join relationtype r on r.idRelationType = e.idRelationType  
    join frame f1 on e.idEntity1 = f1.`idEntity` 
    join frame f2 on e.idEntity2 = f2.`idEntity`;
''', engine)

In [6]:
import networkx as nx

# Keep only the frame-to-frame relations typed as inheritance.
df_inh = df_rel.loc[df_rel["entry"] == "rel_inheritance"]

# Directed inheritance graph over frame ids. Every frame that takes part in an
# inheritance relation becomes a node; each relation contributes one edge that
# runs from idFrame2 to idFrame1. `random_child_frm` below treats the source of
# an edge pointing into a frame as that frame's child.
fn = nx.DiGraph()
fn.add_nodes_from(pd.concat([df_inh['idFrame1'], df_inh['idFrame2']]).unique())
fn.add_edges_from(zip(df_inh['idFrame2'], df_inh['idFrame1']))

## Preprocess

In [7]:
def _strip_meta(descriptions):
    """Return a copy of a description column with annotation markup removed.

    Steps, in order:
      1. drop line-break markers (see the NOTE below),
      2. cut everything from the first `<ex>` tag to the end of the text,
      3. turn `#Name` references into plain `Name`,
      4. strip surrounding whitespace.

    Args:
        descriptions: pandas Series of strings.

    Returns:
        A new Series; the input is not modified.
    """
    return (
        descriptions
        # NOTE(review): this call relies on pandas' default `regex` mode, which
        # changed from True to False in pandas 2.0. As a regex the pattern
        # matches a real CR+LF pair; as a literal it matches the four characters
        # backslash-r-backslash-n. Pin `regex=` once the intended form is known.
        .str.replace('\\r\\n', '')
        # Raw strings: `\<`, `\>` and `\w` are regex escapes, not Python string
        # escapes, and non-raw literals trigger invalid-escape warnings.
        .str.replace(r'\<ex\>.*$', '', regex=True)
        .str.replace(r'#(\w+)', r'\1', regex=True)
        .str.strip()
    )


# Remove "meta"
df_frm["description"] = _strip_meta(df_frm["description"])
df_fe["description"] = _strip_meta(df_fe["description"])

# Sense descriptions only carry a literal "FN: " marker.
df_lu["senseDescription"] = (
    df_lu["senseDescription"]
    .str.replace('FN: ', '', regex=False)
    .str.strip()
)

# Other
# Literal character swaps; `regex=False` keeps '[' and ']' from ever being
# interpreted as regex syntax, whatever the pandas version.
df_lu["name"] = (
    df_lu["name"]
    .str.replace('_', ' ', regex=False)
    .str.replace('[', '(', regex=False)
    .str.replace(']', ')', regex=False)
)

## Generate prompts

These are the utility functions used by the prompt generators.

In [17]:
import math
from random import random
from collections import OrderedDict
from networkx import all_neighbors

# Maps the part-of-speech tag that follows the last dot of an LU name
# (e.g. the "v" in "run.v") to the singular English label used in prompts.
POS = dict(
    v="verb",
    a="adjective",
    n="noun",
    adv="adverb",
    prep="preposition",
    idio="idiomatic expression",
)


def get_prompt_data(idFrame):
    """Gather everything the prompt builders need for one frame.

    Reads the module-level tables `df_frm`, `df_fe` and `df_lu`.

    Args:
        idFrame: label looked up in the index of `df_frm` and compared with the
            `idFrame` column of `df_fe` and `df_lu`.

    Returns:
        dict with the keys
          - "id", "name", "description": values taken from the frame's row,
          - "core_fes": list of {"name", "description"} records for frame
            elements whose coreType is "cty_core",
          - "unexp_fes": same, for coreType "cty_core-unexpressed",
          - "lus": list of {"name", "senseDescription"} records.

    Raises:
        KeyError: if `idFrame` is not in the index of `df_frm`.
    """
    frame_row = df_frm.loc[idFrame]
    frame_fes = df_fe[df_fe["idFrame"] == idFrame]
    frame_lus = df_lu[df_lu["idFrame"] == idFrame]

    def fe_records(core_type):
        # Frame elements of this frame with the requested core type.
        selected = frame_fes[frame_fes["coreType"] == core_type]
        return selected[["name", "description"]].to_dict('records')

    return {
        "id": frame_row["idFrame"],
        "name": frame_row["name"],
        "description": frame_row["description"],
        "core_fes": fe_records("cty_core"),
        "unexp_fes": fe_records("cty_core-unexpressed"),
        "lus": frame_lus[["name", "senseDescription"]].to_dict('records'),
    }


def fmt_entity(text):
    """Return `text` with every underscore turned into a space."""
    pieces = text.split('_')
    return ' '.join(pieces)

def fmt_definition(text):
    """Prepare a definition for embedding inside a quoted sentence.

    Surrounding whitespace is removed and, if the result ends with a period,
    that single trailing period is dropped (the prompt builders add their own
    after the closing quote).

    Args:
        text: the definition string.

    Returns:
        The stripped definition without its final period. Empty or
        whitespace-only input yields '' instead of raising IndexError.
    """
    stripped = text.strip()
    # `endswith` is safe on the empty string, unlike indexing with [-1].
    return stripped[:-1] if stripped.endswith('.') else stripped

def to_comma(arr):
    """Render names as a quoted English enumeration.

    Each item is passed through `fmt_entity` and wrapped in double quotes.
    Items are separated by ", " except the last two, which are joined by
    " and " (no comma before "and").

    Examples of the output shape:
        one item    -> "A"
        two items   -> "A" and "B"
        three items -> "A", "B" and "C"

    Args:
        arr: iterable of name strings.

    Returns:
        The enumeration, or '' when `arr` is empty (previously an IndexError).
    """
    quoted = [f'"{fmt_entity(w)}"' for w in arr]

    if not quoted:
        return ''

    if len(quoted) == 1:
        return quoted[0]

    head = ', '.join(quoted[:-1])
    return f'{head} and {quoted[-1]}'


def pos_counts(lus):
    """Count lexical units per part-of-speech tag.

    Args:
        lus: iterable of indexable items whose element 1 is the POS tag
            (e.g. ["run", "v"]).

    Returns:
        OrderedDict mapping each tag to its number of occurrences, with keys
        in order of first appearance.
    """
    tally = OrderedDict()

    for entry in lus:
        tag = entry[1]
        tally[tag] = tally.get(tag, 0) + 1

    return tally


def pos_names(counts):
    """Build the POS-tag -> label table for one frame, pluralised as needed.

    Starts from a copy of the module-level `POS` table and appends "s" to the
    label of every tag that occurs more than once in `counts`.

    Args:
        counts: mapping of POS tag to number of lexical units with that tag.

    Returns:
        A new dict; `POS` itself is left untouched.

    Raises:
        KeyError: if a tag with a count above 1 is not a key of `POS`.
    """
    labels = POS.copy()

    for tag in (t for t, n in counts.items() if n > 1):
        # add plural
        labels[tag] += 's'

    return labels


def fe_text(core_fes, unexp_fes=()):
    """Build the prompt sentences that list a frame's core frame elements.

    Args:
        core_fes: iterable of records with a "name" key (core FEs).
        unexp_fes: iterable of records with a "name" key (core-unexpressed
            FEs). Defaults to an empty, immutable sequence.

    Returns:
        A string of zero, one or two sentences, each starting with a space.
        A list with one name gets the singular wording, a longer list the
        plural wording, and an empty list produces no sentence at all. This
        now holds for both lists; an empty `core_fes` used to raise
        IndexError inside `to_comma`.
    """
    core_names = [f["name"] for f in core_fes]
    unexp_names = [f["name"] for f in unexp_fes]

    prompt = ''

    if len(core_names) == 1:
        prompt += f' The core frame element in this frame is "{fmt_entity(core_names[0])}".'
    elif len(core_names) > 1:
        prompt += f' Core frame elements in this frame are {to_comma(core_names)}.'

    if len(unexp_names) == 1:
        prompt += f' The core unexpressed frame element in this frame is "{fmt_entity(unexp_names[0])}".'
    elif len(unexp_names) > 1:
        prompt += f' Core unexpressed frame elements in this frame are {to_comma(unexp_names)}.'

    return prompt


def lu_text(lus):
    """Build the prompt sentence that lists the words evoking a frame.

    LU names have the form "<lemma>.<pos>". The units are grouped by POS tag
    (sorted by tag, original order kept inside a group) and rendered as, for
    example:

        Words evoking this frame are the adjectives "a1" and "a2",
        the nouns "n1", "n2" and "n3" and the verb "v1".

    Inside a group the last word is introduced by " and"; the last group is
    introduced by " and" as well, all other separators are commas.

    Args:
        lus: iterable of records with a "name" key.

    Returns:
        One sentence starting with a space and ending with a period, or ''
        when there are no lexical units.

    Raises:
        IndexError: if a name contains no dot.
        KeyError: if a POS tag is not a key of `POS`.
    """
    # Split on the LAST dot so lemmas that contain dots keep them.
    entries = sorted((lu["name"].rsplit('.', 1) for lu in lus), key=lambda lu: lu[1])

    if not entries:
        return ''

    if len(entries) == 1:
        word, pos = entries[0]
        # Trailing period added so the next prompt sentence does not run on.
        return f' This frame is evoked by the {POS[pos]} "{word}".'

    counts = pos_counts(entries)
    names = pos_names(counts)
    last_pos = list(counts.keys())[-1]

    pos = entries[0][1]
    pos_i = 1  # how many words of the current POS group have been emitted
    text = f' Words evoking this frame are the {names[pos]} "{entries[0][0]}"'

    for word, lu_pos in entries[1:]:
        if lu_pos == pos:
            pos_i += 1
            sep = ' and' if pos_i == counts[pos] else ','
            text += f'{sep} "{word}"'
        else:
            pos = lu_pos
            # The word emitted on this line is the group's first one, so the
            # counter restarts at 1. Restarting at 0 made the final word of
            # every middle group miss its "and".
            pos_i = 1
            sep = ' and' if pos == last_pos else ','
            text += f'{sep} the {names[pos]} "{word}"'

    return text + '.'


def random_child_frm(anchor, graph):
    """Pick, at random, a node that has an edge pointing to `anchor`.

    Args:
        anchor: node of `graph`.
        graph: graph object supporting `has_edge`; neighbours are enumerated
            with `networkx.all_neighbors`.

    Returns:
        One of the neighbours `n` for which `graph.has_edge(n, anchor)` holds.

    Raises:
        IndexError: if no neighbour has an edge into `anchor`.
    """
    # Get "neighbors" with the correct direction
    candidates = []
    for node in all_neighbors(graph, anchor):
        if graph.has_edge(node, anchor):
            candidates.append(node)

    pick = math.floor(random() * len(candidates))
    return candidates[pick]

In [18]:
# def prompt_create_from_example(data):
#     # FE names
#     fes = [f["name"] for f in data["core_fes"]]
    
#     if len(fes) == 1:
#         prompt = f'The semantic frame for "{data["name"]}" has the "{fes[0]}" core element.'
#     else:
#         prompt = f'The semantic frame for "{data["name"]}" has the following core elements: {to_comma(fes)}.'
    
#     # LUs
#     prompt += lu_text(data["lus"])
        
#     # FE definitions
#     for fe in data["core_fes"]:
#         prompt += f' {fmt_definition(fe["description"])}'
    
#     # Request part
#     prompt += f' Please provide the core frame elements for the "{data["name"]}" frame and the words used to evoke this frame.'
    
#     return prompt


def prompt_create_from_lus(data, new_lus):
    """Build a prompt that asks for a new frame evoked by `new_lus`.

    The prompt describes an existing frame (definition, core frame elements
    with their definitions, evoking words), then asks for a frame evoked by
    the given words and for frames covering other kinds of the example frame,
    formatted as a table.

    Args:
        data: frame record as returned by `get_prompt_data`; the keys "name",
            "description", "core_fes" and "lus" are read.
        new_lus: sequence of words the proposed frame should be evoked by.

    Returns:
        The prompt as a single string.
    """
    frame_name = fmt_entity(data["name"])

    # Frame definition
    prompt = f'The semantic frame for "{frame_name}" is defined as follows:'
    prompt += f' "{fmt_definition(data["description"])}".'

    # FEs
    fes = [f["name"] for f in data["core_fes"]]

    if len(fes) == 1:
        prompt += f' The semantic frame for "{frame_name}" has one core frame element:'
        prompt += f' "{fmt_entity(fes[0])}".'
    elif fes:
        # Skipped for a frame without core FEs, which used to crash in to_comma.
        prompt += f' The semantic frame for "{frame_name}" has {len(fes)} core elements:'
        prompt += f' {to_comma(fes)}.'

    for fe in data["core_fes"]:
        prompt += f' The definition of the "{fmt_entity(fe["name"])}" frame element is as follows:'
        prompt += f' "{fmt_definition(fe["description"])}".'

    # LUs
    prompt += lu_text(data["lus"])

    # Request part
    prompt += f' First, propose a semantic frame evoked by words such as {to_comma(new_lus)}.'
    prompt += f' Second, please propose semantic frames for other kinds of "{frame_name}".'
    # "as a table": article added to match prompt_create_from_inheritance.
    prompt += ' Present them as a table in which columns are "Frame Name", "Frame Definition", "Frame Elements", "Frame Element Definition" and "Words evoking the frame".'

    return prompt


def prompt_create_from_inheritance(data, child_data):
    """Build a prompt that asks for further frames inheriting a parent frame.

    The prompt describes the parent frame (definition, core and
    core-unexpressed frame elements), presents one inheriting frame as an
    example (its core frame elements and evoking words) and asks for other
    inheriting frames, formatted as a table.

    Args:
        data: parent frame record as returned by `get_prompt_data`.
        child_data: record of the example child frame, same structure.

    Returns:
        The prompt as a single string.
    """
    parent_name = fmt_entity(data["name"])

    # Frame definition
    prompt = f'There is a semantic frame for "{parent_name}", whose definition is as follows:'
    prompt += f' "{fmt_definition(data["description"])}".'

    # FEs
    prompt += fe_text(data["core_fes"], data["unexp_fes"])

    # Child frame
    prompt += f' The "{fmt_entity(child_data["name"])}" frame inherits the "{parent_name}" frame.'

    # Child FEs: taken from the child record. Passing the parent's list here
    # merely repeated the parent's core-FE sentence.
    prompt += fe_text(child_data["core_fes"])

    # Child LUs
    prompt += lu_text(child_data["lus"])

    # Request part
    prompt += f' Please propose other semantic frames inheriting the "{parent_name}" frame.'
    prompt += ' Present them as a table in which columns are "Frame Name", "Frame Definition", "Frame Elements", "Frame Element Definition" and "Words evoking the frame".'

    return prompt


In [19]:
# Find a frame's id by a fragment of its name; the match below is frame 249,
# "Avoiding", which the last cell uses as a fixed child frame.
df_frm[df_frm["name"].str.contains("Avoidin")]

Unnamed: 0_level_0,idFrame,name,description
idFrame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
249,249,Avoiding,An Agent avoids an Undesirable_situation under...


In [20]:
# Disabled: prompt_create_from_example is commented out in the cell above.
# print(prompt_create_from_example(get_prompt_data(155)))
# print("==================================")
# Demo 1: frame 226 (printed below as "Entity") with four new candidate words.
print(prompt_create_from_lus(get_prompt_data(226), ["god", "saint", "deity", "goddess"]))
print("==================================")
# 503
# NOTE(review): 503 is presumably another frame id that was tried here - confirm or remove.
# Demo 2: frame 178 (printed below as "Intentionally act") with a randomly drawn
# child frame; no random seed is set, so the child can differ between runs.
print(prompt_create_from_inheritance(get_prompt_data(178), get_prompt_data(random_child_frm(178, fn))))

The semantic frame for "Entity" is defined as follows: "This frame is for words that denote highly schematic entities". The semantic frame for "Entity" has one core frame element: "Entity". The definition of the "Entity" frame element is as follows: "A thing (either abstract or physical) that exists with some degree of permanence". Words evoking this frame are the adverb "anything" and the nouns "item", "entity", "object", "thing", "individual", "what" and "material". First, propose a semantic frame evoked by words such as "god", "saint", "deity" and "goddess". Second, please propose semantic frames for other kinds of "Entity". Present them as table in which columns are "Frame Name", "Frame Definition", "Frame Elements", "Frame Element Definition" and "Words evoking the frame".
There is a semantic frame for "Intentionally act", whose definition is as follows: "This is an abstract frame for acts performed by sentient beings". The core frame element in this frame is "Agent". The core une

In [None]:
# Same inheritance prompt as above, but with a fixed child: frame 249
# ("Avoiding" in the lookup cell) instead of a random one.
print(prompt_create_from_inheritance(get_prompt_data(178), get_prompt_data(249)))