In [25]:
import pandas as pd

# Load the CSV that every later cell in this notebook works from.
csv_path = "FNBr-cxns-ces.csv"
df = pd.read_csv(csv_path)

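Filter out the Portuguese names and definitions of English constructions: every row of a Portuguese construction is kept, but for English constructions only the rows with English text are kept.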

In [26]:
# Keep every row that belongs to a Portuguese construction (cxnLanguage 1);
# for all other constructions keep only the rows with English text (txtLanguage 2).
from_pt_construction = df["cxnLanguage"] == 1
has_en_text = df["txtLanguage"] == 2
df = df[from_pt_construction | has_en_text]

In [70]:
def get_with_translation(row, col):
    """Return ``row[col]``, annotated with its Portuguese counterpart if present.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Must provide both ``col`` and ``col + "_pt"``.
    col : str
        Name of the base column; the translation is read from ``col + "_pt"``.

    Returns
    -------
    ``"<value> (in portuguese: <translation>)"`` when the translation is a
    string; otherwise the untouched value of ``row[col]``.
    """
    translation = row[col + "_pt"]
    value = row[col]

    # Anything that is not a string (for instance the NaN pandas uses for
    # missing data) means there is no translation to show.
    if not isinstance(translation, str):
        return value

    return f"{value} (in portuguese: {translation})"

prompts = []
# (cxnLanguage, idConstruction) keys of constructions that produced no prompt.
skipped_constructions = []

# Build one natural-language prompt per construction: grouping by
# (cxnLanguage, idConstruction) yields one group of rows per construction.
for idx, group in df.groupby(["cxnLanguage", "idConstruction"]):
    # cxnLanguage 1 is described as Brazilian Portuguese, anything else as English.
    language = "Brazilian Portuguese" if idx[0] == 1 else "English"

    # Split the rows by text language (1 = Portuguese, 2 = English) and place the
    # Portuguese text beside the English text of the same construction element.
    # The left join keeps every English row; elements without a Portuguese
    # counterpart end up with NaN in the "_pt" columns.
    pt_group = group[group["txtLanguage"] == 1]
    en_group = group[group["txtLanguage"] == 2]
    flat_group = en_group.merge(pt_group, on=["idConstruction", "idConstructionElement"],
                                how="left", suffixes=("", "_pt"))

    # Without any English row there is nothing to anchor the prompt on, and
    # `iloc[0]` below would raise IndexError and abort the whole run.
    if flat_group.empty:
        skipped_constructions.append(idx)
        continue

    # The construction name/definition are read from the first row only
    # (presumably identical on every row of the group -- TODO confirm).
    first_row = flat_group.iloc[0]
    cxnName = get_with_translation(first_row, "cxnName")
    cxnDef = get_with_translation(first_row, "cxnDefinition")

    header = f"""In the context of the theory of Construction Grammar and of the {language} language, the {cxnName} construction can be defined as such: "{cxnDef}".
The {len(flat_group)} construction elements (CEs) of this construction are:\n"""

    # Collect the pieces and join once instead of growing a string with `+=`.
    lines = [header]

    for _, row in flat_group.iterrows():
        ceName = row["ceName"]
        ceDef = row["ceDefinition"]
        line = f"- {ceName}: {ceDef}"

        # Append the Portuguese name/definition only when the merge found one.
        if isinstance(row["ceName_pt"], str):
            ceNamePt = row["ceName_pt"]
            ceDefPt = row["ceDefinition_pt"]
            line += f" ({ceNamePt}: {ceDefPt})"

        lines.append(line + "\n")

    text = "".join(lines)
    prompts.append(text)

# Make dropped constructions visible rather than losing them silently.
if skipped_constructions:
    print("Skipped constructions without English text:", skipped_constructions)

In [77]:
# Sanity check: report how many prompts were built, then show one sample.
total_prompts = len(prompts)
print("Number of prompts:", total_prompts)

sample_prompt = prompts[187]
print("e.g.:\n", sample_prompt)

Number of prompts: 230
e.g.:
 In the context of the theory of Construction Grammar and of the Brazilian Portuguese language, the verb_modification (in portuguese: Auxiliação) construction can be defined as such: "verb_modification (in portuguese: Construção genérica que congrega as construções de tempo, modo, modalidade e aspecto.)".
The 2 construction elements (CEs) of this construction are:
- Verbo: verb_modification_verb (Verbo: Verbo principal.)
- Auxiliar: verb_modification_modifier (Auxiliar: Auxiliar indicativo de tempo, modo, modalidade ou aspecto.)



In [82]:
import json

# Persist the list of prompt strings as a single JSON array.
output_path = "constructicon-prompts.json"
with open(output_path, "w") as out_file:
    json.dump(prompts, out_file)