# Formatting Annotations into Training Data Format

This script automatically formats the transcriptions and their annotations into the format that RASA accepts as training data.

## Importing Completed Transcripts

In [46]:
# Collect the file names of all transcripts whose second line carries the
# "Complete" tag.
completedTranscripts = []

# Going through all 115 transcript files
for i in range(1, 116):
    fileName = "transcript_" + str(i) + ".txt"

    # The with-block closes the file even if one of the reads raises.
    with open("./data/transcripts/" + fileName, "r") as file:
        # The first line holds the name; skip it.
        file.readline()

        # Characters 8-15 of the second line hold the status tag.
        complete = file.readline()[8:16]

    # Keep only the transcripts marked as completed.
    if complete == "Complete":
        completedTranscripts.append(fileName)

# Sanity Check
print(completedTranscripts)

['transcript_10.txt', 'transcript_22.txt', 'transcript_44.txt', 'transcript_56.txt']


## Filtering and Sorting Different Annotations  

**Possible Annotations:**
- _[question]_
- _[options]_
- _[accept-answer]_
- [offer-answer()]
- [offer-to-answer]
- [check-answer]
- [agreement]
- [ask-agreement]
- [chit-chat]
- [final-answer()]
- [confirm-agreement]
- [confirm-final-answer()]
- [reject-option()]
- [reject-option-agreement()]

Annotations in _italics_ are host-only annotations.

In [47]:
# Sort every annotated utterance of the completed transcripts into one list
# per annotation. Each list starts with its annotation name; the examples
# parsed from the transcripts are appended after it.

# Without Input: examples are plain utterance strings.
question = ["question"]
options = ["options"]
accept_answer = ["accept-answer"]
offer_to_answer = ["offer-to-answer"]
check_answer = ["check-answer"]
agreement = ["agreement"]
ask_agreement = ["ask-agreement"]
chit_chat = ["chit-chat"]
confirm_agreement = ["confirm-agreement"]

# With Input: examples are [utterance, value] pairs, where value is the text
# found between the annotation's parentheses.
offer_answer = ["offer-answer"]
final_answer = ["final-answer"]
confirm_final_answer = ["confirm-final-answer"]
reject_option = ["reject-option"]
reject_option_agreement = ["reject-option-agreement"]

# Lookup tables from annotation name to the list collecting its examples;
# they replace one if/elif branch per annotation.
noInputIntents = {
    intent[0]: intent
    for intent in (
        question, options, accept_answer, offer_to_answer, check_answer,
        agreement, ask_agreement, chit_chat, confirm_agreement,
    )
}
inputIntents = {
    intent[0]: intent
    for intent in (
        offer_answer, final_answer, confirm_final_answer, reject_option,
        reject_option_agreement,
    )
}

for fileName in completedTranscripts:
    # The with-block closes the transcript even if parsing raises.
    with open("./data/transcripts/" + fileName, "r") as file:
        # Skip the 15 lines at the top of the file
        # (presumably the transcript header -- confirm against the data).
        for _ in range(15):
            file.readline()

        for line in file:
            # Drop the speaker prefix: 3 characters on lines starting with
            # 'S', 4 characters on lines starting with 'U'.
            if line[0] == 'S':
                line = line[3:]
            elif line[0] == 'U':
                line = line[4:]

            start = line.find('[')
            end = line.find(']')
            # A line without a well-formed [annotation] yields no example.
            if start == -1 or end < start:
                continue

            # The utterance is everything before the annotation. rstrip()
            # removes the separating whitespace without eating a real
            # character when no space precedes the bracket.
            text = line[:start].rstrip()
            inner = line[start + 1:end]

            # The known annotation names contain no spaces, so stray spaces
            # are ignored when matching a name.
            compact = inner.replace(" ", "")
            if compact in noInputIntents:
                noInputIntents[compact].append(text)
                continue

            # Annotations with input look like name(value).
            name, paren, rest = inner.partition('(')
            target = inputIntents.get(name.replace(" ", ""))
            if target is None or not paren:
                continue

            # Only trim the value: inner spaces must survive so that a
            # multi-word value can still be found in the utterance later.
            value = rest.partition(')')[0].strip()
            target.append([text, value])


## Writing the Annotation Examples into a YAML File

In [48]:
# Write the collected examples to the NLU training file, one "intent" entry
# per annotation list.

# Lists whose examples are plain utterance strings.
trainingDataNoInput = [
    offer_to_answer,
    check_answer,
    agreement,
    ask_agreement,
    chit_chat,
    confirm_agreement
]

# Lists whose examples are [utterance, value] pairs.
trainingDataInput = [
    offer_answer,
    final_answer,
    confirm_final_answer,
    reject_option,
    reject_option_agreement
]

# The with-block flushes and closes the output even if a write raises.
with open("./RASA_Training_Data/nlu.txt", "w") as file:
    file.write("version: \"3.1\"\n\nnlu:\n")

    for annotation in trainingDataNoInput:
        # The first element is the intent name; pop(0) removes it from the
        # list so that only the examples remain.
        file.write("- intent: " + annotation.pop(0) + "\n  examples: |\n")
        for example in annotation:
            file.write("    - " + example + "\n")
        file.write("\n")

    for annotation in trainingDataInput:
        file.write("- intent: " + annotation.pop(0) + "\n  examples: |\n")
        for example in annotation:
            text = example[0]
            value = example[1]
            # str.replace returns a new string, so its result must be kept.
            # An empty value is skipped because replacing "" would insert
            # brackets between every character.
            # NOTE(review): Rasa's entity syntax is [value](entity_name);
            # confirm whether an entity name should follow the brackets.
            if value:
                text = text.replace(value, "[" + value + "]")
            file.write("    - " + text + "\n")
        file.write("\n")
