In [2]:
import pandas as pd

In [3]:
def convert_all_to_string(data):
    """
    Recursively convert all data in a dictionary or list to string.
    """
    if isinstance(data, dict):
        return {key: convert_all_to_string(value) for key, value in data.items()}
    elif isinstance(data, list):
        return [convert_all_to_string(item) for item in data]
    else:
        return str(data)

In [336]:
#load the xls file without containing any links
df = pd.read_excel('testing_excel.xlsx', sheet_name='Greeting_Goodbyes')

In [179]:
df = df.replace(u'\xa0', u' ', regex=True)

In [180]:
df

Unnamed: 0,Greetings,Voicetype,Goodbyes
0,"""Hmm?""",Any,"""Until next time."""
1,"""Need something?""",Any,"""Hmm hmm."""
2,"""Yes?""",Any,"""All right, then."""
3,"""Ours is to smile at your passing, friend.(F)""",MaleArgonian,
4,"""It fills us with rage. What harm does it brin...",MaleArgonian,
...,...,...,...
155,"""What can I do for you?""",FemaleYoungEager,
156,"""Oh, did you need something?""",MaleYoungEager,
157,"""Oh, did you need something?""",FemaleYoungEager,
158,"""It's a fine day with you around.(F)""",FemaleYoungEager,


In [181]:
#split the dataframe in two dataframes, one with the columns Greetings and Voicetype and the other with the columns Goodbye and Voicetype
Greetings = df[['Greetings', 'Voicetype']]

#Drop the rows where Greetings is NaN
Greetings = Greetings.dropna(subset=['Greetings'])

Goodbyes = df[['Goodbyes', 'Voicetype']]

#drop the rows where goodbyes is NaN
Goodbyes = Goodbyes.dropna(subset=['Goodbyes'])

In [182]:
Greetings

Unnamed: 0,Greetings,Voicetype
0,"""Hmm?""",Any
1,"""Need something?""",Any
2,"""Yes?""",Any
3,"""Ours is to smile at your passing, friend.(F)""",MaleArgonian
4,"""It fills us with rage. What harm does it brin...",MaleArgonian
...,...,...
155,"""What can I do for you?""",FemaleYoungEager
156,"""Oh, did you need something?""",MaleYoungEager
157,"""Oh, did you need something?""",FemaleYoungEager
158,"""It's a fine day with you around.(F)""",FemaleYoungEager


In [183]:
Goodbyes

Unnamed: 0,Goodbyes,Voicetype
0,"""Until next time.""",Any
1,"""Hmm hmm.""",Any
2,"""All right, then.""",Any
21,"""I'll take my leave, then.""",MaleCommander
22,"""I'll take my leave, then.""",MaleNordCommander
30,"""I'll take my leave, then.""",FemaleCommander
42,"""Now if you'll excuse me...""",MaleCondescending
45,Now if you'll excuse me...,FemaleCondescending
60,"""Safe travels.""",MaleDarkElf
63,"""Safe travels.""",FemaleDarkElf


In [184]:
#create an new column in the greetings dataframe called Tone 
Greetings['Tone'] = 'Neutral'

# check the rows of the dataframe to see if there is a (H) or a (F) in the rows and change the tone to Happy or Formal
for index, row in Greetings.iterrows():
    if '(H)' in row['Greetings']:
        Greetings.at[index, 'Tone'] = 'Hostile'
        #remove the (H) from the row
        Greetings.at[index, 'Greetings'] = Greetings.at[index, 'Greetings'].replace('(H)', '')
    elif '(F)' in row['Greetings']:
        Greetings.at[index, 'Tone'] = 'Friendly'
        #remove the (F) from the row
        Greetings.at[index, 'Greetings'] = Greetings.at[index, 'Greetings'].replace('(F)', '')
    

In [185]:
#remove the (H) and (F) from the rows of the Greetings column
Greetings['Greetings'] = Greetings['Greetings'].str.replace(r"\(H\)", "")

In [186]:
Greetings

Unnamed: 0,Greetings,Voicetype,Tone
0,"""Hmm?""",Any,Neutral
1,"""Need something?""",Any,Neutral
2,"""Yes?""",Any,Neutral
3,"""Ours is to smile at your passing, friend.""",MaleArgonian,Friendly
4,"""It fills us with rage. What harm does it brin...",MaleArgonian,Hostile
...,...,...,...
155,"""What can I do for you?""",FemaleYoungEager,Neutral
156,"""Oh, did you need something?""",MaleYoungEager,Neutral
157,"""Oh, did you need something?""",FemaleYoungEager,Neutral
158,"""It's a fine day with you around.""",FemaleYoungEager,Friendly


In [187]:
#check if there is a null in the voicetype column 
Greetings['Voicetype'].isnull().values.any()

False

In [188]:
import json

# create a dictionary with the structure above
Greetings_data = [
    {
    "Speaker": Greetings['Voicetype'][index],
    "Dialogue": Greetings['Greetings'][index],
    "Tone": Greetings['Tone'][index],
    "Event": "Greeting",
} for index, row in Greetings.iterrows()]

data = Greetings_data   


In [189]:
Greetings_data = convert_all_to_string(Greetings_data)

In [190]:
data_list = [Greetings_data]

In [191]:
#write the Greeetings_data to a json file in the testing_for_error folder
with open('testing_for_error/Greetings_data.json', 'w') as f:
    f.write(json.dumps(Greetings_data, indent=4))


In [192]:
#do the same with the Goodbyes dataframe
Goodbyes_data = [
    {
    "Speaker": Goodbyes['Voicetype'][index],
    "Dialogue": Goodbyes['Goodbyes'][index],
    "Event": "Goodbye",
} for index, row in Goodbyes.iterrows()]

data = data + Goodbyes_data

In [193]:
Goodbyes_data = convert_all_to_string(Goodbyes_data)

In [194]:
data_list.append(Goodbyes_data)

In [195]:
#write the Goodbyes_data to a json file in the testing_for_error folder
with open('testing_for_error/Goodbyes_data.json', 'w') as f:
    f.write(json.dumps(Goodbyes_data, indent=4))

In [196]:
#import the second sheet of the excel file to a dataframe
vampire = pd.read_excel('testing_excel.xlsx', sheet_name='Vampire')
vampire = vampire.replace(u'\xa0', u' ', regex=True)

In [197]:
#duplicate the rows
vampire = vampire.copy()

vampire= pd.concat([vampire]*2, ignore_index=True)

# Replace the first 8 values of the 'Voicetype' column with the first value
vampire.loc[0:7, 'Dialogue'] = vampire['Dialogue'][0]

# Replace the next 8 values of the 'Voicetype' column starting from the 8th index with the value at the 8th index
vampire.loc[8:15, 'Dialogue'] = vampire['Dialogue'][9]

In [198]:
vampire

Unnamed: 0,Voicetype,Dialogue
0,MaleCommoner,"""Don't like those eyes you got. There's a bad ..."
1,MaleCommonerAccented,"""Don't like those eyes you got. There's a bad ..."
2,FemaleCommoner,"""Don't like those eyes you got. There's a bad ..."
3,MaleEvenToned,"""Don't like those eyes you got. There's a bad ..."
4,MaleEvenTonedAccented,"""Don't like those eyes you got. There's a bad ..."
5,FemaleEvenToned,"""Don't like those eyes you got. There's a bad ..."
6,MaleNord,"""Don't like those eyes you got. There's a bad ..."
7,FemaleNord,"""Don't like those eyes you got. There's a bad ..."
8,MaleCommoner,"""Your skin's as pale as the snow. You scared o..."
9,MaleCommonerAccented,"""Your skin's as pale as the snow. You scared o..."


In [199]:
# add the vampire dataframe to the json file
Vampire_data = [
    {
    "Speaker": vampire['Voicetype'][index],
    "Dialogue": vampire['Dialogue'][index],
    "Event": "the Player being a vampire in front of an NPC",
} for index, row in vampire.iterrows()]

data = data + Vampire_data

In [200]:
Vampire_data = convert_all_to_string(Vampire_data)

In [201]:
data_list.append(Vampire_data)

In [202]:
#writing the vampire data to a json file on the testing_for_error folder
with open('testing_for_error/Vampire_data.json', 'w') as f:
    f.write(json.dumps(Vampire_data, indent=4))

In [203]:
werewolf = pd.read_excel('testing_excel.xlsx', sheet_name='Werewolf')
werewolf = werewolf.replace(u'\xa0', u' ', regex=True)

In [204]:
werewolf

Unnamed: 0,Voicetype,Dialogue
0,Guard,"""Werewolf!"""
1,Guard,"""It's a werewolf! To arms! To arms!"""
2,Guard,"""By the gods! It..it can't be!"""
3,MaleCommoner,"""Come on! You monster!"""
4,MaleCommonerAccented,"""Come on! You monster!"""
5,FemaleCommoner,"""Come on! You monster!"""
6,MaleEvenToned,"""Come on! You monster!"""
7,MaleEvenTonedAccented,"""Come on! You monster!"""
8,FemaleEvenToned,"""Come on! You monster!"""
9,MaleNord,"""Come on! You monster!"""


In [205]:
#add the werewolf dataframe to the json file
Werewolf_data = [
    {
    "Speaker": werewolf['Voicetype'][index],
    "Dialogue": werewolf['Dialogue'][index],
    "Event": "Player being a werewolf in front of an NPC",
    "Condition": "the Player having transformed into a werewolf"
} for index, row in werewolf.iterrows()]

data = data + Werewolf_data

In [206]:
Werewolf_data = convert_all_to_string(Werewolf_data)

In [207]:
data_list.append(Werewolf_data)

In [208]:
#write the werewolf data to a json file in the testing_for_error folder
with open('testing_for_error/Werewolf_data.json', 'w') as f:
    f.write(json.dumps(Werewolf_data, indent=4))

In [209]:
#import the Generic_Events sheet
generic_events = pd.read_excel('testing_excel.xlsx', sheet_name='Generic_Events')

In [210]:
generic_events = generic_events.replace(u'\xa0', u' ', regex=True)

In [211]:
generic_events

Unnamed: 0,Event,Dialogue
0,the an NPC witnessing a theft,"""You dirty thief!"""
1,the an NPC witnessing a theft,"""Stop, thief!"""
2,the Player trespassing,"""You're not supposed to be in here."""
3,the Player trespassing,"""Last warning. Leave, now."""
4,the Player trespassing,"""I'm not going to warn you again. Get out, or ..."
...,...,...
89,an NPC witnessing a murder,"""Guess they deserved it..."""
90,an NPC witnessing a murder,"""What's done is done."""
91,an NPC witnessing a murder,"""Well... that was an unpleasant bit of business."""
92,an NPC witnessing a murder,"""I didn't know you were so cold-blooded..."""


In [212]:
#add the generic_events dataframe to the json file
Generic_Events_data = [
    {
    "Speaker": "Any",
    "Dialogue": generic_events['Dialogue'][index],
    "Event": generic_events['Event'][index],
} for index, row in generic_events.iterrows()]

data = data + Generic_Events_data

In [213]:
Generic_Events_data = convert_all_to_string(Generic_Events_data)

In [214]:
data_list.append(Generic_Events_data)

In [215]:
#write the generic_events data to a json file in the testing_for_error folder
with open('testing_for_error/Generic_Events_data.json', 'w') as f:
    f.write(json.dumps(Generic_Events_data, indent=4))

In [216]:
# import the Flames_Girls sheet
flames_gifts= pd.read_excel('testing_excel.xlsx', sheet_name='Flames_Gifts')

In [217]:
flames_gifts = flames_gifts.replace(u'\xa0', u' ', regex=True)

In [218]:
#add the flames_gifts dataframe to the json file
Flames_Gifts_data = [
    {
    "Speaker": flames_gifts['Voicetype'][index],
    "Dialogue": flames_gifts['Dialogue'][index],
    "Event": flames_gifts['Event'][index],
    } for index, row in flames_gifts.iterrows()]

data = data + Flames_Gifts_data

In [219]:
Flames_Gifts_data = convert_all_to_string(Flames_Gifts_data)

In [220]:
data_list.append(Flames_Gifts_data)

In [221]:
#write the flames_gifts data to a json file in the testing_for_error folder
with open('testing_for_error/Flames_Gifts_data.json', 'w') as f:
    f.write(json.dumps(Flames_Gifts_data, indent=4))

In [222]:
#import the fighting_over_item sheet
fighting_over_item = pd.read_excel('testing_excel.xlsx', sheet_name='fighting_over_item')
fighting_over_item = fighting_over_item.replace(u'\xa0', u' ', regex=True)

In [223]:
fighting_over_item_data = [
    {
    "Speaker": fighting_over_item['Voicetype'][index],
    "Dialogue": fighting_over_item['Dialogue'][index],
    "Event": fighting_over_item['Event'][index],
    "Line_of_dialogue": fighting_over_item['Line_of_dialogue'][index],
    } for index, row in fighting_over_item.iterrows()]

data = data + fighting_over_item_data

In [224]:
fighting_over_item_data = convert_all_to_string(fighting_over_item_data)

In [225]:
data_list.append(fighting_over_item_data)

In [226]:
#write the fighting_over_item data to a json file in the testing_for_error folder
with open('testing_for_error/fighting_over_item_data.json', 'w') as f:
    f.write(json.dumps(fighting_over_item_data, indent=4))

In [227]:
#read the sheet return&bumping&naked
return_bumping_naked = pd.read_excel('testing_excel.xlsx', sheet_name='return&bumping&naked')
return_bumping_naked = return_bumping_naked.replace(u'\xa0', u' ', regex=True)

In [228]:
#add the return_bumping_naked dataframe to the json file
return_bumping_naked_data = [
    {
    "Speaker": return_bumping_naked['Voicetype'][index],
    "Dialogue": return_bumping_naked['Dialogue'][index],
    "Event": return_bumping_naked['Event'][index],
    } for index, row in return_bumping_naked.iterrows()]

data = data + return_bumping_naked_data

In [229]:
return_bumping_naked_data = convert_all_to_string(return_bumping_naked_data)

In [230]:
data_list.append(return_bumping_naked_data)

In [231]:
#write the return_bumping_naked data to a json file in the testing_for_error folder
with open('testing_for_error/return_bumping_naked_data.json', 'w') as f:
    f.write(json.dumps(return_bumping_naked_data, indent=4))

In [232]:
# read the sheet Asking_armor
asking_armor = pd.read_excel('testing_excel.xlsx', sheet_name='Asking_armor')
asking_armor = asking_armor.replace(u'\xa0', u' ', regex=True)

In [233]:
#add the asking_armor dataframe to the json file
asking_armor_data = [
    {
    "Speaker": asking_armor['Voicetype'][index],
    "Dialogue": asking_armor['Dialogue'][index],
    "Event": asking_armor['Event'][index],
    "Response_to": asking_armor['Response_to'][index]
    } for index, row in asking_armor.iterrows()]

data = data + asking_armor_data

In [234]:
asking_armor_data = convert_all_to_string(asking_armor_data)

In [235]:
data_list.append(asking_armor_data)

In [236]:
#write the asking_armor data to a json file in the testing_for_error folder
with open('testing_for_error/asking_armor_data.json', 'w') as f:
    f.write(json.dumps(asking_armor_data, indent=4))

In [237]:
#read the sheet Spells&robes
spells_robes = pd.read_excel('testing_excel.xlsx', sheet_name='Spells&robes')
spells_robes = spells_robes.replace(u'\xa0', u' ', regex=True)
#remove the \u2019 character from the dataframe
spells_robes = spells_robes.replace(u'\u2019', "'", regex=True)
#remove the \u00a0 character from the dataframe
spells_robes = spells_robes.replace(u'\u00a0', ' ', regex=True)

In [238]:
#add the spells_robes dataframe to the json file
spells_robes_data = [
    {
    "Speaker": spells_robes['Voicetype'][index],
    "Dialogue": spells_robes['Dialogue'][index],
    "Event": spells_robes['Event'][index],
    } for index, row in spells_robes.iterrows()]

data = data + spells_robes_data

In [239]:
spells_robes_data = convert_all_to_string(spells_robes_data)

In [240]:
data_list.append(spells_robes_data)

In [241]:
#write the spells_robes data to a json file in the testing_for_error folder
with open('testing_for_error/spells_robes_data.json', 'w') as f:
    f.write(json.dumps(spells_robes_data, indent=4))

In [242]:
#read the sheet Shouting&Corpses
shouting_corpses = pd.read_excel('testing_excel.xlsx', sheet_name='Shouting&Corpses')
shouting_corpses = shouting_corpses.replace(u'\xa0', u' ', regex=True)
shouting_corpses = shouting_corpses.replace(u'\u2019', "'", regex=True)
shouting_corpses = shouting_corpses.replace(u'\u00a0', ' ', regex=True)

In [243]:
#add the shouting_corpses dataframe to the json file
shouting_corpses_data = [
    {
    "Speaker": shouting_corpses['Voicetype'][index],
    "Dialogue": shouting_corpses['Dialogue'][index],
    "Event": shouting_corpses['Event'][index],
    } for index, row in shouting_corpses.iterrows()]

data = data + shouting_corpses_data

In [244]:
shouting_corpses_data = convert_all_to_string(shouting_corpses_data)

In [245]:
data_list.append(shouting_corpses_data)

In [246]:
#write the shouting_corpses data to a json file in the testing_for_error folder
with open('testing_for_error/shouting_corpses_data.json', 'w') as f:
    f.write(json.dumps(shouting_corpses_data, indent=4))

In [247]:
#add the sheet combat_dialogue to the json file
combat_dialogue = pd.read_excel('testing_excel.xlsx', sheet_name='combat_dialogue')
combat_dialogue = combat_dialogue.replace(u'\xa0', u' ', regex=True)
combat_dialogue = combat_dialogue.replace(u'\u2019', "'", regex=True)
combat_dialogue = combat_dialogue.replace(u'\u00a0', ' ', regex=True)

In [248]:
combat_dialogue_data = [
    {
    "Speaker": "Any",
    "Dialogue": combat_dialogue['Dialogue'][index],
    "Event": combat_dialogue['Event'][index],
    "Condition": combat_dialogue['Condition'][index]
    } for index, row in combat_dialogue.iterrows()]

data = data + combat_dialogue_data

In [249]:
combat_dialogue_data = convert_all_to_string(combat_dialogue_data)

In [250]:
data_list.append(combat_dialogue_data)

In [251]:
#writing the combat_dialogue data to a json file in the testing_for_error folder
with open('testing_for_error/combat_dialogue_data.json', 'w') as f:
    f.write(json.dumps(combat_dialogue_data, indent=4))

In [252]:
#add the sheet Persuation_check to the json file

persuasion_check = pd.read_excel('testing_excel.xlsx', sheet_name='Persuasion_check')
persuasion_check = persuasion_check.replace(u'\xa0', u' ', regex=True)
persuasion_check = persuasion_check.replace(u'\u2019', "'", regex=True)
persuasion_check = persuasion_check.replace(u'\u00a0', ' ', regex=True)

In [253]:
persuasion_check_data = [
    {
    "Speaker": "Any",
    "Dialogue": persuasion_check['Dialogue'][index],
    "Event": persuasion_check['Event'][index],
    "Result": persuasion_check['Result'][index],
    } for index, row in persuasion_check.iterrows()]

data = data + persuasion_check_data

In [254]:
persuasion_check_data = convert_all_to_string(persuasion_check_data)

In [255]:
data_list.append(persuasion_check_data)

In [256]:
#write the persuasion_check data to a json file in the testing_for_error folder
with open('testing_for_error/persuasion_check_data.json', 'w') as f:
    f.write(json.dumps(persuasion_check_data, indent=4))

In [257]:
#add the sheet entering_buying to the json file
entering_buying = pd.read_excel('testing_excel.xlsx', sheet_name='entering_buying')
entering_buying = entering_buying.replace(u'\xa0', u' ', regex=True)
entering_buying = entering_buying.replace(u'\u2019', "'", regex=True)
entering_buying = entering_buying.replace(u'\u00a0', ' ', regex=True)

In [258]:
entering_buying_data = [
    {
    "Speaker": entering_buying['Voicetype'][index],
    "Dialogue": entering_buying['Dialogue'][index],
    "Event": entering_buying['Event'][index],
    } for index, row in entering_buying.iterrows()]

data = data + entering_buying_data

In [259]:
entering_buying_data = convert_all_to_string(entering_buying_data)

In [260]:
data_list.append(entering_buying_data)

In [261]:
#write the entering_buying data to a json file in the testing_for_error folder
with open('testing_for_error/entering_buying_data.json', 'w') as f:
    f.write(json.dumps(entering_buying_data, indent=4))

In [262]:
# add the sheet request_training to the json file

In [263]:
request_training = pd.read_excel('testing_excel.xlsx', sheet_name='requesting_training')
request_training = request_training.replace(u'\xa0', u' ', regex=True)
request_training = request_training.replace(u'\u2019', "'", regex=True)
request_training = request_training.replace(u'\u00a0', ' ', regex=True)

In [264]:
request_training_data = [
    {
    "Speaker": request_training['Voicetype'][index],
    "Dialogue": request_training['Dialogue'][index],
    "Event": request_training['Event'][index],
    } for index, row in request_training.iterrows()]

data = data + request_training_data

In [265]:
request_training_data = convert_all_to_string(request_training_data)

In [266]:
data_list.append(request_training_data)

In [267]:
#write the request_training data to a json file in the testing_for_error folder
with open('testing_for_error/request_training_data.json', 'w') as f:
    f.write(json.dumps(request_training_data, indent=4))

In [268]:
#add the sheet marriage to the json file
marriage = pd.read_excel('testing_excel.xlsx', sheet_name='marriage')
marriage = marriage.replace(u'\xa0', u' ', regex=True)
marriage = marriage.replace(u'\u2019', "'", regex=True)
marriage = marriage.replace(u'\u00a0', ' ', regex=True)

In [269]:
marriage_data = [
    {
    "Speaker": marriage['Voicetype'][index],
    "Dialogue": marriage['Dialogue'][index],
    "Event": marriage['Event'][index],
    "Response_to": marriage['Response_to'][index]
    } for index, row in marriage.iterrows()]

In [270]:
marriage_data = convert_all_to_string(marriage_data)

In [271]:
data_list.append(marriage_data)

In [272]:
#write the marriage data to a json file in the testing_for_error folder
with open('testing_for_error/marriage_data.json', 'w') as f:
    f.write(json.dumps(marriage_data, indent=4))

In [273]:
data = data + marriage_data

In [274]:
#add the sheet wedding to the json file
wedding = pd.read_excel('testing_excel.xlsx', sheet_name='wedding')
wedding = wedding.replace(u'\xa0', u' ', regex=True)
wedding = wedding.replace(u'\u2019', "'", regex=True)
wedding = wedding.replace(u'\u00a0', ' ', regex=True)

In [275]:
wedding_data = [
    {
    "Speaker": wedding['Voicetype'][index],
    "Dialogue": wedding['Dialogue'][index],
    "Event": wedding['Event'][index],
    "Condition": wedding['Condition'][index],
    } for index, row in wedding.iterrows()]

data = data + wedding_data

In [276]:
wedding_data = convert_all_to_string(wedding_data)

In [277]:
data_list.append(wedding_data)

In [278]:
#writing the wedding data to a json file in the testing_for_error folder
with open('testing_for_error/wedding_data.json', 'w') as f:
    f.write(json.dumps(wedding_data, indent=4))

In [279]:
orcs_khajiits = pd.read_excel('testing_excel.xlsx', sheet_name='orcs_khajiits')

In [280]:
orcs_khajiits

Unnamed: 0,Voicetype,Dialogue,Condition,Event
0,FemaleOrc,"""What are you doing here, outlander? We don't ...",the Player is not a blood-kin,the Player entering an Orc Stronghold
1,FemaleOrc,"""This is our stronghold. Leave.""",the Player is not a blood-kin,the Player entering an Orc Stronghold
2,FemaleOrc,"""Outsiders. Nothing but trouble.""",the Player is not a blood-kin,the Player entering an Orc Stronghold
3,FemaleOrc,"""Malacath blesses us with this land. It's not ...",the Player is not a blood-kin,the Player entering an Orc Stronghold
4,FemaleOrc,"""This stronghold is for the Orcs. Leave. Now.""",the Player is not a blood-kin,the Player entering an Orc Stronghold
...,...,...,...,...
73,FemaleKhajiit,"""Khajiit are not welcome in the cities, so we ...",the Khajiit is a guard,the Player greeting a Khajiit Guard on the Kha...
74,FemaleKhajiit,"""Some come to buy, but others come to steal. K...",the Khajiit is a guard,the Player greeting a Khajiit Guard on the Kha...
75,FemaleKhajiit,"""Blessings of the moons upon you, traveler.""",the Khajiit is a guard,the Player greeting a Khajiit Guard on the Kha...
76,FemaleKhajiit,"""Khajiit is just a guard, and has no wares to ...",the Khajiit is a guard,the Player greeting a Khajiit Guard on the Kha...


In [281]:
orcs_khajiits.columns

Index(['Voicetype', 'Dialogue', 'Condition', 'Event'], dtype='object')

In [282]:
orcs_khajiits_data = [
    {
    "Speaker": orcs_khajiits['Voicetype'][index],
    "Dialogue": orcs_khajiits['Dialogue'][index],
    "Event": orcs_khajiits['Event'][index],
    "Condition": orcs_khajiits['Condition'][index],
    } for index, row in orcs_khajiits.iterrows()]

data = data + orcs_khajiits_data

In [283]:
orcs_khajiits_data = convert_all_to_string(orcs_khajiits_data)

In [284]:
data_list.append(orcs_khajiits_data)

In [285]:
#write the orcs_khajiits data to a json file in the testing_for_error folder
with open('testing_for_error/orcs_khajiits_data.json', 'w') as f:
    f.write(json.dumps(orcs_khajiits_data, indent=4))

In [286]:
#add the sheet housecarl to the json file
housecarl = pd.read_excel('testing_excel.xlsx', sheet_name='Housecarl')
housecarl = housecarl.replace(u'\xa0', u' ', regex=True)
housecarl = housecarl.replace(u'\u2019', "'", regex=True)
housecarl = housecarl.replace(u'\u00a0', ' ', regex=True)

In [287]:
housecarl_data = [
    {
    "Speaker": housecarl['Voicetype'][index],
    "Dialogue": housecarl['Dialogue'][index],
    "Event": housecarl['Event'][index],
    } for index, row in housecarl.iterrows()]

data = data + housecarl_data

In [288]:
housecarl_data = convert_all_to_string(housecarl_data)

In [289]:
data_list.append(housecarl_data)

In [290]:
#write the housecarl data to a json file in the testing_for_error folder
with open('testing_for_error/housecarl_data.json', 'w') as f:
    f.write(json.dumps(housecarl_data, indent=4))

In [291]:
# add the sheet Carriage to the json file
carriage = pd.read_excel('testing_excel.xlsx', sheet_name='Carriage')
carriage = carriage.replace(u'\xa0', u' ', regex=True)
carriage = carriage.replace(u'\u2019', "'", regex=True)
carriage = carriage.replace(u'\u00a0', ' ', regex=True)

In [292]:
carriage_data = [
    {
    "Speaker": carriage['Voicetype'][index],
    "Dialogue": carriage['Dialogue'][index],
    "Event": carriage['Event'][index],
    "Condition": carriage['Condition'][index],
    } for index, row in carriage.iterrows()]

data = data + carriage_data

In [293]:
carriage_data = convert_all_to_string(carriage_data)

In [294]:
data_list.append(carriage_data)

In [295]:
#write the carriage data to a json file in the testing_for_error folder
with open('testing_for_error/carriage_data.json', 'w') as f:
    f.write(json.dumps(carriage_data, indent=4))

In [296]:
guards = pd.read_excel('testing_excel.xlsx', sheet_name='Guards')
guards = guards.replace(u'\xa0', u' ', regex=True)
guards = guards.replace(u'\u2019', "'", regex=True)
guards = guards.replace(u'\u00a0', ' ', regex=True)

In [297]:
guards_data = [
    {
    "Speaker": "Guard",
    "Dialogue": guards['Dialogue'][index],
    "Event": guards['Event'][index],
    "Condition": guards['Condition'][index],
    } for index, row in guards.iterrows()]

data = data + guards_data

In [298]:
guards_data = convert_all_to_string(guards_data)

In [299]:
data_list.append(guards_data)

In [300]:
#write the guards data to a json file in the testing_for_error folder
with open('testing_for_error/guards_data.json', 'w') as f:
    f.write(json.dumps(guards_data, indent=4))

In [301]:
guards_race = pd.read_excel('testing_excel.xlsx', sheet_name='Guards_race')
guards_race = guards_race.replace(u'\xa0', u' ', regex=True)
guards_race = guards_race.replace(u'\u2019', "'", regex=True)
guards_race = guards_race.replace(u'\u00a0', ' ', regex=True)

In [302]:
guards_race

Unnamed: 0,Dialogue,Race,Condition,Event
0,"""What is it, Argonian?""",Argonian,,a Guard reacting to players race
1,"""Stay out of trouble, Argonian.""",Argonian,,a Guard reacting to players race
2,"""Stay out of trouble, lizard.""",Argonian,the Guard is a Stormcloak,a Guard reacting to players race
3,"""Staying out of trouble, kinsman?""",Nord,the Guard is Nord,a Guard reacting to players race
4,"""How can I help a brother Nord?""",Nord,the Guard is Nord,a Guard reacting to players race
5,"""Stay out of trouble, Nord.""",Nord,the Guard is not Nord,a Guard reacting to players race
6,"""Don't cross me, Elf.""","Altmer, Bosmer, or Dunmer",the Guard is Nord and a Stormcloak,a Guard reacting to players race
7,"""Stay out of trouble, Elf.""","Altmer, Bosmer, or Dunmer",the Guard is not a Stormcloak,a Guard reacting to players race
8,"""What is it, Elf?""","Altmer, Bosmer, or Dunmer",the Guard is not a Stormcloak,a Guard reacting to players race
9,"""Stay out of trouble, Khajiit.""",Khajiit,,a Guard reacting to players race


In [303]:
guards_race_data = [
    {
        "Speaker": "Guard",
        "Dialogue": row['Dialogue'],
        "Event": row['Event'],
        "Condition": row['Condition'],
        "Race": row['Race'],
    } for index, row in guards_race.iterrows()  # Ensure this matches the DataFrame you're indexing
]
data = data + guards_race_data

In [304]:
guards_race_data = convert_all_to_string(guards_race_data)

In [305]:
data_list.append(guards_race_data)

In [306]:
#write the guards_race_data to a json file in the testing_for_error folder
with open('testing_for_error/guards_race_data.json', 'w') as f:
    f.write(json.dumps(guards_race_data, indent=4))

In [307]:
#add the sheet Guard_Equipment to the json file
guard_equipment = pd.read_excel('testing_excel.xlsx', sheet_name='Guard_Equipment')
guard_equipment = guard_equipment.replace(u'\xa0', u' ', regex=True)
guard_equipment = guard_equipment.replace(u'\u2019', "'", regex=True)
guard_equipment = guard_equipment.replace(u'\u00a0', ' ', regex=True)

In [308]:
guard_equipment_data = [
    {
    "Speaker": "Guard",
    "Dialogue": guard_equipment['Dialogue'][index],
    "Event": guard_equipment['Event'][index],
    "Condition": guard_equipment['Condition'][index],
    "Equipment": guard_equipment['Equipment'][index],
    } for index, row in guard_equipment.iterrows()]

data = data + guard_equipment_data

In [309]:
guard_equipment_data = convert_all_to_string(guard_equipment_data)

In [310]:
data_list.append(guard_equipment_data)

In [311]:
#write the guard_equipment data to a json file in the testing_for_error folder
with open('testing_for_error/guard_equipment_data.json', 'w') as f:
    f.write(json.dumps(guard_equipment_data, indent=4))

In [312]:
#add the sheet Guard_skill to the json file
guard_skill = pd.read_excel('testing_excel.xlsx', sheet_name='Guard_skill')
guard_skill = guard_skill.replace(u'\xa0', u' ', regex=True)
guard_skill = guard_skill.replace(u'\u2019', "'", regex=True)
guard_skill = guard_skill.replace(u'\u00a0', ' ', regex=True)

In [313]:
guard_skill_data = [
    {
    "Speaker": "Guard",
    "Dialogue": guard_skill['Dialogue'][index],
    "Event": guard_skill['Event'][index],
    } for index, row in guard_skill.iterrows()]

data = data + guard_skill_data

In [314]:
guard_skill_data = convert_all_to_string(guard_skill_data)

In [315]:
data_list.append(guard_skill_data)

In [316]:
#write the guard_skill data to a json file in the testing_for_error folder
with open('testing_for_error/guard_skill_data.json', 'w') as f:
    f.write(json.dumps(guard_skill_data, indent=4))

In [317]:
# add the Stromcloak-imperial sheet to the json file
stromcloak_imperial = pd.read_excel('testing_excel.xlsx', sheet_name='Stromcloak-imperial')
stromcloak_imperial = stromcloak_imperial.replace(u'\xa0', u' ', regex=True)
stromcloak_imperial = stromcloak_imperial.replace(u'\u2019', "'", regex=True)
stromcloak_imperial = stromcloak_imperial.replace(u'\u00a0', ' ', regex=True)

In [318]:
stromcloak_imperial_data = [
    {
    "Speaker": stromcloak_imperial['Guard'][index],
    "Dialogue": stromcloak_imperial['Dialogue'][index],
    "Event": stromcloak_imperial['Event'][index],
    "Condition": stromcloak_imperial['Condition'][index],
    } for index, row in stromcloak_imperial.iterrows()]

data = data + stromcloak_imperial_data

In [319]:
stromcloak_imperial_data = convert_all_to_string(stromcloak_imperial_data)

In [320]:
data_list.append(stromcloak_imperial_data)

In [321]:
#write the stromcloak_imperial data to a json file in the testing_for_error folder
with open('testing_for_error/stromcloak_imperial_data.json', 'w') as f:
    f.write(json.dumps(stromcloak_imperial_data, indent=4))

In [322]:
#add the Guard_interactions sheet to the json file
guard_interactions = pd.read_excel('testing_excel.xlsx', sheet_name='Guard_interactions')
guard_interactions = guard_interactions.replace(u'\xa0', u' ', regex=True)
guard_interactions = guard_interactions.replace(u'\u2019', "'", regex=True)
guard_interactions = guard_interactions.replace(u'\u00a0', ' ', regex=True)

In [323]:
#keep the rows until row 103
guard_interactions = guard_interactions[:104]
guard_interactions

Unnamed: 0,Dialogue,Location,Condition,Event
0,"""<Town>'s under my protection. You watch yours...",Any Town,,Player interacts with the guard in a specific ...
1,"""You've come to Dawnstar at a strange time, fr...",Dawnstar,,Player interacts with the guard in a specific ...
2,"""The people here are still plagued with nightm...",Dawnstar,,Player interacts with the guard in a specific ...
3,"""Dangerous post, Dawnstar. If the bandits don'...",Dawnstar,,Player interacts with the guard in a specific ...
4,"""Old lady Frida is a grouch, but she sure can ...",Dawnstar,,Player interacts with the guard in a specific ...
...,...,...,...,...
99,"""South of here, and west of the road, lies Cro...",Windhelm,,Player interacts with the guard in a specific ...
100,"""I've no problem with your kind, Orc. But that...",Windhelm,the Player is an Orc,Player interacts with the guard in a specific ...
101,"""I've no problem with Orcs. But that stronghol...",Windhelm,,Player interacts with the guard in a specific ...
102,"""Across the river to the northeast lies Yngol ...",Windhelm,,Player interacts with the guard in a specific ...


In [324]:
guard_interactions_data = [
    {
    "Speaker": "Guard",
    "Dialogue": guard_interactions['Dialogue'][index],
    "Event": guard_interactions['Event'][index],
    "Condition": guard_interactions['Condition'][index],
    "Location": guard_interactions['Location'][index],
    } for index, row in guard_interactions.iterrows()]

data = data + guard_interactions_data

In [325]:
guard_interactions_data = convert_all_to_string(guard_interactions_data)

In [326]:
data_list.append(guard_interactions_data)

In [327]:
#write the guard_interactions data to a json file in the testing_for_error folder
with open('testing_for_error/guard_interactions_data.json', 'w') as f:
    f.write(json.dumps(guard_interactions_data, indent=4))

In [328]:
def convert_all_to_string(data):
    """
    Recursively convert all data in a dictionary or list to string.
    """
    if isinstance(data, dict):
        return {key: convert_all_to_string(value) for key, value in data.items()}
    elif isinstance(data, list):
        return [convert_all_to_string(item) for item in data]
    else:
        return str(data)

# Convert all data to string
string_data = convert_all_to_string(data)

In [329]:
# Write the shuffled data to a json file
with open('results/data.json', 'w') as f:
    f.write(json.dumps(string_data, indent=4))

In [330]:
# randomize the components of the json file
import random

random.shuffle(string_data)

# Write the shuffled data to a json file
with open('results/shuffled_data.json', 'w') as f:
    f.write(json.dumps(string_data, indent=4))

In [331]:
from sklearn.model_selection import train_test_split
from collections import defaultdict

# Custom function to split data based on the number of elements for each event
def custom_train_val_test_split(data, event_key):
    event_dict = defaultdict(list)
    for item in data:
        event_dict[item[event_key]].append(item)
    
    train_data = []
    validation_data = []
    test_data = []
    
    for event, items in event_dict.items():
        
        if len(items) == 1:
            train_data.extend(items)
        elif len(items) == 2:
            train_data.append(items[0])
            validation_data.append(items[1])
        else:
            # Calculate sizes for train, val, and test
            n_train = max(1, int(len(items) * 0.844))
            n_val_test = len(items) - n_train
            n_val = max(1, n_val_test // 2)
            n_test = n_val_test - n_val
            
            # Handle edge cases to ensure valid test_size
            if n_val_test == 0:
                train_split, val_test_split = items, []
            else:
                train_split, val_test_split = train_test_split(items, test_size=n_val_test, stratify=[item[event_key] for item in items],random_state=42)
            
            if len(val_test_split) == 1:
                validation_split, test_split = val_test_split, []
            else:
                if n_test == 0:
                    validation_split, test_split = val_test_split, []
                else:
                    validation_split, test_split = train_test_split(val_test_split, test_size=n_test, stratify=[item[event_key] for item in val_test_split], random_state=42)
            
            # Ensure each set has at least one item
            if not validation_split:
                validation_split.append(train_split.pop())
            if not test_split:
                test_split.append(train_split.pop())
            
            train_data.extend(train_split)
            validation_data.extend(validation_split)
            test_data.extend(test_split)
    
    return train_data, validation_data, test_data

# Split the data
train_data_strat, validation_data_strat, test_data_strat = custom_train_val_test_split(string_data, 'Event')

# Print lengths
print(f'Train Data Length: {len(train_data_strat)}')
print(f'Validation Data Length: {len(validation_data_strat)}')
print(f'Test Data Length: {len(test_data_strat)}')

Train Data Length: 1417
Validation Data Length: 177
Test Data Length: 181


In [332]:
#Calculate the total length of the train, validation and test data
print(len(train_data_strat)+len(validation_data_strat)+len(test_data_strat))

1775


In [333]:
# from the string data create a list with all the unique events but exclude the events with 1 or 2 elements
unique_events = [event for event in set([item['Event'] for item in string_data]) if sum([item['Event'] == event for item in string_data]) > 2]

# Check if each event is present in the train, validation, and test data and print the count of the total number of events missing
missing_events = 0
for event in unique_events:
    if event not in [item['Event'] for item in train_data_strat]:
        missing_events += 1
        print(f'Event "{event}" not in train data')
    if event not in [item['Event'] for item in validation_data_strat]:
        missing_events += 1
        print(f'Event "{event}" not in validation data')
    if event not in [item['Event'] for item in test_data_strat]:
        missing_events += 1
        print(f'Event "{event}" not in test data')
        
print(f'Total missing events: {missing_events}')

Total missing events: 0


In [334]:
# Write the stratified and split data to json files in the results folder
with open('results/stratified_train.json', 'w') as f:
    f.write(json.dumps(train_data_strat, indent=4))
    
with open('results/stratified_val.json', 'w') as f:
    f.write(json.dumps(validation_data_strat, indent=4))

with open('results/stratified_test.json', 'w') as f:
    f.write(json.dumps(test_data_strat, indent=4))

In [25]:
#add the Introduction sheet to the json file
introduction = pd.read_excel('testing_excel.xlsx', sheet_name='Introduction')


In [29]:
#remove the \n character from the dataframe
introduction = introduction.replace(u'\n', ' ', regex=True)
#remove the \u2019 character from the dataframe
introduction = introduction.replace(u'\u2019', "'", regex=True)
#remove the \u00a0 character from the dataframe
introduction = introduction.replace(u'\u00a0', ' ', regex=True)
#remove the \xa0 character from the dataframe
introduction = introduction.replace(u'\xa0', ' ', regex=True)
#remove the \u2014 character from the dataframe
introduction = introduction.replace(u'\u2014', '--', regex=True)
#remove the \u2026 character from the dataframe
introduction = introduction.replace(u'\u2026', '...', regex=True)

In [30]:
import json

In [31]:
introduction_data = [
    {
    "Game": introduction['Game'][index],
    "Monologue": introduction['Introduction'][index],
    "Context": introduction['Context'][index],
    } for index, row in introduction.iterrows()]

#pass the introduction data to a json file
introduction_data = convert_all_to_string(introduction_data)
with open('testing_for_error/introduction_data.json', 'w') as f:
    f.write(json.dumps(introduction_data, indent=4))

In [1]:
#Suffle the introduction data
random.shuffle(introduction_data)

#split the introduction data into 80 percent train 10 percent validation and 10 percent test
train_data_introduction, validation_data_introduction, test_data_introduction = train_test_split(introduction_data, test_size=0.2, random_state=42)

#print the lengths of the train, validation and test data
print(f'Train Data Length: {len(train_data_introduction)}')
print(f'Validation Data Length: {len(validation_data_introduction)}')
print(f'Test Data Length: {len(test_data_introduction)}')


NameError: name 'random' is not defined

In [175]:
#delete the contents of the results folder na the testing_for_error folder
import os
import shutil

# Delete the contents of the results folder

def delete_contents(folder):
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))
     
folder = 'results'
delete_contents(folder)
folder = 'testing_for_error'
delete_contents(folder)