# Import & Setup

In [1]:
import pandas as pd
import numpy as np
import json
# Show every column and the full text of every cell when a DataFrame is displayed.
# Fully qualified option names are used for both settings so the lookup does not
# depend on pandas' partial-name matching.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Data

`LazyVoiceFinder_export.csv` contains a full export of all Skyrim + DLC dialogue exchanges.
To generate this dataset:
1. Download [Lazy Voice Finder](https://www.nexusmods.com/skyrimspecialedition/mods/8619)
2. Set the game to Skyrim in LazyVoiceFinder.exe
3. Right-click any column -> Unhide Columns -> (All Columns)
4. Go to File -> Export CSV File... and save the export as `LazyVoiceFinder_export.csv` in the same folder as this notebook

In [2]:
# Load the full Lazy Voice Finder export.
df_all = pd.read_csv('LazyVoiceFinder_export.csv', low_memory=False)

# Discard rows flagged as having a bad file name / no dialogue, or no voice file.
df_all = df_all[~df_all['State'].isin(['Bad File Name, No Dialogue', 'No Voice File'])]

# Keep only rows whose English dialogue is present and non-empty.
df_all = df_all[df_all['Dialogue 1 - English'].notna() & df_all['Dialogue 1 - English'].ne('')]

print(df_all.shape)
df_all.head()

(74642, 31)


Unnamed: 0,State,Index,Plugin,Last Modifier,FormId,FormId without Load#,Response Num,Edid,Topic FormId,Topic Edid,Topic Text,Quest FormId,Quest Edid,Quest Name,Category/Subtype,Emotion,Conditions,Bsa Name,Full Path,Base Path,Path,File Name,Extensions,Voice Type,Prompt,Dialogue 1 - English,Dialogue 2 - English,Script Notes,Edits,Keyword,StringId
26,,0,skyrim.esm,skyrim.esm,000130DA,000130DA,1.0,[000130DA],00035E10,MG07TolfdirForcegreetBranchTopic,,0001F257,MG07,The Staff of Magnus,Topic/Custom,Neutral,"GetStage: ""The Staff of Magnus"" [QUST:0001F257] >= 50\nGetIsID: ""Tolfdir"" [NPC_:0001C19E] = 1",Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_1.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_1.fuz,mg07_mg07tolfdirforcegreet_000130da_1.fuz,.fuz,maleoldkindly,,"You survived! You have it, then?","You survived! You have it, then?",,,,25001.0
27,,0,skyrim.esm,skyrim.esm,000130DA,000130DA,2.0,[000130DA],00035E10,MG07TolfdirForcegreetBranchTopic,,0001F257,MG07,The Staff of Magnus,Topic/Custom,Neutral,"GetStage: ""The Staff of Magnus"" [QUST:0001F257] >= 50\nGetIsID: ""Tolfdir"" [NPC_:0001C19E] = 1",Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_2.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_2.fuz,mg07_mg07tolfdirforcegreet_000130da_2.fuz,.fuz,maleoldkindly,,Let's hope it's as powerful as the Psijics believe it to be.,Let's hope it's as powerful as the Psijics believe it to be.,,,,17544.0
28,,0,skyrim.esm,skyrim.esm,00013113,00013113,1.0,[00013113],0001310D,[0001310D],,00036192,MQ203,Alduin's Wall,Scene/Scene,Surprise,,Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\femaleuniquedelphine\mq203__00013113_1.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\femaleuniquedelphine\mq203__00013113_1.fuz,mq203__00013113_1.fuz,.fuz,femaleuniquedelphine,,"That's done it! Look, it's coming to life!","That's done it! Look, it's coming to life!",awe breaking through Delphine's usual jaded persona,,,60023.0
29,,0,skyrim.esm,skyrim.esm,00013114,00013114,1.0,[00013114],0001310C,[0001310C],,00036192,MQ203,Alduin's Wall,Scene/Scene,Happy,,Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\femaleuniquedelphine\mq203__00013114_1.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\femaleuniquedelphine\mq203__00013114_1.fuz,mq203__00013114_1.fuz,.fuz,femaleuniquedelphine,,You did it. There's the entrance!,You did it. There's the entrance!,,,,49515.0
30,,0,skyrim.esm,skyrim.esm,00013115,00013115,1.0,[00013115],0001310A,[0001310A],,00036192,MQ203,Alduin's Wall,Scene/Scene,Surprise,,Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\maleuniqueesbern\mq203__00013115_1.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\maleuniqueesbern\mq203__00013115_1.fuz,mq203__00013115_1.fuz,.fuz,maleuniqueesbern,,There's no telling what we might find inside!,There's no telling what we might find inside!,excitedly,,,30760.0


In [3]:
# Summarise every column: count/unique/top/freq for text columns and
# mean/std/quantiles for numeric ones.
df_all.describe(include='all')

Unnamed: 0,State,Index,Plugin,Last Modifier,FormId,FormId without Load#,Response Num,Edid,Topic FormId,Topic Edid,Topic Text,Quest FormId,Quest Edid,Quest Name,Category/Subtype,Emotion,Conditions,Bsa Name,Full Path,Base Path,Path,File Name,Extensions,Voice Type,Prompt,Dialogue 1 - English,Dialogue 2 - English,Script Notes,Edits,Keyword,StringId
count,0.0,74642.0,74642,74642,74642,74642,74642.0,74642,74642,74642,24736,74642,74642,54687,74642,74642,53773,74642,74642,74642,74642,74642,74642,74642,2987,74642.0,74642.0,26256,1193,0.0,74642.0
unique,0.0,,4,5,35455,35288,,35455,17506,17506,5622,1760,1760,1137,79,8,9564,1,74642,1,74642,43827,1,134,1087,41088.0,41088.0,6094,708,,
top,,,skyrim.esm,skyrim.esm,0001FB79,0001FB79,,dunEldergleamT03Shared02,0001F319,DialogueGenericSharedInfo,DialogueGenericSharedInfo,00013EB3,DialogueGeneric,Generic dialogue,Topic/Custom,Neutral,GetIsVoiceType: DefaultNPCVoiceTypes [FLST:0003B4A5] = 1,Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_1.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_1.fuz,duneldergl_duneldergleamt0_0001fb79_1.fuz,.fuz,malenord,(Remain silent),,,Angry combat attack line,"I'll take whatever ya got, after I kill you.",,
freq,,,61165,59802,56,56,,56,2010,2010,2010,11199,11199,12196,23347,28836,756,74642,1,74642,1,56,74642,4453,77,1781.0,1781.0,678,65,,
mean,,0.565446,,,,,1.199593,,,,,,,,,,,,,,,,,,,,,,,,30468.828032
std,,1.270529,,,,,0.602203,,,,,,,,,,,,,,,,,,,,,,,,20856.653934
min,,0.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,4.0
25%,,0.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,9852.0
50%,,0.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,29128.0
75%,,0.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,48195.75


In [4]:
# Show the voice-file path of the first remaining row.
df_all['Full Path'].head(1)

26    C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_1.fuz
Name: Full Path, dtype: object

In [5]:
# Inspect every row that shares one Topic FormId.
df_all[df_all['Topic FormId'].eq('00035E10')]

Unnamed: 0,State,Index,Plugin,Last Modifier,FormId,FormId without Load#,Response Num,Edid,Topic FormId,Topic Edid,Topic Text,Quest FormId,Quest Edid,Quest Name,Category/Subtype,Emotion,Conditions,Bsa Name,Full Path,Base Path,Path,File Name,Extensions,Voice Type,Prompt,Dialogue 1 - English,Dialogue 2 - English,Script Notes,Edits,Keyword,StringId
26,,0,skyrim.esm,skyrim.esm,000130DA,000130DA,1.0,[000130DA],350000000000.0,MG07TolfdirForcegreetBranchTopic,,0001F257,MG07,The Staff of Magnus,Topic/Custom,Neutral,"GetStage: ""The Staff of Magnus"" [QUST:0001F257] >= 50\nGetIsID: ""Tolfdir"" [NPC_:0001C19E] = 1",Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_1.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_1.fuz,mg07_mg07tolfdirforcegreet_000130da_1.fuz,.fuz,maleoldkindly,,"You survived! You have it, then?","You survived! You have it, then?",,,,25001.0
27,,0,skyrim.esm,skyrim.esm,000130DA,000130DA,2.0,[000130DA],350000000000.0,MG07TolfdirForcegreetBranchTopic,,0001F257,MG07,The Staff of Magnus,Topic/Custom,Neutral,"GetStage: ""The Staff of Magnus"" [QUST:0001F257] >= 50\nGetIsID: ""Tolfdir"" [NPC_:0001C19E] = 1",Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_2.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000130da_2.fuz,mg07_mg07tolfdirforcegreet_000130da_2.fuz,.fuz,maleoldkindly,,Let's hope it's as powerful as the Psijics believe it to be.,Let's hope it's as powerful as the Psijics believe it to be.,,,,17544.0
44796,,0,skyrim.esm,skyrim.esm,000C2778,000C2778,1.0,[000C2778],350000000000.0,MG07TolfdirForcegreetBranchTopic,,0001F257,MG07,The Staff of Magnus,Topic/Custom,Puzzled,"GetStage: ""The Staff of Magnus"" [QUST:0001F257] >= 50",Skyrim - Voices_en0.bsa,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa\sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000c2778_1.fuz,C:\Games\Steam\steamapps\common\Skyrim Special Edition\Data\Skyrim - Voices_en0.bsa,sound\voice\skyrim.esm\maleoldkindly\mg07_mg07tolfdirforcegreet_000c2778_1.fuz,mg07_mg07tolfdirforcegreet_000c2778_1.fuz,.fuz,maleoldkindly,,What are we waiting for?,What are we waiting for?,,,,33274.0


In [6]:
# Ten most frequent Topic Text values.
df_all['Topic Text'].value_counts().iloc[:10]

Topic Text
DialogueGenericSharedInfo      2010
CWSharedInfoStack               978
Hello.                          376
ShoutStartShort                 365
What have you got for sale?     354
ShoutStartLong                  348
PushRoDa                        346
ShoutEndShort                   339
DialogueGenericHello            244
DCETAttack                      240
Name: count, dtype: int64

In [7]:
# Ten most frequent Prompt values.
df_all['Prompt'].value_counts().iloc[:10]

Prompt
(Remain silent)                                                                    77
I'd like to decorate my home.                                                      48
Consider it done.                                                                  41
What'll you give me for these?                                                     28
Floor decorations. (400 gold)                                                      24
I'll take you to the monster you're hunting. (Persuade)                            24
You know what you have to do.                                                      19
What can you tell me about yourself?                                               19
Dead.                                                                              18
I don't have time for this. I can get rough with you if I have to. (Intimidate)    18
Name: count, dtype: int64

In [8]:
# Number of rows per dialogue Category/Subtype, most common first.
df_all['Category/Subtype'].value_counts()

Category/Subtype
Topic/Custom                      23347
Scene/Scene                       13151
Miscellaneous/Hello                9368
Miscellaneous/SharedInfo           8864
Combat/Attack                      2476
                                  ...  
Favors/FlyingMountLand                6
Favors/FlyingMountCancelLand          6
Miscellaneous/StandonFurniture        2
Favor/Intimidate                      1
Favor/Bribe                           1
Name: count, Length: 79, dtype: int64

In [9]:
# Columns needed to build player-input / NPC-response pairs
filtered_cols = ['FormId','Response Num', 'Topic Text','Quest FormId','Quest Name','Category/Subtype','Emotion','Voice Type','Prompt','Dialogue 1 - English']
df = df_all[filtered_cols]

# Keep only rows that have a Topic Text, and drop the 'Generic dialogue' quest
df = df.loc[df['Topic Text'].notna()]
df = df.loc[(df['Quest Name']!='Generic dialogue')]
# Filter out cases where Topic Text just says CWSharedInfoStack and Prompt contains nothing
df = df.loc[~((df['Topic Text']=='CWSharedInfoStack') & (df['Prompt'].isna()))]

# Filter out cases where the player response is not in spoken English
# "Hello." is also removed as the responses from this short statement vary too wildly
excluded_topics = ['Hello.','ShoutStartShort','ShoutStartLong','PushRoDa','ShoutEndShort','DCETAttack','RelationshipAdoption_SharedInfos',
                   '(Invisible continue)','(Invisible Continue)','...','Shout2','Shout1b','CommandedHello','MS11','DLC1ThrallHellos',
                   'DialogueCarriageSystemIdle','DCETNoticeCorpse','(Sharedinfo)','TG01Hellos','TG09NDAttacks',
                   'RelationshipMarriagePostWeddingLoveInterestBlockingTopic','DA03Hello','(Invisible Continues Linked to Self)','TG08BHellos',
                   'BYOHRelationshipAdoptableOrphanable_SharedInfos','DLC1VQ07Attack','(Invisible Continue) Location of Attack',
                   'RelationshipAdoption_FGReceiveGiftTopic','DBNazirEvictionBranchTopic','DLC1VQ04Hellos','dunRannveigQSTSHARED01',
                   'T01Hellos','TG09NDDeaths','(Invisible Continue)(Forced Good bye) Time Limit to Attack',
                   '(Invisible Continue)(Forced Goodbye) Time Limit to Attack','(forcegreet)','(Invisible Continue) Difficulty based on Attack Dela',
                   '(Invisible Continue) Difficulty based on Attack Delta','TG09NDCombatToNormal','DB11AmaundJobDoneTopic','DB11EmperorPlayerResponse13']
# .copy() makes df an independent frame, so the column assignments below write to
# df itself instead of a filtered slice of df_all (avoids SettingWithCopyWarning)
df = df.loc[~(df['Topic Text'].isin(excluded_topics))].copy()

# DialogueRiftenGoodbyes can just be set to Goodbye
df['Topic Text'] = np.where(df['Topic Text']=='DialogueRiftenGoodbyes','Goodbye.', df['Topic Text'])
# Where Topic Text = CWSharedInfoStack, the player's dialogue can be found in Prompt
df['Input'] = np.where(df['Topic Text']=='CWSharedInfoStack', df['Prompt'], df['Topic Text'])
print(df.shape[0])
df.head(2)

17569


Unnamed: 0,FormId,Response Num,Topic Text,Quest FormId,Quest Name,Category/Subtype,Emotion,Voice Type,Prompt,Dialogue 1 - English,Input
42,13626,1.0,Where can I find fire salts?,0005331D,Stoking the Flames,Topic/Custom,Fear,malebrute,,A flame atronach's body might provide fire salt. They're dangerous creatures that can be summoned by wizards.,Where can I find fire salts?
43,13626,3.0,Where can I find fire salts?,0005331D,Stoking the Flames,Topic/Custom,Neutral,malebrute,,"Of course, it would be much easier to check with an alchemist. They occasionally have them for sale.",Where can I find fire salts?


In [10]:
def group_dialogue(df):
    '''Group dialogue rows into one Alpaca-style record per NPC response.

    NPC responses are often split across multiple rows.
    Unique responses can be identified by FormId, and grouped back into a single
    response via the order of Response Num.

    Rows that repeat the same (Response Num, dialogue text) within a FormId
    (presumably one row per voice type in the export) are collapsed, so a
    sentence is not repeated in the joined output.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'FormId', 'Response Num' and 'Dialogue 1 - English', plus
        the player's line in 'Input' (preferred) or 'Topic Text'.

    Returns
    -------
    list of dict
        One {"instruction", "input", "output"} dict per unique (input, output)
        pair, in FormId order.
    '''
    # 'Input' carries the cleaned player line (Prompt substituted for the
    # CWSharedInfoStack placeholder); fall back to 'Topic Text' for frames
    # where that column has not been built.
    input_col = 'Input' if 'Input' in df.columns else 'Topic Text'

    processed_data = []
    seen_entries = set()

    for _, group in df.groupby('FormId'):
        # Stable sort keeps the original row order among equal Response Nums;
        # exact repeats of a line are then dropped before joining.
        ordered = (
            group.sort_values('Response Num', kind='stable')
                 .drop_duplicates(subset=['Response Num', 'Dialogue 1 - English'])
        )

        # Join the dialogues
        dialogues = ' '.join(ordered['Dialogue 1 - English'].tolist())

        # Get the player's input
        player_input = ordered[input_col].iloc[0]

        # Save the player / NPC interaction in an Alpaca style data format
        entry = {
            "instruction": "Generate dialogue in the style of Skyrim.",
            "input": player_input,
            "output": dialogues
        }

        # Only keep the first occurrence of each (input, output) pair
        entry_key = (player_input, dialogues)
        if entry_key not in seen_entries:
            processed_data.append(entry)
            seen_entries.add(entry_key)

    return processed_data

In [11]:
# Build the Alpaca-style records, report how many there are, and preview the first five.
data = group_dialogue(df)
print(len(data))
data[:5]

8830


[{'instruction': 'Generate dialogue in the style of Skyrim.',
  'input': 'Where can I find fire salts?',
  'output': "A flame atronach's body might provide fire salt. They're dangerous creatures that can be summoned by wizards. Of course, it would be much easier to check with an alchemist. They occasionally have them for sale."},
 {'instruction': 'Generate dialogue in the style of Skyrim.',
  'input': 'Sorry to hear that.',
  'output': "Shutting this place down would dishonor my family's heritage, but what other choice will I have?"},
 {'instruction': 'Generate dialogue in the style of Skyrim.',
  'input': "I'll get some for you.",
  'output': 'You will? Thank you! Ten pinches of fire salts should give me all I need to bring this forge back to life.'},
 {'instruction': 'Generate dialogue in the style of Skyrim.',
  'input': 'Goodbye.',
  'output': "Return anytime. You're quite welcome here."},
 {'instruction': 'Generate dialogue in the style of Skyrim.',
  'input': 'Goodbye.',
  'outpu

In [12]:
# Write the records to disk as JSON indented by four spaces.
with open('skyrim_alpaca_style_dataset.json', 'w') as out_file:
    out_file.write(json.dumps(data, indent=4))