In [1]:
import pandas as pd
import numpy as np

In [97]:
dialogues = pd.read_csv("../data/got_dialouge_sentiment.csv")
speakers = pd.DataFrame(dialogues['Speaker'].unique(), columns=["Speaker"])

characters = pd.read_csv("../data/game_of_thrones_characters_cleaned.csv")
names = pd.DataFrame(characters.Name.unique(), columns=["Name"])

In [98]:
# Define the function to process the DataFrame
def clean_speakers_column(df, column):
    df['lower'] = df[column].str.lower().str.strip().str.replace(r'\(.*?\)', '', regex=True).str.strip()
    df['bracket'] = df[column].str.extract(r'\((.*?)\)', expand=False)
    return df

In [99]:
speakers = clean_speakers_column(speakers, "Speaker")
names = clean_speakers_column(names, "Name")

In [101]:
def map_longest_names_with_whitespace(names_list, exclude_words):
    """
    Maps names to their longest version from a list of names, ensuring a whitespace separates the short and long versions.

    Parameters:
        names_list (list): A list of names.
        exclude_words (list): A list of words that should never be changed.

    Returns:
        dict: A dictionary where keys are names from the input list, and values
              are the longest names that start with the key and meet the conditions.
    """
    name_mapping = {}

    # Remove any NaN or None entries
    names_list = [name for name in names_list if pd.notna(name)]
    
    # Sort the names_list by length in descending order for efficient matching
    sorted_names = sorted(names_list, key=len, reverse=True)
    
    for name in names_list:
        # Skip names in the exclude_words list
        if name in exclude_words:
            continue
        
        # Find all names that start with the current name, have a whitespace after it, 
        # do not contain '&' or '#', and are not longer than 30 characters.
        matching_names = [
            longer_name for longer_name in sorted_names 
            if (longer_name.startswith(f"{name} ") and 
                longer_name != name and 
                '&' not in longer_name and 
                '#' not in longer_name and 
                'and' not in longer_name and
                # there are no numbers in the name
                not any(char.isdigit() for char in longer_name) and 
                len(longer_name) <= 30)
        ]
        
        # Select the longest name, if any
        if matching_names:
            name_mapping[name] = max(matching_names, key=len)

    # addional manual adds
    name_mapping['sam'] = 'samwell tarly'
    
    return name_mapping


In [102]:
exclude_words = ['guard', 'soldier', 'knight','captain','child']
speaker_map = map_longest_names_with_whitespace(speakers.lower.unique(), exclude_words)

In [103]:
# loop through the speaker dataframe and apply the speaker map if it exists
for index, row in speakers.iterrows():
    if row['lower'] in speaker_map:
        speakers.at[index, 'lower'] = speaker_map[row['lower']]


In [104]:
mapper = {
    # A
    'aemon': 'aemon',
    'aggar': None,
    'aggo': None, 
    'alanna': None,
    'allo': None,
    'anara': None,
    'anguy': 'anguy',
    'archmaester': None,
    'armeca': None,
    'arthur': 'arthur',
    'ash': None,
    'aya': None,
    'arthur dayne': None,
    'axell florent': 'axell florent',
    'arwaya frey': None,
    'aeron greyjoy': 'aeron',
    'areo hotah': 'areo',
    'adrack humble': None,
    'alys karstark': None,
    'alton lannister': 'alton',
    'amory lorch': 'amory',
    'addam marbrand': 'addam marbrand',
    'arys oakheart': None,
    'aron santagar': None,
    'arya stark': 'arya stark',
    'aberdolf strongbeard': None,
    'aerys ii targaryen': None,
    'alliser thorne': 'alliser thorne',
    'anya waynwood': None,

    # B
    'balerion': None,
    'balian': None,
    'bannen': None,
    'baratheon general': None,
    'baratheon soldier': None,
    'barra': None,
    'bastard%27s girls': None,
    'bernadette': None,
    'bianca': 'bianca',
    'billy': None,
    'biter': None,
    'black lorren': 'black lorren',
    'blood': None,
    'boros blount': None,
    'boake': None,
    'boat commander': None,
    'bobono': 'bobono',
    'bolton officer': 'bolton officer',
    'borba': None,
    'borkoy': None,
    'braavosi madam': None,
    'brant': None,
    'brea': None,
    'bronn': 'bronn',
    'brothel guard': None,
    'brozho': None,
    'brusco': None,
    'beric dondarrion': 'beric dondarrion',
    'balon greyjoy': 'balon',
    'bowen marsh': 'bowen marsh',
    'belicho paenymion': 'belicho',
    'brynden rivers': None,
    'barristan selmy': 'barristan selmy',
    'benjen stark': 'benjen stark',
    'bran stark': 'bran stark',
    'balon swann': None,
    'brienne of tarth': 'brienne',
    'brynden tully': 'brynden',

    # C
    'caleotte': None,
    'camello': 'camello',
    'captain%27s daughter': None,
    'captured wight': None,
    'catspaw': None,
    'cley cerwyn': None,
    'chella': None,
    'citadel maester': None,
    'clarenzo': 'clarenzo',
    'clea': None,
    'clydas': None,
    'cohollo': 'cohollo',
    'colen of greenpools': 'colen',
    'cooper': 'cooper',
    'copper king': None,
    'craster': 'craster',
    'craster%27s last son': None,
    'craya': 'crayah',
    'cressen': 'cressen',
    'cersei lannister': 'cersei lannister',
    'catelyn stark': 'catelyn stark',

    # D
    'desmond crakehall': None,
    'dagmer': 'dagmer',
    'daisy': 'daisy',
    'dim dalba': 'dim dalba',
    'deziel dalt': None,
    'dancer': None,
    'dareon': None,
    'derek': None,
    'despondent man': None,
    'dirah': 'dirah',
    'dongo': None,
    'doreah': 'doreah',
    'dothraki crone': None,
    'drennan': None,
    'drogo': 'drogo',
    'drogo%27s stallion': None,
    'drogon': None,
    'drowned man': None,
    'dwarf 1': None,
    'dwarf 2': None,
    'dwarf 3': None,
    'dwarf 4': None,
    'dwarf 5': None,
    'dying man': 'dying man',
    'derwa frey': None,
    'donnel hill': None,
    'dontos hollard': None,
    'duncan liddle': None,
    'denys mallister': None,
    'doran martell': 'doran',
    'daario naharis': 'daario naharis',
    'daemon sand': None,
    'davos seaworth': 'davos',
    'daenerys targaryen%27s mare': None,
    'daenerys targaryen': 'daenerys targaryen',
    'dickon tarly': 'dickon',
    'donnel waynwood': 'donnel',

    # E
    'edric baratheon': None,
    'emmon cuy': None,
    'ebrose': None,
    'echeal': None,
    'eddie': None,
    'eglantine': None,
    'elder slave': 'elder meereen slave',
    'escan': None,
    'eyrie guard': None,
    'euron greyjoy': 'euron',
    'eon hunter': None,
    'ellaria sand': 'ellaria',
    'eldrick sarsfield': None,
    'eddard stark': 'eddard stark',
    'endrew tarth': None,
    'eddison tollett': 'eddison tollett',
    'edmure tully': 'edmure',

    # F
    'faith militant 1': None,
    'faith militant 2': None,
    'farlen': 'farlen',
    'farmer': 'farmer hamlet',
    'fennesz': 'fennesz',
    'fergus': None,
    'fighting pit announcer': None,
    'forzho': None,
    'frances': None,
    'frey guard': 'frey guard',
    'frey knight': 'frey man',
    'frey soldier 1': 'frey soldier',
    'frey soldier 2': 'frey soldier #2',
    'freya frey': None,

    # G
    'gendry baratheon': 'gendry',
    'gregor clegane': None,
    'gared': 'gared',
    'gatins': 'gatins',
    'gelmarr': None,
    'genna': None,
    'geoff': None,
    'gerald': None,
    'ghita': None,
    'ghost': None,
    'gilly': 'gilly',
    'galbart glover': 'galbart glover',
    'goatherd': 'goatherd',
    'gold cloak': 'gold cloak',
    'gold cloak 1': 'guard 1',
    'gold cloak 2': 'guard 2',
    'gordo': None,
    'gordy': None,
    'grancer': None,
    'great master': 'great master #1',
    'green khal': None,
    'grenn': 'grenn',
    'grey wind': None,
    'grey worm': 'grey worm',
    'guymon': 'guymon',
    'gerold hightower': None,
    'gyles rosby': None,
    'greizhen mo ullhor': 'greizhen',
    'greatjon umber': 'greatjon umber',

    # H
    'haggo': None,
    'halder': None,
    'hallyne': 'haylene',
    'harker': None,
    'harrag': 'harrag',
    'helliweg': None,
    'henk': None,
    'heward': None,
    'high priestess': None,
    'high septon': 'high septon',
    'high sparrow': 'high sparrow',
    'hobb': None,
    'hodor': 'hodor',
    'hog farmer': None,
    'hot pie': 'hot pie',
    'hugh of the vale': 'hugh of the vale',
    'harald karstark': None,
    'hizdahr zo loraq%27s manservant': None,
    'hizdahr zo loraq': 'hizdahr zo loraq',
    'howland reed': None,
    'harry strickland': 'harry',
    'harys swyft': None,
    'hoster tully': None,

    # I
    'imry florent': None,
    'iggo': None,
    'ironborn captain': 'ironborn',
    'irri': 'irri',
    'izembaro': 'izembaro',
    'illyrio mopatis': 'illyrio mopatis',
    'ilyn payne': None,

    # J
    'jon arryn': None,
    'joffrey baratheon': 'joffrey baratheon',
    'jonos bracken': 'jonos bracken',
    'jack bulwer': None,
    'jory cassel': 'jory cassel',
    'jafer flowers': None,
    'janeya frey': None,
    'joyeuse frey': None,
    'jaqen h%27ghar': 'jaqen',
    'jack': None,
    'jacks': 'jacks',
    'jhiqui': 'jhiqui',
    'joby': None,
    'johnna': None,
    'joss': None,
    'jaime lannister': 'jaime lannister',
    'jaremy mallister': None,
    'jeor mormont': 'jeor mormont',
    'jorah mormont': 'jorah mormont',
    'jeyne poole': None,
    'jojen reed': 'jojen',
    'jaremy rykker': 'jaremy rykker',
    'janos slynt': 'janos slynt',
    'jon snow': 'jon snow',
    'joss stilwood': None,

    # K
    'kitty frey': None,
    'karsi': 'karsi',
    'kayla': None,
    'kegs': None,
    'kesh': None,
    'king%27s landing baker': "king's landing baker",
    'king%27s landing boaster': None,
    'king%27s landing rioter': None,
    'kinvara': 'kinvara',
    'koner': None,
    'kovarro': 'kovarro',
    'kullback': None,
    'kurleket': None,
    'kevan lannister': 'kevan lannister',
    'kraznys mo nakloz': 'kraznys mo nakloz',
    'karl tanner': 'karl tanner',

    # L
    'lysa arryn': 'lysa arryn',
    'lord blackmont': None,
    'lothor brune': None,
    'lady crane': 'lady crane',
    'lothar frey': 'lothar',
    'lord hornwood': None,
    'lady': None,
    'lannister captain': None,
    'lannister scout': 'lannister scout',
    'lancel lannister': 'lancel lannister',
    'last dragon': None,
    'lead dornish guard': None,
    'lead kingsguard': None,
    'leadranach': None,
    'leaf': 'leaf',
    'leo lefford': 'leo lefford',
    'lem': 'lem',
    'lhara': 'lhara',
    'little bird': 'little bird',
    'loboda': 'loboda',
    'locke': 'locke',
    'lommy': 'lommy',
    'lord of bones': 'lord of bones',
    'lordsport dockhand': None,
    'lowell': None,
    'loyal night%27s watch man 1': None,
    'loyal night%27s watch man 2': None,
    'luke': None,
    'luwin': 'luwin',
    'lynderly knight': None,
    'lord mazin': None,
    'lyanna mormont': 'lyanna mormont',
    'lyanna stark': 'lyanna',
    'lollys stokeworth': 'lollys stokeworth',
    'loras tyrell': 'loras tyrell',

    # M
    'myrcella baratheon': 'myrcella baratheon',
    'medger cerwyn': None,
    'mirri maz duur': 'mirri maz duur',
    'marianne frey': None,
    'merry frey': 'merry',
    'masha heddle': 'masha heddle',
    'melara hetherspoon': 'melara',
    'martyn lannister': 'martyn',
    'maggy': 'maggy',
    'mago': 'mago',
    'malakho': None,
    'malko': 'malko',
    'mallister supporter': None,
    'malugo': None,
    'marei': 'marei',
    'marillion': 'marillion',
    'martha': None,
    'master torturer': None,
    'matthar': None,
    'meake': None,
    'melisandre': 'melisandre',
    'merchant captain': None,
    'mero': 'mero',
    'mhaegen': 'mhaegen',
    'mikken': None,
    'mirelle': None,
    'missandei': 'missandei',
    'moelle': None,
    'mole%27s town madam': "mole's town madam",
    'mole%27s town whore': "mole's town whore",
    'mandon moore': 'mandon',
    'morag': 'morag',
    'mord': 'mord',
    'mordane': 'mordane',
    'morgan': 'morgan',
    'maege mormont': None,
    'moro': None,
    'moro%27s bloodrider 1': None,
    'moro%27s bloodrider 2': None,
    'moro%27s wife 1': None,
    'moro%27s wife 2': None,
    'mossador': 'mossador',
    'mother of dragons': None,
    'mully': None,
    'mycah': 'mycah',
    'myranda': 'myranda',
    'mance rayder': 'mance',
    'meera reed': 'meera',
    'matthos seaworth': 'matthos',
    'melessa tarly': 'melessa',
    'meryn trant': 'meryn trant',
    'margaery tyrell%27s handmaiden': None,
    'mace tyrell': 'mace tyrell',
    'margaery tyrell': 'margaery',
    'mag mar tun doh weg': None,

    # N
    'neyela frey': None,
    'nicho': None,
    'night king': None,
    'night%27s watch man': "night's watch",
    'night%27s watch messenger': None,
    'night%27s watch officer': None,
    'nora': None,
    'northern lord': None,
    'nymeria': 'nymeria',
    'nymeria sand': None,
    'ned umber': 'ned',

    # O
    'osmund kettleblack': None,
    'oberyn martell': 'oberyn',
    'old nan': 'old nan',
    'old man': 'old man',
    'old woman': None,
    'old woman prisoner': None,
    'olly': 'olly',
    'olly%27s mother': "olly's mother",
    'olyvar': 'olyvar',
    'orell': 'orell',
    'ornela': None,
    'orphan kid 1': None,
    'orphan kid 2': None,
    'osha': 'osha',
    'othor': None,
    'oznak zo pahl': None,
    'obara sand': 'obara',
    'olenna tyrell': 'olenna',
    'othell yarwyck': 'othell yarwick',

    # P
    'petyr baelish': 'petyr baelish',
    'petyr baratheon': 'petyr',
    'prendahl na ghezn': 'prendahl',
    'preston greenfield': None,
    'palla': None,
    'podrick payne': 'podrick',
    'pentoshi servant': None,
    'polliver': 'polliver',
    'portan': 'portan',
    'pyat pree': 'pyat pree',
    'prince of dorne': None,
    'protestor': 'protester',
    'pycelle': 'pycelle',
    'pypar': 'pypar',

    # Q
    'qartheen woman': None,
    'qhono': None,
    'qhorin': 'qhorin',
    'qorro': None,
    'qotho': 'qotho',
    'quaithe': 'quaithe',
    'quent': 'quent',
    'qyburn': 'qyburn',

    # R
    'robin arryn': 'robin arryn',
    'renly baratheon': 'renly baratheon',
    'robert baratheon': 'robert baratheon',
    'roose bolton%27s son': 'ramsay',
    'ramsay bolton': 'ramsay',
    'roose bolton': 'roose bolton',
    'rodrik cassel': 'rodrik cassel',
    'razdal mo eraz': 'razdal',
    'robett glover': 'robett glover',
    'rickard karstark': 'rickard karstark',
    'ralf kenning': 'ralf',
    'reginald lannister': 'reginald',
    'rakharo': 'rakharo',
    'rast': 'rast',
    'ray': 'ray',
    'red priestess': 'red priestess',
    'rennick': 'rennick',
    'rhaegal': None,
    'rhaego': None,
    'rhalko': None,
    'riddell': None,
    'riverlands traveller': None,
    'ryger rivers': 'ryger rivers',
    'rorge': 'rorge',
    'ros': 'ros',
    'ros%27s dissatisfied customer': None,
    'royal steward': None,
    'robar royce': None,
    'rickard stark': 'rickard',
    'rickon stark': 'rickon stark',
    'robb stark': 'robb stark',
    'rhaegar targaryen': 'rhaegar',
    'randyll tarly': 'randyll',
    'roslin tully': 'roslin',

    # S
    'selyse baratheon': 'selyse',
    'shireen baratheon': 'shireen',
    'stannis baratheon': 'stannis',
    'sandor clegane': 'sandor clegane',
    'syrio forel': 'syrio forel',
    'sarra frey': 'sarra',
    'serra frey': None,
    'shirei frey': None,
    'stevron frey': 'stevron frey',
    'salladhor saan': 'salladhor',
    'sally': None,
    'sam': 'sam',
    'sandhu': None,
    'sarra': 'sarra',
    'scolera': None,
    'septon': 'septon',
    'ser pounce': None,
    'shae': 'shae',
    'shagga': 'shagga',
    'shaggydog': None,
    'silk king': 'silk king',
    'simpson': None,
    'singing lannister soldier': None,
    'sissy': 'sissy',
    'slave': 'slaves',
    'slaver': 'slaver',
    'smitty': None,
    'son of the harpy': None,
    'sorcerer': None,
    'spice king': 'spice king',
    'stableboy': 'stable boy',
    'stark': None,
    'stark guard': 'stark guard',
    'stark messenger': None,
    'stark soldier': 'soldier',
    'sansa stark': 'sansa stark',
    'steelshanks': 'steelshanks walton',
    'steffon': None,
    'steve': None,
    'stiv': None,
    'samwell stone': None,
    'stonesnake': None,
    'stranger': None,
    'street tough 1': None,
    'street tough 2': None,
    'stygg': None,
    'styr': 'styr',
    'summer': None,
    'samwell tarly': 'samwell tarly',
    'smalljon umber': 'smalljon',

    # T
    'tommard baratheon': 'tomard',
    'tommen baratheon': 'tommen',
    'theon greyjoy': 'theon greyjoy',
    'torrhen karstark': None,
    'tyrion lannister': 'tyrion lannister',
    'tywin lannister': 'tywin lannister',
    'trystane martell': 'trystane',
    'tobho mott': 'tobho mott',
    'tycho nestoris': 'tycho',
    'tyene sand': 'tyene',
    'talisa stark': 'talisa',
    'tansy': 'tansy',
    'talla tarly': 'talla',
    'teela': 'teela',
    'ternesio terys': None,
    'thenn warg': 'thenn warg',
    'theo': None,
    'thin man': 'thin man',
    'thoros': 'thoros',
    'tickler': 'tickler',
    'timett': 'timett',
    'todder': None,
    'tom': None,
    'tomard': 'tomard',
    'tommy': None,
    'tormund': 'tormund',
    'tortured prisoner': None,
    'tortured slave': None,
    'tourney herald': None,
    'tully bannerman': None,
    'tyrell lady': None,
    'tyrell servant': None,

    # U
    'une': None,
    'unella': 'unella',
    'unsullied': 'unsullied',
    'urzen': None,

    # V
    'vance corbray': None,
    'vardis egen': 'vardis egen',
    'vayon poole': None,
    'viserys targaryen': 'viserys targaryen',
    'vala': 'vala',
    'valyrian slave': None,
    'valyrian sword smith': None,
    'varly': 'varly',
    'varys': 'varys',
    'violet': 'violet',
    'viserion': None,

    # W
    'walda bolton': 'walda',
    'walda frey': 'walda',
    'walder frey': 'walder frey',
    'waldra frey': None,
    'willem lannister': 'willem',
    'wendel manderly': None,
    'wyman manderly': None,
    'wex pyke': None,
    'walder rivers': None,
    'waymar royce': 'waymar royce',
    'waif': 'waif',
    'wallen': None,
    'warlock': None,
    'weasel': None,
    'westerosi trader': None,
    'weyland': None,
    'white rat': 'white rat',
    'white walker': None,
    'white walker 1': None,
    'white walker 2': None,
    'wildling girl': None,
    'will': 'will',
    'willa': 'willa',
    'wineseller': 'wine merchant',
    'winter town boy': None,
    'winter town man': None,
    'winterfell girl': None,
    'winterfell shepherd': 'winterfell shepherd',
    'winterfell spy': None,
    'willis wode': None,
    'wolkan': 'wolkan',
    'wounded lannister soldier': 'wounded soldier',
    'wun weg wun dar wun': 'wun wun',
    'wyl': None,

    # X
    'xaro xhoan daxos': 'xaro',

    # Y
    'yara greyjoy': 'yara',
    'yezzan zo qaggaz': 'yezzan',
    'yohn royce': 'yohn royce',
    'ygritte': 'ygritte',
    'yoren': 'yoren',
    'yunkai%27i whore': None,

    # Z
    'zalla': None,
    'zanrush': None,
}



In [None]:
import pickle
# save to file
with open('mapper.pkl', 'wb') as f:
    pickle.dump(mapper, f)

In [105]:
# load from file
with open('mapper.pkl', 'rb') as f:
    mapper = pickle.load(f)

In [106]:
# add all items in speaker mapper to the mapper if they are not already in the mapper
for key, value in speaker_map.items():
    if key not in mapper:
        mapper[key] = value


In [107]:
mapper

{'aemon': 'aemon',
 'aggar': None,
 'aggo': None,
 'alanna': None,
 'allo': None,
 'anara': None,
 'anguy': 'anguy',
 'archmaester': None,
 'armeca': None,
 'arthur': 'arthur',
 'ash': None,
 'aya': None,
 'arthur dayne': None,
 'axell florent': 'axell florent',
 'arwaya frey': None,
 'aeron greyjoy': 'aeron',
 'areo hotah': 'areo',
 'adrack humble': None,
 'alys karstark': None,
 'alton lannister': 'alton',
 'amory lorch': 'amory',
 'addam marbrand': 'addam marbrand',
 'arys oakheart': None,
 'aron santagar': None,
 'arya stark': 'arya stark',
 'aberdolf strongbeard': None,
 'aerys ii targaryen': None,
 'alliser thorne': 'alliser thorne',
 'anya waynwood': None,
 'balerion': None,
 'balian': None,
 'bannen': None,
 'baratheon general': None,
 'baratheon soldier': None,
 'barra': None,
 'bastard%27s girls': None,
 'bernadette': None,
 'bianca': 'bianca',
 'billy': None,
 'biter': None,
 'black lorren': 'black lorren',
 'blood': None,
 'boros blount': None,
 'boake': None,
 'boat comman

In [108]:
# for each character, get the dialogue mapping from the mapper
mapped_characters = []
for i, row in names.iterrows():
    mapped_characters.append(mapper[row['lower']])

names['mapped'] = mapped_characters


In [109]:
# amount of mapped in names that is not None
names_filtered =names.loc[names['mapped'].notna()]

# select rows in characters df that have dialouge
characters_with_dialouges = characters[characters['Name'].isin(names_filtered['Name'])]

# add mapped column to characters_with_dialouges
characters_with_dialouges['mapped'] = names_filtered['mapped']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  characters_with_dialouges['mapped'] = names_filtered['mapped']


In [110]:
speakers

Unnamed: 0,Speaker,lower,bracket
0,,,
1,WAYMAR ROYCE,waymar royce,
2,WILL,will,
3,GARED,gared,
4,ROYCE,royce,
...,...,...,...
953,RIVERLANDS LORD,riverlands lord,
954,DORNISH PRINCE,dornish prince,
955,IRONBORN LORD,ironborn lord,
956,VALE LORD,vale lord,


In [111]:
speakers_mapped = speakers.loc[speakers.lower.isin(names_filtered.mapped.values)]

speakers_mapped

Unnamed: 0,Speaker,lower,bracket
1,WAYMAR ROYCE,waymar royce,
2,WILL,will,
3,GARED,gared,
5,JON,jon snow,
7,SANSA,sansa stark,
...,...,...,...
933,JON(V.O.),jon snow,V.O.
934,LYANNA (O.S.),lyanna mormont,O.S.
936,PODRICK (V.O.),podrick,V.O.
943,WILLA,willa,


In [112]:
speakers_mapped = speakers.loc[speakers.lower.isin(names_filtered.mapped.values)]
    
dialogues_subset = dialogues.loc[dialogues['Speaker'].isin(speakers_mapped['Speaker'])]

dialouges_with_characters = dialogues_subset.merge(
    speakers_mapped[['Speaker', 'lower', 'bracket']],
    on='Speaker',
    how='left'
)

# rename lower to mapped
dialouges_with_characters.rename(columns={'lower': 'mapped'}, inplace=True)

In [115]:
# save to data folder
dialouges_with_characters.to_csv('../data/dialouges_w_map.csv', index=False)
characters_with_dialouges.to_csv('../data/characters_w_map.csv', index=False)


In [113]:
characters_with_dialouges

Unnamed: 0,Name,Born,Died,House(s),Affiliation(s),Title(s),Culture,Father,Mother,Sibling(s),...,Religion,Issue,Also known as,Personal arms,Species,Affiliation,Lover(s),Family,Dead,mapped
0,Aemon,"Aemon Targaryen 202 AC Red Keep , King's Landing","302 AC (aged 100) Castle Black , the Gift",Targaryen (renounced),"['Order of Maesters', ""Night's Watch""]",Prince (formerly) Maester,Valyrian,{ Maekar I Targaryen },{ Dyanna Dayne },{ Daeron Targaryen } { Aerion Targaryen } { Ae...,...,,,,,,,,,1,aemon
7,Anguy,"Dornish Marches , the Stormlands",,,['Brotherhood Without Banners'],,Marcher,,,,...,Lord of Light,,,,,,,,0,anguy
11,Lysa Arryn,"Lysa Tully Riverrun , the Riverlands","301 AC The Eyrie , the Vale of Arryn",Tully Arryn (by marriage) Baelish (by marriage),['Unknown'],,Rivermen,{ Hoster Tully },{ Minisa Whent },,...,Faith of the Seven,Robin Arryn,,,,,,,1,lysa arryn
12,Robin Arryn,"289 AC (age 16) Red Keep , King's Landing",,Arryn,['Royce'],Lord of the Eyrie Defender of the Vale Warden ...,Valemen,{ Jon Arryn },{ Lysa Tully },,...,,,Sweetrobin,,,,,,0,robin arryn
13,Arthur,,,,"['Varys', 'Qyburn']",,,,,,...,,,,,,,,,0,arthur
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
585,Wounded Lannister soldier,Lannisport,,,['Lannister'],,Westermen,Fisherman,,,...,Faith of the Seven,,,,,,,,0,wounded soldier
586,Wun Weg Wun Dar Wun,Beyond the Wall,"303 AC Winterfell , the North",,"['Mance Rayder', 'Jon Snow']",,Giant,,,,...,Old Gods,,Wun Wun,,,,,,1,wun wun
588,Othell Yarwyck,The Westerlands,303 AC Castle Black,Yarwyck,"[""Night's Watch""]",First Builder,Westermen,,,,...,Faith of the Seven,,,,,,,,1,othell yarwick
589,Ygritte,Beyond the Wall,"301 AC Castle Black , the Wall",,['Mance Rayder'],,Free Folk,,,,...,Old Gods,,,,,,Jon Snow,,1,ygritte


In [114]:
dialouges_with_characters

Unnamed: 0,Text,Speaker,Episode,Season,Show,sentiment_score,sentiment_label,mapped,bracket
0,What d’you expect? They’re savages. One lot s...,WAYMAR ROYCE,e1-Winter is Coming,season-01,Game-of-Thrones,2.843596,NEGATIVE,waymar royce,
1,I’ve never seen wildlings do a thing like thi...,WILL,e1-Winter is Coming,season-01,Game-of-Thrones,2.763056,POSITIVE,will,
2,How close did you get?,WAYMAR ROYCE,e1-Winter is Coming,season-01,Game-of-Thrones,0.582696,NEGATIVE,waymar royce,
3,Close as any man would.,WILL,e1-Winter is Coming,season-01,Game-of-Thrones,2.810488,POSITIVE,will,
4,We should head back to the wall.,GARED,e1-Winter is Coming,season-01,Game-of-Thrones,3.263553,NEGATIVE,gared,
...,...,...,...,...,...,...,...,...,...
20781,Uh the Archmaester is less than enthusiastic ...,SAM,e6,season-08,Game-of-Thrones,4.652675,NEGATIVE,samwell tarly,
20782,"Well, I imagine he isn't using them properly.",BRONN,e6,season-08,Game-of-Thrones,4.785044,NEGATIVE,bronn,
20783,I think we can all agree that ships take prec...,BRIENNE,e6,season-08,Game-of-Thrones,0.476380,POSITIVE,brienne,
20784,I think that's a very presumptuous statement.,BRONN,e6,season-08,Game-of-Thrones,3.653502,POSITIVE,bronn,


In [93]:
dialouges_with_characters.loc[dialouges_with_characters['mapped']=='jon snow']


Unnamed: 0,Text,Speaker,Episode,Season,Show,sentiment_score,sentiment_label,mapped,bracket
10,Go on. Father’s watching.,JON,e1-Winter is Coming,season-01,Game-of-Thrones,3.178272,POSITIVE,jon snow,
11,And your mother.,JON,e1-Winter is Coming,season-01,Game-of-Thrones,2.477227,POSITIVE,jon snow,
13,"Don’t think too much, Bran.",JON,e1-Winter is Coming,season-01,Game-of-Thrones,4.272757,NEGATIVE,jon snow,
21,Don’t look away.,JON (to BRAN),e1-Winter is Coming,season-01,Game-of-Thrones,3.833535,POSITIVE,jon snow,to BRAN
22,Father will know if you do.,JON,e1-Winter is Coming,season-01,Game-of-Thrones,3.365271,POSITIVE,jon snow,
...,...,...,...,...,...,...,...,...,...
20727,"You can come see me, you know, at Castle Black.",JON,e6,season-08,Game-of-Thrones,3.797649,POSITIVE,jon snow,
20729,You think anyone will dare tell you women are...,JON,e6,season-08,Game-of-Thrones,2.874924,NEGATIVE,jon snow,
20733,(chuckles) I don't know.,JON,e6,season-08,Game-of-Thrones,2.659033,NEGATIVE,jon snow,
20735,You have your Needle?,JON,e6,season-08,Game-of-Thrones,1.881338,NEGATIVE,jon snow,
