In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# Load the dialogue table; later cells read its 'mapped', 'sentiment_score',
# 'sentiment_label' and 'Text' columns.
# NOTE: "dialouges" (sic) is how the file name is spelled in this path.
dialogues = pd.read_csv("../data/dialouges_w_map.csv")
# Load the character table; later cells read its 'mapped' and 'Died' columns.
characters = pd.read_csv("../data/characters_w_map.csv")


In [318]:
# Running totals per speaker, keyed by the value found in the 'mapped' column.
speaker_sentiments = {}

# The two output columns start at 0.0 and are filled in row by row below.
dialogues['accumulative_sentiment'] = 0.0
dialogues['average_sentiment'] = 0.0

# Walk the rows in frame order, pulling only the three columns the loop needs.
row_stream = zip(
    dialogues.index,
    dialogues['mapped'],
    dialogues['sentiment_score'],
    dialogues['sentiment_label'],
)
for index, speaker, sentiment_score, sentiment_label in tqdm(row_stream, total=len(dialogues), desc="Processing dialogues"):
    # Sign is +1 for 'POSITIVE'; every other label value is treated as -1.
    if sentiment_label == 'POSITIVE':
        sentiment_sign = 1
    else:
        sentiment_sign = -1

    # The first time a speaker is seen, a zeroed record is created for them.
    totals = speaker_sentiments.setdefault(
        speaker,
        {'accumulative_sentiment': 0, 'sentiment_count': 0, 'sentiment_sign_sum': 0},
    )

    # Fold this line into the speaker's running totals.
    totals['accumulative_sentiment'] += sentiment_sign * sentiment_score
    totals['sentiment_count'] += 1
    totals['sentiment_sign_sum'] += sentiment_sign

    # Record the speaker's state as of this row.
    dialogues.at[index, 'accumulative_sentiment'] = totals['accumulative_sentiment']
    # NOTE: this "average" is the running mean of the +1/-1 signs,
    # not the mean of the signed scores.
    dialogues.at[index, 'average_sentiment'] = totals['sentiment_sign_sum'] / totals['sentiment_count']

# Persist the frame, including the two new columns, without the index.
dialogues.to_csv("../data/dialouges_over_time.csv", index=False)

print("Updated dialogues dataframe has been saved.")


Processing dialogues: 100%|██████████| 20786/20786 [00:02<00:00, 7787.22it/s]


Updated dialogues dataframe has been saved.


In [368]:
# Count the number of lines for each character
line_counts = dialogues['mapped'].value_counts()

# Get the top 20 characters with the most lines
top_20_characters = line_counts.head(20).reset_index()

# Rename columns for clarity
top_20_characters.columns = ['character', 'line_count']

In [369]:
# Display the character / line_count table.
top_20_characters

Unnamed: 0,character,line_count
0,tyrion lannister,1752
1,jon snow,1134
2,daenerys targaryen,967
3,cersei lannister,940
4,jaime lannister,931
5,sansa stark,800
6,arya stark,776
7,samwell tarly,553
8,davos,505
9,theon greyjoy,478


In [370]:
# Select the last row for each unique speaker
end_sentiment_df = dialogues.groupby('mapped').tail(1)

# Create a new DataFrame with only the unique characters and their final scores
end_sentiment = end_sentiment_df[['mapped', 'accumulative_sentiment', 'average_sentiment']].reset_index(drop=True)

# Rename columns for clarity (optional)
end_sentiment.rename(columns={
    'mapped': 'speaker',
    'accumulative_sentiment': 'final_accumulative_sentiment',
    'average_sentiment': 'final_average_sentiment'
}, inplace=True)



In [371]:
# Restrict the final-sentiment table to the characters listed in
# `top_20_characters` and rank them by final average sentiment, highest first.
in_top_20 = end_sentiment['speaker'].isin(top_20_characters['character'])
df = end_sentiment.loc[in_top_20].sort_values(by='final_average_sentiment', ascending=False)

# Build the name -> 'Died' lookup once instead of re-scanning `characters` for
# every speaker. If a mapped name occurs on several character rows, the first
# row wins, which is the row a "first match" lookup would return.
died_by_name = (
    characters.dropna(subset=['mapped'])
    .drop_duplicates(subset='mapped', keep='first')
    .set_index('mapped')['Died']
)

# A speaker without any character row is an error; name the offenders rather
# than failing with a bare out-of-bounds message.
unmatched = df.loc[~df['speaker'].isin(died_by_name.index), 'speaker']
if not unmatched.empty:
    raise IndexError(f"No row in `characters` for speaker(s): {unmatched.tolist()}")

# A character is flagged as dead when their 'Died' field is not missing.
df_dead = df['speaker'].map(died_by_name).notna().tolist()
df['dead'] = df_dead


In [372]:
# Display the ranked table, including the 'dead' flag column.
df

Unnamed: 0,speaker,final_accumulative_sentiment,final_average_sentiment,dead
197,margaery,232.034402,0.192857,True
216,ramsay,104.451694,0.096296,True
246,jorah mormont,106.587335,0.018373,True
269,davos,41.694621,-0.005941,False
253,varys,-18.565441,-0.051546,True
270,samwell tarly,-52.44245,-0.063291,False
272,bronn,-88.092152,-0.071605,False
273,tyrion lannister,-239.559026,-0.078767,False
267,jon snow,-204.24694,-0.097002,True
254,jaime lannister,-234.029204,-0.106337,True


In [373]:

# Drop rows whose final average is exactly +1 or -1, then show at most 40 rows.
df.loc[~df['final_average_sentiment'].isin([1.0, -1.0])].head(40)

Unnamed: 0,speaker,final_accumulative_sentiment,final_average_sentiment,dead
197,margaery,232.034402,0.192857,True
216,ramsay,104.451694,0.096296,True
246,jorah mormont,106.587335,0.018373,True
269,davos,41.694621,-0.005941,False
253,varys,-18.565441,-0.051546,True
270,samwell tarly,-52.44245,-0.063291,False
272,bronn,-88.092152,-0.071605,False
273,tyrion lannister,-239.559026,-0.078767,False
267,jon snow,-204.24694,-0.097002,True
254,jaime lannister,-234.029204,-0.106337,True


In [342]:
# Spot check: is the 'Died' field missing on the first `characters` row whose
# 'mapped' value is 'samwell tarly'?
pd.isna(characters.loc[characters['mapped']=='samwell tarly'].Died.values[0])

True

In [327]:
def print_dialouge_and_sentiment(speaker, df=None, column='Speaker'):
    """Print every dialogue line of ``speaker`` together with its sentiment label.

    Args:
        speaker: Value matched exactly (``==``) against ``df[column]``.
        df: Frame holding ``column`` plus 'Text' and 'sentiment_label' columns.
            When omitted, the notebook-level ``dialogues`` frame is used. It is
            looked up at call time, so the function can be defined before
            ``dialogues`` exists and always sees the frame currently bound to
            that name.
        column: Name of the column that identifies the speaker.

    Returns:
        None. For each matching row, the 'Text' and 'sentiment_label' values
        are printed on one line, separated by a space.

    Raises:
        KeyError: If ``column``, 'Text' or 'sentiment_label' is not a column
            of ``df``.
    """
    if df is None:
        # Late binding: a default of `df=dialogues` would freeze whichever
        # object that name pointed to when this cell was executed.
        df = dialogues

    spoken = df.loc[df[column] == speaker]
    for text, label in zip(spoken['Text'], spoken['sentiment_label']):
        print(text, label)


In [365]:
# Print each line whose 'mapped' value is 'tycho', followed by its sentiment label.
print_dialouge_and_sentiment('tycho', df=dialogues, column = 'mapped')

 Welcome to the Iron Bank. Please, sit. POSITIVE
 What can we do for you, Lord Stannis? NEGATIVE
 The Iron Throne is currently occupied by Tommen of the House Baratheon, King of the Andals and the First Men, Lord of the Seven Kingdoms and Protector of the Realm. POSITIVE
 Yes, we have heard this story. POSITIVE
 The king's grandfather tells a different story. A story about a jealous uncle whose attempts to usurp the throne from the rightful king cost the Seven Kingdoms dearly in blood and gold. POSITIVE
 And you feel your blood gives you a claim on our gold? POSITIVE
 Across the Narrow Sea, your books are filled with words like "usurper" and "madman" and "blood right." Here, our books are filled with numbers. We prefer the stories they tell. More plain. Less open to interpretation. How many fighting men remain loyal to you? NEGATIVE
 And how many ships do you have? The ones still afloat, Ser Davos, not at the bottom of Blackwater Bay. NEGATIVE
 And how much wheat and barley and beef an

In [312]:
# Display the dialogue frame, including the accumulative/average sentiment columns.
dialogues

Unnamed: 0,Text,Speaker,Episode,Season,Show,sentiment_score,sentiment_label,mapped,bracket,accumulative_sentiment,average_sentiment
0,What d’you expect? They’re savages. One lot s...,WAYMAR ROYCE,e1-Winter is Coming,season-01,Game-of-Thrones,2.843596,NEGATIVE,waymar royce,,-2.843596,-1.000000
1,I’ve never seen wildlings do a thing like thi...,WILL,e1-Winter is Coming,season-01,Game-of-Thrones,2.763056,POSITIVE,will,,2.763056,1.000000
2,How close did you get?,WAYMAR ROYCE,e1-Winter is Coming,season-01,Game-of-Thrones,0.582696,NEGATIVE,waymar royce,,-3.426292,-1.000000
3,Close as any man would.,WILL,e1-Winter is Coming,season-01,Game-of-Thrones,2.810488,POSITIVE,will,,5.573544,1.000000
4,We should head back to the wall.,GARED,e1-Winter is Coming,season-01,Game-of-Thrones,3.263553,NEGATIVE,gared,,-3.263553,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...
20781,Uh the Archmaester is less than enthusiastic ...,SAM,e6,season-08,Game-of-Thrones,4.652675,NEGATIVE,sam,,13.813297,-0.012469
20782,"Well, I imagine he isn't using them properly.",BRONN,e6,season-08,Game-of-Thrones,4.785044,NEGATIVE,bronn,,-91.745654,-0.074257
20783,I think we can all agree that ships take prec...,BRIENNE,e6,season-08,Game-of-Thrones,0.476380,POSITIVE,brienne,,-122.387626,-0.158730
20784,I think that's a very presumptuous statement.,BRONN,e6,season-08,Game-of-Thrones,3.653502,POSITIVE,bronn,,-88.092152,-0.071605


In [8]:
# Read back the CSV written by the running-sentiment cell.
# NOTE(review): this binds `dialouges` (sic), a different name from the
# `dialogues` frame used in the other cells, so the two are separate variables.
dialouges = pd.read_csv("../data/dialouges_over_time.csv")

In [26]:
# Text of the row at position 9 (the tenth row) among those whose 'mapped' value is 'ygritte'.
dialouges.loc[dialouges['mapped']=='ygritte'].iloc[9].Text

"  Hundreds and thousands. More than you've ever seen, crow."

In [7]:
# Display the character table.
characters

Unnamed: 0,Name,Born,Died,House(s),Affiliation(s),Title(s),Culture,Father,Mother,Sibling(s),...,Religion,Issue,Also known as,Personal arms,Species,Affiliation,Lover(s),Family,Dead,mapped
0,Aemon,"Aemon Targaryen 202 AC Red Keep , King's Landing","302 AC (aged 100) Castle Black , the Gift",Targaryen (renounced),"['Order of Maesters', ""Night's Watch""]",Prince (formerly) Maester,Valyrian,{ Maekar I Targaryen },{ Dyanna Dayne },{ Daeron Targaryen } { Aerion Targaryen } { Ae...,...,,,,,,,,,1,aemon
1,Anguy,"Dornish Marches , the Stormlands",,,['Brotherhood Without Banners'],,Marcher,,,,...,Lord of Light,,,,,,,,0,anguy
2,Lysa Arryn,"Lysa Tully Riverrun , the Riverlands","301 AC The Eyrie , the Vale of Arryn",Tully Arryn (by marriage) Baelish (by marriage),['Unknown'],,Rivermen,{ Hoster Tully },{ Minisa Whent },,...,Faith of the Seven,Robin Arryn,,,,,,,1,lysa arryn
3,Robin Arryn,"289 AC (age 16) Red Keep , King's Landing",,Arryn,['Royce'],Lord of the Eyrie Defender of the Vale Warden ...,Valemen,{ Jon Arryn },{ Lysa Tully },,...,,,Sweetrobin,,,,,,0,robin arryn
4,Arthur,,,,"['Varys', 'Qyburn']",,,,,,...,,,,,,,,,0,arthur
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,Wounded Lannister soldier,Lannisport,,,['Lannister'],,Westermen,Fisherman,,,...,Faith of the Seven,,,,,,,,0,wounded soldier
299,Wun Weg Wun Dar Wun,Beyond the Wall,"303 AC Winterfell , the North",,"['Mance Rayder', 'Jon Snow']",,Giant,,,,...,Old Gods,,Wun Wun,,,,,,1,wun wun
300,Othell Yarwyck,The Westerlands,303 AC Castle Black,Yarwyck,"[""Night's Watch""]",First Builder,Westermen,,,,...,Faith of the Seven,,,,,,,,1,othell yarwick
301,Ygritte,Beyond the Wall,"301 AC Castle Black , the Wall",,['Mance Rayder'],,Free Folk,,,,...,Old Gods,,,,,,Jon Snow,,1,ygritte
