### Imports

In [None]:
import sqlite3
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob

### spaCy Language Model & Pipeline Configuration

In [None]:
# Load spaCy's small English pipeline and append the "spacytextblob"
# component to it. calculate_sentiment reads `._.polarity` from the docs this
# pipeline produces.
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe("spacytextblob")

### DB Connection

In [None]:
# Open the SQLite database and create the single cursor `c` that the later
# cells reuse for all of their queries.
# NOTE: sqlite3.connect creates an empty database when the file is missing,
# so a wrong path only shows up later as a "no such table" error.
conn = sqlite3.connect('hp-dataset-with-relations.sqlite')
c = conn.cursor()

# Sentiment Analysis

In [None]:
def calculate_sentiment(text):
    """Return the sentiment polarity of every string in *text*.

    Args:
        text: Iterable of strings, e.g. the dialog lines of one view.

    Returns:
        A list with one entry per input string, in input order. Each entry is
        the value the pipeline stored in ``doc._.polarity``.
    """
    # nlp.pipe streams the texts through the pipeline in batches, which avoids
    # the per-call overhead of running nlp(line) once for every line.
    return [doc._.polarity for doc in nlp.pipe(text)]

In [None]:
# Collect the names of all views in the database, leaving out 'all_parts'.
# Filtering in the comprehension (rather than list.remove) keeps this cell
# working when 'all_parts' does not exist; remove() would raise ValueError.
views = c.execute("SELECT tbl_name FROM sqlite_master where type='view'").fetchall()
views_names = [name for (name,) in views if name != 'all_parts']

In [None]:
# Smoke test: score the dialog lines of the single view harry_voldemort.
sql = c.execute("SELECT dialog FROM harry_voldemort").fetchall()
# Each fetched row is a 1-tuple; keep only the dialog text.
dialog = [row[0] for row in sql]
res = calculate_sentiment(dialog)
# print(res)

In [None]:
# Plot one polarity histogram per view, titled with the view name and the
# mean polarity of its dialog lines.
for table in views_names:
    sql = c.execute("SELECT dialog FROM " + table).fetchall()
    dialog = [row[0] for row in sql]
    result = calculate_sentiment(dialog)
    narr = np.array(result)
    mean = np.mean(narr)

    # Start a fresh figure for every view. Without it all histograms are
    # drawn on top of each other and only the last title remains visible.
    plt.figure()
    plt.title(table + ' ' + str(mean))
    x_axis = (-1, 1)
    # Green for views whose name starts with 'harry', red for all others.
    bar_color = 'g' if table.startswith('harry') else 'r'
    plt.hist(narr, range=x_axis, facecolor=bar_color)

    # plt.savefig(table + '.jpg')  # uncomment to write each histogram to disk
    plt.show()
    # print(table + ': || ' + str(mean) + ' ||' + str(result))

In [None]:
# Build the edge table for the "from Harry" graph: one row per view whose
# name starts with 'harry', holding the mean polarity of its dialog lines.
tab = pd.DataFrame(columns=['from', 'name', 'mean'])
print(tab)
for table in views_names:
    # Filter on the name first so the NLP pass only runs for the views that
    # can actually end up in the table.
    if not table.startswith('harry'):
        continue
    sql = c.execute("SELECT dialog FROM " + table).fetchall()
    dialog = [row[0] for row in sql]
    result = calculate_sentiment(dialog)
    narr = np.array(result)
    if narr.size == 0:
        # No dialog lines: np.mean would return nan and emit a RuntimeWarning.
        continue
    mean = np.mean(narr)
    # Rows without a usable mean (nan) are left out of the graph.
    if not np.isnan(mean):
        tab.loc[len(tab.index)] = ["H", table, mean]

In [None]:
# Show the collected "from Harry" rows; as the last expression of the cell,
# the DataFrame is rendered as the cell output.
tab

In [None]:
# Draw the "from Harry" sentiments as a graph: every row of `tab` becomes an
# edge from its 'from' value to its 'name' value, with 'mean' stored as an
# edge attribute.
G = nx.from_pandas_edgelist(tab, source='from', target='name', edge_attr='mean')
# Hand-written node colours, handed to node_color below.
# NOTE(review): this list is not computed from tab['mean']; it presumably has
# to match the number and order of the nodes in G, so it needs to be re-checked
# whenever the set of views changes.
colors = ["black", "darkred", "lightgreen", "lightgreen", "darkred", "lightblue", "lightgreen", "darkred", "lightblue", "lightblue", "lightgreen", "lightblue","darkred", "lightblue","darkred", "lightblue", "lightblue", "lightblue", "lightgreen", "lightblue", "darkred", "lightgreen", "lightgreen", "darkred", "darkred", "darkred", "lightgreen", "darkred"]
# NOTE(review): `weights` is assigned but not used anywhere in this cell.
weights = 1

nx.draw_spring(G, with_labels=True, node_shape='*', node_color = colors, node_size = 1000, edge_color='black', font_color="black")

# Title for the current axes, set after the graph has been drawn.
plt.title("SENTIMENTS FROM HARRY")

# NOTE(review): `figure` is imported here but not used in this cell.
from matplotlib.pyplot import figure

plt.show()

In [None]:
# Build the edge table for the "to Harry" graph: one row per view whose name
# ends with '_harry', holding the mean polarity of its dialog lines.
tab = pd.DataFrame(columns=['from', 'name', 'mean'])
print(tab)
for table in views_names:
    # Filter on the name first so the NLP pass only runs for the views that
    # can actually end up in the table.
    if not table.endswith('_harry'):
        continue
    sql = c.execute("SELECT dialog FROM " + table).fetchall()
    dialog = [row[0] for row in sql]
    result = calculate_sentiment(dialog)
    narr = np.array(result)
    if narr.size == 0:
        # No dialog lines: np.mean would return nan and emit a RuntimeWarning.
        continue
    mean = np.mean(narr)
    # Rows without a usable mean (nan) are left out of the graph.
    if not np.isnan(mean):
        tab.loc[len(tab.index)] = ["H", table, mean]

In [None]:
# Show the collected "to Harry" rows; as the last expression of the cell,
# the DataFrame is rendered as the cell output.
tab

In [None]:
# Draw the "to Harry" sentiments as a graph: every row of `tab` becomes an
# edge from its 'from' value to its 'name' value, with 'mean' stored as an
# edge attribute.
G=nx.from_pandas_edgelist(tab, source='from', target='name', edge_attr='mean')
# Hand-written node colours, handed to node_color below.
# NOTE(review): this list is not computed from tab['mean']; it presumably has
# to match the number and order of the nodes in G, so it needs to be re-checked
# whenever the set of views changes.
colors = ["black", "lightgreen", "lightgreen","lightgreen", "lightgreen", "lightgreen", "lightgreen", "lightgreen", "lightgreen", "lightblue", "lightgreen", "lightgreen", "lightgreen","lightgreen", "lightgreen","lightgreen","lightgreen","lightgreen","lightgreen","lightgreen","lightgreen","lightgreen","lightgreen","lightgreen", "darkred", "darkred", "lightgreen", "darkred"]
    
nx.draw_spring(G, with_labels=True, node_shape='*', node_color = colors, node_size = 1000, edge_color='black', font_color="black")
plt.show()

## Topic Modeling