In [351]:
import csv
import os
import re

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from scipy.stats import pearsonr

def get_active_nodes(G):
    """Return the set of node ids that appear as an endpoint of at least one edge.

    G only needs to expose ``edges(data=True)`` yielding tuples whose first
    two items are the edge endpoints. Nodes without any edge are not included.
    """
    return {
        endpoint
        for edge in G.edges(data=True)
        for endpoint in (edge[0], edge[1])
    }

def episode(s, e, name = 'GoT_S0', file_list = None):
    """Collect the snapshot files belonging to one episode.

    A file belongs to the episode when its name starts with
    ``name + str(s) + 'E' + <two-digit e>`` (e.g. ``GoT_S01E03``). Note that
    the leading zero of the season number is part of the default ``name``.

    Parameters
    ----------
    s : int
        Season number.
    e : int
        Episode number; zero-padded to two digits.
    name : str
        File-name prefix up to (and including) the season's leading zero.
    file_list : iterable of str, optional
        File names to search. When omitted, the module-level ``files``
        variable is used, so existing callers that set ``files`` first keep
        working.

    Returns
    -------
    dict
        Maps the integer snapshot index to the file name. The index is the
        text after the last ``_`` and before the first ``.`` of the file name.

    Raises
    ------
    ValueError
        If a matching file name does not end in an integer index.
    """
    if file_list is None:
        # Backward-compatible fallback: the notebook cells assign a
        # module-level `files` listing before calling the metric functions.
        file_list = files

    prefix = name + str(s) + 'E' + str(e).zfill(2)

    edic = {}
    for fname in file_list:
        if fname.startswith(prefix):
            stem = fname.split('.')[0]
            edic[int(stem.split('_')[-1])] = fname
    return edic
 
def readG(f, path = 'Data/GoT_dyn_ts10/'):
    """Load one GraphML snapshot file with networkx.

    Parameters
    ----------
    f : str
        File name inside ``path``.
    path : str
        Directory holding the snapshot files. A trailing separator is
        optional: the two parts are joined with ``os.path.join`` rather than
        plain string concatenation.

    Returns
    -------
    Whatever ``nx.read_graphml`` returns for the joined path.
    """
    return nx.read_graphml(os.path.join(path, f))

def histogram(G):
    """Count edge endpoints per node label.

    Every edge contributes one count to the label of each of its two
    endpoints (a self-loop therefore counts twice for the same label).

    Parameters
    ----------
    G : graph
        Must expose ``nodes(data=True)`` yielding ``(node_id, attrs)`` pairs
        whose attrs contain a ``'label'`` entry, and ``edges(data=True)``
        yielding tuples whose first two items are node ids.

    Returns
    -------
    dict
        Maps node label to the number of edge endpoints carrying that label.
        Nodes without edges do not appear.

    Raises
    ------
    KeyError
        If a node has no ``'label'`` attribute.
    """
    labels = {node_id: attrs['label'] for node_id, attrs in G.nodes(data=True)}

    hist = {}
    for edge in G.edges(data=True):
        for endpoint in (edge[0], edge[1]):
            label = labels[endpoint]
            # dict.get replaces the former bare `except:` used as a default.
            hist[label] = hist.get(label, 0) + 1
    return hist

def aggregate_density(s, e, name = 'GoT_S0', path = 'Data/GoT_dyn_ts10/'):
    """Pooled edge density of one episode over all of its snapshots.

    For every snapshot file of the episode, the number of edges and the number
    of possible pairs among the snapshot's active nodes (nodes touching at
    least one edge) are accumulated; the result is the ratio of the two totals.

    Snapshots are visited in file-name order.

    Raises ZeroDivisionError when the episode has no snapshot files or no
    snapshot has at least two active nodes.
    """
    snapshots = sorted(episode(s, e, name).items(), key=lambda item: item[1])

    edge_sum = 0
    pair_sum = 0
    for _, fname in snapshots:
        graph = readG(fname, path)
        n_active = len(get_active_nodes(graph))
        edge_sum += len(graph.edges(data=True))
        pair_sum += n_active * (n_active - 1) / 2

    return edge_sum / pair_sum

def average_density(s, e, name = 'GoT_S0', path = 'Data/GoT_dyn_ts10/'):
    """Mean of ``nx.density`` over all snapshot graphs of one episode.

    Snapshots are loaded in file-name order. Raises ZeroDivisionError when no
    snapshot file matches the episode.
    """
    ordered = sorted(episode(s, e, name).items(), key=lambda item: item[1])
    densities = [nx.density(readG(fname, path)) for _, fname in ordered]
    return sum(densities) / len(densities)
    
def node_dict(g):
    """Map every node id of ``g`` to the node's ``'label'`` attribute.

    Raises KeyError if a node has no ``'label'`` attribute.
    """
    return {entry[0]: entry[1]['label'] for entry in g.nodes(data=True)}
            
def active_nodes(s, e, name = 'GoT_S0', path = 'Data/GoT_dyn_ts10/'):
    """Labels of all nodes that touch an edge in any snapshot of one episode.

    Returns a set of node labels (the ``'label'`` node attribute), merged over
    every snapshot file of the episode.
    """
    ordered = sorted(episode(s, e, name).items(), key=lambda item: item[1])

    labels = set()
    for _, fname in ordered:
        graph = readG(fname, path)
        touched = get_active_nodes(graph)
        id_to_label = node_dict(graph)
        labels.update(id_to_label[node_id] for node_id in touched)
    return labels
     
def degree_data(g):
    """Average node degree of ``g``.

    The degree total is rebuilt from ``nx.degree_histogram`` (index = degree,
    value = number of nodes with that degree) and divided by the number of
    nodes in ``g``. The denominator counts every node, including nodes of
    degree 0.
    """
    counts = nx.degree_histogram(g)
    degree_total = sum(count * degree for degree, count in enumerate(counts))
    return degree_total / g.number_of_nodes()
    
def average_degree(s, e, name = 'GoT_S0', path = 'Data/GoT_dyn_ts10/'):
    """Mean of the per-snapshot average degree over one episode.

    Each snapshot graph of the episode is reduced to its average degree with
    ``degree_data``; the plain mean of those values is returned. Raises
    ZeroDivisionError when no snapshot file matches the episode.
    """
    ordered = sorted(episode(s, e, name).items(), key=lambda item: item[1])
    per_snapshot = [degree_data(readG(fname, path)) for _, fname in ordered]
    return sum(per_snapshot) / len(per_snapshot)
    
def get_corr(review, metrics = ('avg_density', 'agg_density', 'active_nodes', 'avg_degrees'), target = 'Review'):
    """Pearson correlation of each network-metric column against a target column.

    Parameters
    ----------
    review : pandas.DataFrame
        Must contain every column named in ``metrics`` plus ``target``.
    metrics : iterable of str
        Columns to correlate with ``target``. Defaults to the four metric
        columns the notebook builds.
    target : str
        Column to correlate against. Defaults to ``'Review'``.

    Returns
    -------
    pandas.DataFrame
        One row per metric with columns ``'Network Metrics'`` (column name),
        ``'Correlation'`` and ``'pValue'`` (the two values returned by
        ``scipy.stats.pearsonr``).
    """
    rows = []
    for col in metrics:
        corr, pval = pearsonr(review[col], review[target])
        rows.append({'Network Metrics': col, 'Correlation': corr, 'pValue': pval})

    # Explicit column list keeps the layout stable even when `metrics` is empty.
    return pd.DataFrame(rows, columns=['Network Metrics', 'Correlation', 'pValue'])

#print(average_dens)
#print(aggregate_dens)
#print(num_actnodes)
#print(average_degrees)

# Game of Thrones

In [353]:
import pandas as pd
from scipy.stats import pearsonr

# Per-episode metric accumulators; one value is appended per (season, episode).
average_dens = []
aggregate_dens = []
num_actnodes = []
average_degrees = []

# episode() looks up snapshot files through this module-level name.
files = os.listdir("Data/GoT_dyn_ts10")
for S in range(1,4):
    for E in range(1,11):
        # Season 3 is only processed up to episode 2.
        # NOTE(review): presumably the snapshot data stops there - confirm.
        if S == 3 and E > 2:
            break
        avg_density = average_density(S,E)
        agg_density = aggregate_density(S, E)

        average_dens.append(avg_density)
        aggregate_dens.append(agg_density)

        num_actnodes.append(len(active_nodes(S, E)))
        average_degrees.append(average_degree(S, E))

# 'Unnamed: 0' is the index column saved into the CSV. `columns=` replaces the
# positional axis argument, which current pandas versions no longer accept.
GoT_review = pd.read_csv('GoT.csv').drop(columns='Unnamed: 0')
# Rows are matched to the metrics purely by position: first N reviews <-> N episodes.
review = GoT_review.head(len(average_dens))

review2 = review.assign(
    avg_density = average_dens,
    agg_density = aggregate_dens,
    active_nodes = num_actnodes,
    avg_degrees = average_degrees,
)

review2

Unnamed: 0,Episode,Review,avg_density,agg_density,active_nodes,avg_degrees
0,Winter Is Coming,9.001235,0.000815,0.150334,30,0.165441
1,The Kingsroad,8.701235,0.000883,0.179638,27,0.179216
2,Lord Snow,8.601235,0.001148,0.141691,46,0.233115
3,"Cripples, Bastards, and Broken Things",8.701235,0.000932,0.111754,48,0.189291
4,The Wolf and the Lion,9.001235,0.00109,0.122671,40,0.221289
5,A Golden Crown,9.101235,0.001135,0.099788,42,0.230392
6,You Win or You Die,9.201235,0.000766,0.111297,38,0.155462
7,The Pointy End,9.001235,0.001067,0.099767,53,0.216635
8,Baelor,9.601235,0.001314,0.110849,50,0.26677
9,Fire and Blood,9.501235,0.001083,0.079656,49,0.219771


In [354]:
# Game of Thrones: Pearson correlation (with p-value) of each metric column vs 'Review'.
get_corr(review2)

Unnamed: 0,Network Metrics,Correlation,pValue
0,avg_density,-0.313642,0.155199
1,agg_density,0.196934,0.37971
2,active_nodes,-0.400721,0.064579
3,avg_degrees,-0.313642,0.155199


# House of Cards

In [355]:
import pandas as pd
from scipy.stats import pearsonr

# Per-episode metric accumulators; one value is appended per (season, episode).
average_dens = []
aggregate_dens = []
num_actnodes = []
average_degrees = []

# episode() looks up snapshot files through this module-level name.
files = os.listdir("Data/HoC_dyn_ts10")

# Seasons 1-2, episodes 1-13 of each season.
for S in range(1,3):
    for E in range(1,14):
        avg_density = average_density(S,E, 'HoC_S0', 'Data/HoC_dyn_ts10/' )
        agg_density = aggregate_density(S, E, 'HoC_S0', 'Data/HoC_dyn_ts10/' )

        average_dens.append(avg_density)
        aggregate_dens.append(agg_density)

        num_actnodes.append(len(active_nodes(S, E, 'HoC_S0', 'Data/HoC_dyn_ts10/' )))
        average_degrees.append(average_degree(S, E, 'HoC_S0', 'Data/HoC_dyn_ts10/' ))

# 'Unnamed: 0' is the index column saved into the CSV. `columns=` replaces the
# positional axis argument, which current pandas versions no longer accept.
HoC_review = pd.read_csv('HoC.csv').drop(columns='Unnamed: 0')
# Rows are matched to the metrics purely by position: first N reviews <-> N episodes.
review = HoC_review.head(len(average_dens))

review2 = review.assign(
    avg_density = average_dens,
    agg_density = aggregate_dens,
    active_nodes = num_actnodes,
    avg_degrees = average_degrees,
)

review2

Unnamed: 0,Episode,Review,avg_density,agg_density,active_nodes,avg_degrees
0,Chapter 1,8.601235,0.000326,0.164139,30,0.082831
1,Chapter 2,8.501235,0.000437,0.144969,33,0.111111
2,Chapter 3,8.301235,0.000388,0.151019,33,0.098475
3,Chapter 4,8.201235,0.000346,0.168726,31,0.087883
4,Chapter 5,8.401235,0.000496,0.154327,36,0.125882
5,Chapter 6,8.501235,0.00041,0.168845,33,0.104202
6,Chapter 7,8.101235,0.000437,0.138301,34,0.111111
7,Chapter 8,7.701235,0.000349,0.177536,19,0.088688
8,Chapter 9,8.501235,0.000472,0.209886,23,0.119888
9,Chapter 10,8.701235,0.000299,0.234228,21,0.076035


In [357]:
# House of Cards: Pearson correlation (with p-value) of each metric column vs 'Review'.
get_corr(review2)


Unnamed: 0,Network Metrics,Correlation,pValue
0,avg_density,-0.169385,0.408111
1,agg_density,0.033663,0.870321
2,active_nodes,-0.158587,0.439059
3,avg_degrees,-0.169385,0.408111


# Breaking Bad

In [358]:
import pandas as pd
from scipy.stats import pearsonr

# Per-episode metric accumulators; one value is appended per (season, episode).
average_dens = []
aggregate_dens = []
num_actnodes = []
average_degrees = []

# episode() looks up snapshot files through this module-level name.
files = os.listdir("Data/BB_dyn_ts10")

for S in range(1,4):
    for E in range(1,14):
        try:
            # Compute all four metrics before appending anything, so a failure
            # can never leave the four lists with different lengths.
            avg_density = average_density(S,E, 'BB_S0', 'Data/BB_dyn_ts10/' )
            agg_density = aggregate_density(S, E, 'BB_S0', 'Data/BB_dyn_ts10/' )
            n_active = len(active_nodes(S, E, 'BB_S0', 'Data/BB_dyn_ts10/' ))
            avg_deg = average_degree(S, E, 'BB_S0', 'Data/BB_dyn_ts10/' )
        except ZeroDivisionError:
            # The averaging helpers divide by the number of snapshots, so an
            # episode number with no snapshot files ends up here and is skipped.
            # Any other error is left to surface instead of being swallowed.
            continue

        average_dens.append(avg_density)
        aggregate_dens.append(agg_density)
        num_actnodes.append(n_active)
        average_degrees.append(avg_deg)

# 'Unnamed: 0' is the index column saved into the CSV. `columns=` replaces the
# positional axis argument, which current pandas versions no longer accept.
# The frame holds Breaking Bad data, hence BB_review rather than HoC_review.
BB_review = pd.read_csv('BB.csv').drop(columns='Unnamed: 0')
# Rows are matched to the metrics purely by position: first N reviews <-> N episodes.
review = BB_review.head(len(average_dens))

review2 = review.assign(
    avg_density = average_dens,
    agg_density = aggregate_dens,
    active_nodes = num_actnodes,
    avg_degrees = average_degrees,
)

review2

Unnamed: 0,Episode,Review,avg_density,agg_density,active_nodes,avg_degrees
0,Pilot,9.001235,0.001695,0.221981,18,0.21187
1,Cat's in the Bag...,8.601235,0.000825,0.55914,8,0.103175
2,...And the Bag's in the River,8.701235,0.001697,0.242975,14,0.212121
3,Cancer Man,8.201235,0.001794,0.221843,24,0.224293
4,Gray Matter,8.301235,0.001571,0.226804,24,0.196429
5,Crazy Handful of Nothin',9.301235,0.002092,0.284287,23,0.261487
6,A No-Rough-Stuff-Type Deal,8.801235,0.001673,0.237461,21,0.20911
7,Seven Thirty-Seven,8.601235,0.001354,0.347826,15,0.169312
8,Grilled,9.301235,0.001562,0.341667,12,0.195238
9,Bit by a Dead Bee,8.301235,0.001892,0.222057,20,0.236508


In [359]:
# Breaking Bad: Pearson correlation (with p-value) of each metric column vs 'Review'.
get_corr(review2)

Unnamed: 0,Network Metrics,Correlation,pValue
0,avg_density,0.18505,0.365456
1,agg_density,0.146404,0.475435
2,active_nodes,-0.138513,0.499797
3,avg_degrees,0.18505,0.365456


# GETTING REVIEWS FROM IMDB

In [347]:
import imdb

# Module-level IMDb client; get_review() below reads it as the global `ia`.
ia = imdb.IMDb()
 
def get_review(code, num_seasons, num_epsodes):
    """Fetch per-episode titles and ratings of a series into a DataFrame.

    Uses the module-level IMDb client ``ia`` and prints the series object as
    a side effect.

    Parameters
    ----------
    code : str
        IMDb id handed to ``ia.get_movie`` (e.g. "1856010").
    num_seasons : int
        Exclusive upper bound on the season number: seasons
        ``1 .. num_seasons - 1`` are read.
    num_epsodes : int
        Exclusive upper bound on the episode number: episodes
        ``1 .. num_epsodes - 1`` of every season are tried.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Episode'`` (title) and ``'Review'`` (rating), one row per
        episode for which both values are available, in season/episode order.

    Raises
    ------
    KeyError
        If a requested season is missing from the episode data.
    """
    # getting information
    series = ia.get_movie(code)

    # adding the 'episodes' info set
    ia.update(series, 'episodes')

    # episodes of the series, indexed by season number
    episodes = series.data['episodes']

    # printing the object i.e name
    print(series)

    titles = []
    ratings = []

    for s in range(1, num_seasons):
        season = episodes[s]

        for i in range(1, num_epsodes):
            # Look up BOTH fields before recording either one. Appending the
            # title first and then failing on the rating would leave the two
            # columns with different lengths and break the DataFrame build.
            try:
                epi = season[i]
                title = epi['title']
                rating = epi.data['rating']
            except (KeyError, IndexError):
                # Episode number not present in this season, or the episode
                # has no title/rating: skip it.
                continue
            titles.append(title)
            ratings.append(rating)

    return pd.DataFrame({'Episode': titles, 'Review': ratings})

# House of Cards. Both numeric arguments are exclusive upper bounds in
# get_review, so (3, 14) requests seasons 1-2 and episodes 1-13.
HoC_review = get_review("1856010", 3, 14)
HoC_review

House of Cards


Unnamed: 0,Episode,Review
0,Chapter 1,8.601235
1,Chapter 2,8.501235
2,Chapter 3,8.301235
3,Chapter 4,8.201235
4,Chapter 5,8.401235
5,Chapter 6,8.501235
6,Chapter 7,8.101235
7,Chapter 8,7.701235
8,Chapter 9,8.501235
9,Chapter 10,8.701235


In [348]:
# Fetch the ratings of each show and save them as CSV for the analysis cells.
# Both numeric arguments are exclusive upper bounds in get_review:
# (3, 14) -> seasons 1-2, episodes 1-13; (4, 11) -> seasons 1-3, episodes 1-10.
# The analysis cells drop an 'Unnamed: 0' column after reading these files back.
HoC_review = get_review("1856010", 3, 14)
HoC_review.to_csv("HoC.csv")
GoT_review = get_review("0944947", 4, 11)
GoT_review.to_csv("GoT.csv")
BB_review = get_review("0903747", 4, 14)
BB_review.to_csv("BB.csv")


House of Cards
Game of Thrones
Breaking Bad


In [349]:
# Read the saved Breaking Bad ratings back and show them without the saved
# index column. The frame holds BB data, so it is bound to BB_review instead of
# overwriting HoC_review; `columns=` replaces the positional axis argument,
# which current pandas versions no longer accept.
BB_review = pd.read_csv('BB.csv')
BB_review.drop(columns='Unnamed: 0')

Unnamed: 0,Episode,Review
0,Pilot,9.001235
1,Cat's in the Bag...,8.601235
2,...And the Bag's in the River,8.701235
3,Cancer Man,8.201235
4,Gray Matter,8.301235
5,Crazy Handful of Nothin',9.301235
6,A No-Rough-Stuff-Type Deal,8.801235
7,Seven Thirty-Seven,8.601235
8,Grilled,9.301235
9,Bit by a Dead Bee,8.301235


In [350]:
# Breaking Bad. Both numeric arguments are exclusive upper bounds in
# get_review, so (4, 14) requests seasons 1-3 and episodes 1-13.
BB_review = get_review("0903747", 4, 14)
BB_review

Breaking Bad


Unnamed: 0,Episode,Review
0,Pilot,9.001235
1,Cat's in the Bag...,8.601235
2,...And the Bag's in the River,8.701235
3,Cancer Man,8.201235
4,Gray Matter,8.301235
5,Crazy Handful of Nothin',9.301235
6,A No-Rough-Stuff-Type Deal,8.801235
7,Seven Thirty-Seven,8.601235
8,Grilled,9.301235
9,Bit by a Dead Bee,8.301235
