# What is the most popular TV show - a real world use case

In this section, we will take on the fortunate role of a Facebook data scientist to determine, amongst other things, which TV show in our dataset is the most popular. This will test the knowledge we have gained so far and will walk you through a typical question that a graph data scientist gets asked.

## Getting set up

In [None]:
#! pip install -r requirements.txt
import matplotlib.pyplot as plt
import igraph

## Examining the graph structure

In [None]:
# Import custom functions
from graphtastic.utils import graph_from_attributes_and_edgelist

In [None]:
# Attribute names handed to the graph-building helper; later cells read them
# back as vertex attributes (g.vs['page_name'], g.vs['page_type']).
attributes = ['page_name', 'page_type']
# Input files: the edge list and the per-node attribute table.
edgelist_path = './data/musae_facebook_edges.csv'
node_attr_path = './data/musae_facebook_target.csv'
# Build the graph through the project's wrapper function.
g = graph_from_attributes_and_edgelist(
    node_attr_path,
    edgelist_path,
    attributes,
)

In [None]:
# Summarise the degree distribution of the graph, grouping degrees into bins
# of width 5, and print the resulting text histogram.
histogram = g.degree_distribution(bin_width=5)
print(histogram)

In [None]:
import matplotlib.pyplot as plt

# Draw a histogram of every node's degree, split into `bins` buckets.
bins = 30
plt.hist(g.degree(), bins=bins)
# Axis labels are independent of each other, so their order does not matter.
plt.ylabel('Frequency')
plt.xlabel('Node degree centrality')
plt.show()

## Measuring connectedness

In [None]:
# Compute the connected components of the graph and print the clustering
# summary to see whether the graph is a single connected piece.
# NOTE(review): newer python-igraph releases deprecate `Graph.clusters()` in
# favour of `Graph.connected_components()` — confirm against the pinned version
# in requirements.txt before switching.
connected_components = g.clusters()
print(connected_components)

## Looking at the top degree nodes

In [None]:
# Pair each page name with the degree of its vertex.
degree = list(zip(g.vs['page_name'], g.degree()))
# Rank a copy of the pairs from highest to lowest degree, leaving `degree`
# itself in vertex order.
sorted_degree = list(degree)
sorted_degree.sort(key=lambda name_and_degree: name_and_degree[1], reverse=True)
# Show the ten best-connected pages.
print(sorted_degree[:10])

In [None]:
# Restrict the vertex set to pages whose type is 'tvshow'.
tv_nodes = g.vs.select(page_type_eq='tvshow')
tv_indices = [tv_node.index for tv_node in tv_nodes]
# Pair each TV show's page name with its degree in the full graph.
tv_degree = list(zip(g.vs[tv_indices]['page_name'], g.degree(tv_indices)))
# Rank a copy of the pairs from highest to lowest degree.
sorted_tv_degree = list(tv_degree)
sorted_tv_degree.sort(key=lambda name_and_degree: name_and_degree[1], reverse=True)
# Show the ten best-connected TV shows.
print(sorted_tv_degree[:10])

## Using `select()` to interrogate the graph

In [None]:
# Select every vertex whose page_type is either 'government' or 'politician'
# (the `_in` suffix tests membership in the given list).
gov_pol_nodes = g.vs.select(page_type_in=['government', 'politician'])

In [None]:
# Select TV show vertices whose degree lies between 100 and 140 inclusive.
# The leading underscore in `_degree_*` makes igraph filter on the computed
# vertex degree rather than on a stored vertex attribute named 'degree'.
one_to_three_100_nodes = g.vs.select(_degree_ge=100, 
                                     _degree_le=140, 
                                     page_type_eq='tvshow')

# Print the page names of the matching TV shows.
print(one_to_three_100_nodes['page_name'])

In [None]:
# Store each vertex's degree as a regular vertex attribute so it can be
# filtered like any other attribute (no leading underscore needed).
g.vs['node_degree'] = g.degree()
# Same 100-140 inclusive degree filter on TV shows as before, this time
# expressed against the stored 'node_degree' attribute.
one_to_three_100_nodes = g.vs.select(node_degree_ge=100,
                                     node_degree_le=140,
                                     page_type_eq='tvshow')
print(one_to_three_100_nodes['page_name'])


## Properties of our popular nodes

In [None]:
# Find the vertex index of the page named 'Today Show'. Only the first match
# is used; an IndexError is raised if no page has that name.
today_show_id = g.vs.select(page_name_eq='Today Show')[0].index
print(today_show_id)

In [None]:
# Select every edge that has the Today Show vertex as one of its endpoints.
today_show_edges = g.es.select(_incident=[today_show_id])

In [None]:
# Record the vertex index at each end of every Today Show edge.
sources = [incident_edge.source for incident_edge in today_show_edges]
targets = [incident_edge.target for incident_edge in today_show_edges]
# Print the two lists on separate lines.
print(sources, targets, sep='\n')

In [None]:
# Collect every distinct vertex that appears at either end of a Today Show edge.
neighbor_nodes = list(set(sources + targets))
# The Today Show vertex is an endpoint of each of its own edges, so drop it to
# leave only its neighbours. Use the looked-up ID instead of a hard-coded 909
# so this cell stays correct if the dataset or vertex numbering changes.
neighbor_nodes.remove(today_show_id)
print(neighbor_nodes)
print(len(neighbor_nodes))

In [None]:
# Look up the page_type of each neighbouring vertex; the result is in the
# same order as `neighbor_nodes`.
neighbor_page_types = g.vs[neighbor_nodes]['page_type']
print(neighbor_page_types)

In [None]:
from collections import Counter
# Count how many of the Today Show's neighbours fall into each page type.
page_type_dict = Counter(neighbor_page_types)
print(page_type_dict)

In [None]:
# Pair each neighbouring vertex ID with its page type. The next cell filters
# these (vertex ID, page type) pairs, and `neighbor_page_types` was built in
# the same order as `neighbor_nodes`, so the two lists line up element-wise.
# A list (rather than a bare zip iterator) keeps the pairs available if a
# later cell is re-run.
ids_and_page_types = list(zip(neighbor_nodes, neighbor_page_types))

In [None]:
# Keep only the pairs whose second element (the page type) is 'politician'.
politician_id = [
    pair for pair in ids_and_page_types if pair[1] == 'politician'
]
print(politician_id)


In [None]:
# Take the vertex ID from the first (vertex ID, page type) pair found for a
# politician instead of hard-coding 22243, so the lookup follows the data.
# Raises IndexError if no politician neighbour was found.
politician_node_id = politician_id[0][0]
politician_name = g.vs[politician_node_id]['page_name']
print(politician_name)
