# Video Graph Analysis

This notebook analyzes the video recommendation graph, with an emphasis on assortativity.

In [None]:
import os
import pandas
import json
import numpy as np
import pandas as pd

import networkx as nx
from matplotlib import pyplot as plt
from collections import Counter

### Data Import and Preparation

In [None]:
# Load the graph as a directed graph from its adjacency-list file.
G = nx.read_adjlist(
    "../../data/derived_data/analysis_redo/video_adjacency.txt",
    create_using=nx.DiGraph(),
)

# Per-video feature table.
video_info = pd.read_csv('../../data/derived_data/analysis_redo/video_info.csv')

# Channel leanings, left-joined on 'channel' so that every video row is kept
# even when its channel has no classification (those leanings become NaN).
channel_leanings = pd.read_csv('../../data/derived_data/analysis_redo/channel_classification.csv')
video_info = pd.merge(video_info, channel_leanings, how='left', on='channel')

In [None]:
# Assign every distinct category_id a dense integer index.
# The index order follows set iteration order, so it is arbitrary (not sorted).
category_df = pd.read_csv('../../data/derived_data/category_crosswalk.csv')
category_list = category_df['category_id'].values

categories = set(category_list)
cat_mapping = {category_id: ix for ix, category_id in enumerate(categories)}

In [None]:
# make a like ratio column
def like_ratio(l, d):
    """Return the share of likes among all votes.

    The previous form, ``likes / dislikes``, was unbounded yet fell back to
    0.5 whenever dislikes was zero, so a video with many likes and no
    dislikes scored lower than almost any video with a single dislike.
    Expressing the ratio as a proportion makes 0.5 a genuine neutral value.

    Args:
        l: Like count; anything ``int()`` accepts (int, float, numeric str).
        d: Dislike count; anything ``int()`` accepts.

    Returns:
        ``likes / (likes + dislikes)`` as a float, which lies in [0, 1] for
        non-negative counts, or 0.5 when there are no votes at all.

    Raises:
        ValueError: If a count cannot be converted by ``int()``, e.g. NaN or
            a non-numeric string.
    """
    likes, dislikes = int(l), int(d)
    votes = likes + dislikes
    if votes == 0:
        # No votes at all: treat the video as neutral.
        return 0.5
    return likes / votes

# Per-video like ratio, computed row by row from the likes and dislikes columns.
video_info['lr'] = video_info.apply(
    lambda row: like_ratio(row['likes'], row['dislikes']),
    axis=1,
)


def _by_video(column):
    """Return a {video_id: value} dict for one column of video_info."""
    return dict(zip(video_info['video_id'], video_info[column]))


# One lookup table per attribute that will be attached to the graph nodes.
category = _by_video('category')
views = _by_video('views')
postdate = _by_video('postdate')
channel = _by_video('channel')
lr = _by_video('lr')
# NOTE(review): leanings come from a left join, so unclassified channels
# yield NaN here; it is still open whether those should be dropped.
leaning = _by_video('leaning')

In [None]:
# Attach each lookup table to the graph under its attribute name.
for attr_name, attr_values in (
    ('leaning', leaning),
    ('category', category),
    ('views', views),
    ('like_ratio', lr),
    ('postdate', postdate),
    ('channel', channel),
):
    nx.set_node_attributes(G, values=attr_values, name=attr_name)

### Assortativity

In [None]:
# Print the assortativity coefficient for each categorical node attribute.
for attr in ('category', 'channel', 'leaning'):
    print(nx.attribute_assortativity_coefficient(G, attr))

# Numeric variants, currently disabled:
#print(nx.numeric_assortativity_coefficient(G, attribute='views'))
#print(nx.numeric_assortativity_coefficient(G, attribute='like_ratio'))

In [None]:
# Raw edge counts between leanings: edge_counts[i, j] = number of i -> j edges,
# with rows/columns ordered Left (0), Center (1), Right (2).
leaning_index = {'L': 0, 'C': 1, 'R': 2}
edge_counts = nx.attribute_mixing_matrix(G,
                                         attribute='leaning',
                                         mapping=leaning_index,
                                         normalized=False)

# Divide every row by its total (the number of outgoing edges counted for that
# leaning) so rows sum to 1. Cell [i, j] then reads as how often a video with
# leaning i recommends a video with leaning j.
row_totals = edge_counts.sum(axis=1, keepdims=True)
mix_matrix = edge_counts / row_totals

In [None]:
# Heatmap of the row-normalized leaning mixing matrix.
# Labels follow the matrix index order: 0 = Left, 1 = Center, 2 = Right.
leaning_labels = ('Left', 'Center', 'Right')
tick_positions = np.arange(3)

plt.figure(figsize=(7, 7))
plt.imshow(mix_matrix)
plt.colorbar(shrink=0.8)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(tick_positions, leaning_labels)
plt.show()
# Export for the presentation, currently disabled:
#plt.savefig('presentation_resources/leaning_mixing_matrix', dpi=300)