In [48]:
from minet.cli.utils import get_rcfile
from minet.youtube import YouTubeAPIClient
from ural import urls_from_text, normalize_url, should_resolve
from dataclasses import asdict
from tqdm import tqdm
from collections import Counter
import networkx as nx
from ipysigma import Sigma
from fog.tokenizers import WordTokenizer
from pprint import pprint
from pelote import remove_leaves, crop_to_largest_connected_component, monopartite_projection

In [12]:
# YouTube API client; the API key is read from the rcfile returned by
# get_rcfile(), under the 'youtube' -> 'key' entry.
client = YouTubeAPIClient(
    get_rcfile()['youtube']['key'],
    sleep=False,
)

In [13]:
# Collect every item yielded by client.channel_videos for the 'SciencesPo'
# channel; tqdm reports progress while the iterator is consumed.
VIDEOS = []
VIDEOS.extend(tqdm(client.channel_videos('SciencesPo')))

len(VIDEOS)

1891it [00:08, 224.49it/s]


1891

In [16]:
# Query client.videos with the ids of the collected videos. It yields pairs;
# only the second element of each pair is kept.
ENHANCED_VIDEOS = []
ENHANCED_VIDEOS.extend(
    enhanced
    for _, enhanced in tqdm(client.videos(item.video_id for item in VIDEOS))
)

len(ENHANCED_VIDEOS)

1891it [00:11, 168.03it/s]


In [38]:
# Lookup table: video id -> video title.
VIDEO_INDEX = dict(
    (video.video_id, video.title) for video in ENHANCED_VIDEOS
)

In [24]:
# Total number of comments across all videos; a missing (None) comment
# count is counted as 0.
sum(
    video.comment_count or 0
    for video in ENHANCED_VIDEOS
)

6956

In [25]:
# Show one example of a video whose comment count is missing (None).
# The `None` default keeps this cell from raising StopIteration (and thus
# aborting a full re-run) when every video reports a comment count.
next((v for v in ENHANCED_VIDEOS if v.comment_count is None), None)

YouTubeVideo(video_id='HC2wzds_YHc', title='Cérémonie du diplôme - promotion 2020', published_at='2021-06-23T13:39:12Z', description='Vivez la cérémonie du diplôme de Sciences Po, promotion 2020 !\n\n----\nSciences Po est une université de recherche internationale, sélective, ouverte sur le monde, qui se place parmi les meilleures en sciences humaines et sociales. Cliquez pour en savoir plus sur nos formations : http://bit.ly/2hz6Kr0\n\nComment se porter candidat ou candidate à Sciences Po ? http://bit.ly/2JlAxAD\n\n----\nSuivez-nous sur les réseaux sociaux !\n----\n\nYoutube : https://www.youtube.com/channel/UCjaCN9r_oyIgyUwY7wgACkA\nFacebook : https://www.facebook.com/SciencesPo/\nTwitter : https://twitter.com/sciencespo\nInstagram : https://www.instagram.com/sciencespo/\nSnapchat : https://www.snapchat.com/add/sciencespo\nLinkedIn : https://fr.linkedin.com/school/sciences-po/\nGroupe Facebook officiel : https://www.facebook.com/groups/sciencespogroup/\n\nNotre newsletter : http://bi

In [26]:
# Gather the comments of every video that reports at least one comment.
COMMENTS = []

for video in tqdm(ENHANCED_VIDEOS):
    # Only query videos with a known, positive comment count
    # (comment_count can be None).
    if video.comment_count is not None and video.comment_count >= 1:
        COMMENTS.extend(client.comments(video.video_id))

len(COMMENTS)

100%|█████████████████████████████████████████████████████████████████████████| 1891/1891 [01:59<00:00, 15.87it/s]


6956

In [28]:
# Ten author names with the most collected comments.
Counter(
    comment.author_name for comment in COMMENTS
).most_common(10)

[('@sciencespo', 55),
 ('@svavhel', 44),
 ('@freikorpsdamonisch8127', 40),
 ('@toughr1506', 35),
 ('@mohammadiqbal6688', 30),
 ('@marie-pierreconde1852', 29),
 ('@jonathanraspaut15', 29),
 ('@annaa1476', 29),
 ('@alainvillesuzanne8613', 27),
 ('@holger3943', 25)]

In [35]:
# Count how often each URL found in the comment texts appears.
URLS = Counter()

for c in COMMENTS:
    for url in urls_from_text(c.text):
        URLS[url] += 1

# Summary tuple:
#   1. number of distinct raw URLs,
#   2. number of those for which ural's should_resolve() returns true,
#   3. number of distinct URLs once passed through normalize_url().
# The third figure uses a set: a list would hold one entry per raw URL and
# therefore always equal the first figure, hiding any deduplication.
(
    len(URLS),
    sum(1 for u in URLS if should_resolve(u)),
    len({normalize_url(u) for u in URLS}),
)

(171, 53, 171)

In [45]:
# Bipartite graph linking comment authors to the videos they commented on.
g = nx.Graph()

for comment in COMMENTS:
    author = comment.author_name
    video_id = comment.video_id

    # The 'part' attribute tells the two kinds of nodes apart; video nodes
    # also carry their title as a label.
    g.add_node(author, part='author')
    g.add_node(video_id, part='video', label=VIDEO_INDEX[video_id])
    g.add_edge(author, video_id)

# Both pelote helpers are called for their effect on `g`; their return
# values are discarded.
remove_leaves(g)
crop_to_largest_connected_component(g)

In [46]:
# Render the graph: node size from g.degree, node color from the 'part'
# node attribute.
Sigma(
    g,
    node_size=g.degree,
    node_color="part",
)

Sigma(nx.Graph with 395 nodes and 476 edges)

In [55]:
# Monopartite projection of `g` for the 'author' part, using the "jaccard"
# metric.
m = monopartite_projection(
    g,
    'author',
    metric="jaccard",
)

In [56]:
# Render the projection, requesting the "louvain" node metric and using it
# for node color.
Sigma(
    m,
    node_color="louvain",
    node_metrics=["louvain"],
)

Sigma(nx.Graph with 184 nodes and 864 edges)

In [68]:
# Bipartite graph linking each video to its tags.
# NOTE(review): this rebinds `g` and `m`, replacing whatever graphs those
# names previously referred to.
g = nx.Graph()

for video in ENHANCED_VIDEOS:
    g.add_node(video.video_id, part="video")

    # Add the tag nodes first, then connect each of them to the video.
    g.add_nodes_from(video.tags, part="tag")
    g.add_edges_from((video.video_id, tag) for tag in video.tags)

# Monopartite projection of the graph for the 'tag' part.
m = monopartite_projection(g, 'tag')

In [69]:
# Render the projection stored in `m`, requesting the "louvain" node metric
# and using it for node color.
Sigma(
    m,
    node_color="louvain",
    node_metrics=["louvain"],
)

Sigma(nx.Graph with 4,411 nodes and 46,762 edges)