In [24]:
import pandas as pd
import numpy as np
import json

In [25]:
# Per-video embeddings table; the preview below shows a `video_id` column and
# an `embeddings` column holding a nested list per row.
emb_df = pd.read_json("data/embeddings.json")

# Raw video metadata parsed from JSON. The encoding is pinned to UTF-8 so that
# decoding does not depend on the platform's locale default, which can
# mis-decode non-ASCII text on some systems.
# NOTE: despite the `_df` suffix this is the plain json.load() result, not a
# DataFrame; the name is kept because later cells reference it.
with open("data/videos.json", "r", encoding="utf-8") as file:
    video_info_df = json.load(file)

In [26]:
# Preview the first rows to check which columns were loaded.
emb_df.head()

Unnamed: 0,video_id,embeddings
0,ido-nerU_lk,"[[0.009513185, -0.00495829, -0.070846304, -0.0..."
1,SdYf5lAf3l0,"[[-0.012842869000000002, 0.014075993, 0.007954..."
2,F0_mbxBhlcE,"[[0.012729603, -0.03326714, -0.048892893, -0.0..."
3,31QBB0U9BSc,"[[-0.06162233, -0.0030312098000000003, 0.00631..."
4,g53DHbdbUtI,"[[-0.032726347, 0.01872434, -0.013206775, -0.0..."


In [27]:
# Lookup table from video id to video title, built from the parsed metadata.
titles = dict(
    (item['id']['videoId'], item['snippet']['title'])
    for item in video_info_df
)

In [28]:
# Build exactly one title per row of emb_df, in row order.
# Later cells use the position in this list both as the graph node id and as
# the row/column index into the similarity matrix, so every video id must
# contribute an entry: silently skipping an id that has no title would shift
# all following titles onto the wrong embedding. Ids without a known title
# fall back to the id itself.
title_column = [
    titles.get(video_id, video_id)
    for video_id in emb_df['video_id'].values
]

In [29]:
# Rebind `titles` from the id -> title dict to the ordered list of titles;
# the cells below index this list by position.
# NOTE(review): after this cell runs `titles` is no longer a dict, so the cell
# that builds `title_column` cannot be re-run on its own — re-run from the
# cell that builds the dict instead.
titles = title_column

In [30]:
# Pull the raw `embeddings` column out of the frame as a NumPy array.
embedding_column = emb_df['embeddings']
embeddings = np.asarray(embedding_column.values)

In [31]:
# Stack the first (index 0) vector of every row into a single array.
# The rows are read from emb_df rather than from `embeddings` itself so the
# cell gives the same result when it is re-run; applying x[0] to the already
# flattened array would pick single scalars the second time.
embeddings = np.asarray([np.asarray(x[0]) for x in emb_df['embeddings'].values])

In [32]:
from sklearn.metrics.pairwise import cosine_similarity

In [33]:
# Show the dimensions of the stacked embedding array.
embeddings.shape

(516, 768)

In [34]:
# Pairwise cosine similarity between all embedding rows.
similarity = cosine_similarity(X=embeddings)
similarity

array([[1.        , 0.4702196 , 0.66370286, ..., 0.42439698, 0.41294993,
        0.49348085],
       [0.4702196 , 1.        , 0.53016633, ..., 0.47804975, 0.51036347,
        0.50729637],
       [0.66370286, 0.53016633, 1.        , ..., 0.53746903, 0.4933724 ,
        0.55722042],
       ...,
       [0.42439698, 0.47804975, 0.53746903, ..., 1.        , 0.48516161,
        0.55042753],
       [0.41294993, 0.51036347, 0.4933724 , ..., 0.48516161, 1.        ,
        0.53366862],
       [0.49348085, 0.50729637, 0.55722042, ..., 0.55042753, 0.53366862,
        1.        ]])

## Generate Graph

In [45]:
import networkx as nx
import plotly.graph_objects as go

In [46]:
# Minimum cosine similarity for two videos to be linked by an edge.
SIMILARITY_THRESHOLD = 0.7
# Misspelled alias kept because the edge-building cell reads `threashold`.
threashold = SIMILARITY_THRESHOLD

In [47]:
# Empty undirected graph; nodes and edges are added in the following cells.
G = nx.Graph()

In [48]:
# Register one node per title: the list position is the node id and the
# title is stored in the node's `label` attribute.
G.add_nodes_from(
    (node_id, {"label": video_title})
    for node_id, video_title in enumerate(titles)
)

In [49]:
# Link every unordered pair (i < j) whose similarity exceeds the threshold;
# the similarity value is stored as the edge weight.
n_videos = len(titles)
for i in range(n_videos):
    row = similarity[i]
    for j in range(i + 1, n_videos):
        score = row[j]
        if score > threashold:
            G.add_edge(i, j, weight=score)

In [50]:
# Compute node positions with the spring layout; the fixed seed keeps the
# layout the same between runs.
pos = nx.spring_layout(G, seed=42)

In [51]:
# Split the layout into parallel x / y coordinate lists, in node order.
x_nodes = []
y_nodes = []
for node in G.nodes():
    x_coord, y_coord = pos[node]
    x_nodes.append(x_coord)
    y_nodes.append(y_coord)

In [52]:
# Flatten all edges into single x / y sequences. Each edge contributes its two
# endpoints followed by None, which separates it from the next segment.
edges_x = []
edges_y = []
for source, target in G.edges():
    (x0, y0), (x1, y1) = pos[source], pos[target]
    edges_x.extend([x0, x1, None])
    edges_y.extend([y0, y1, None])

In [53]:
# Edge segments: thin grey lines with hover disabled.
edge_trace = go.Scatter(
    x=edges_x,
    y=edges_y,
    mode='lines',
    hoverinfo='none',
    line=dict(width=0.5, color='#888'),
)

# One marker per node; hovering shows the node's label (the video title).
node_labels = [G.nodes[n]['label'] for n in G.nodes()]
node_trace = go.Scatter(
    x=x_nodes,
    y=y_nodes,
    mode='markers',
    text=node_labels,
    hoverinfo='text',
    marker=dict(
        size=10,
        color='skyblue',
        line=dict(width=2, color='DarkSlateGrey'),
    ),
)

# Edges are listed first, then nodes — the same trace order as adding them one by one.
fig = go.Figure(data=[edge_trace, node_trace])

# Hide grid, zero line and tick labels: layout coordinates carry no meaning.
fig.update_layout(
    title="Grafo de Similaridade entre Vídeos",
    showlegend=False,
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
)

fig.show()

## Degree Distribution

In [58]:
# (degree, title) for every node, highest degree first. The sort is stable, so
# nodes with equal degree keep their node order.
node_degree = [(degree, G.nodes[node]['label']) for node, degree in G.degree()]
node_degree.sort(key=lambda pair: pair[0], reverse=True)

In [60]:
# Split the sorted (degree, title) pairs into two parallel lists for plotting.
sorted_degrees = []
sorted_titles = []
for degree_value, video_title in node_degree:
    sorted_degrees.append(degree_value)
    sorted_titles.append(video_title)

In [61]:
# Bar chart of node degree, one bar per video title (already sorted by degree).
degree_bars = go.Bar(
    x=sorted_titles,
    y=sorted_degrees,
    marker=dict(
        color='skyblue',
        line=dict(color='grey', width=1.5),
    ),
)
fig = go.Figure(data=[degree_bars])

# Chart layout settings
fig.update_layout(
    title="Distribuição de Graus do Grafo por Título do Vídeo",
    xaxis=dict(
        title="Título do Vídeo",
        tickangle=-45,  # tilt the titles so they are easier to read
    ),
    yaxis=dict(title="Grau"),
    template="plotly_white",
)

# Show the chart
fig.show()

## Betweenness

In [62]:
# Betweenness centrality of every node, keyed by node id.
betweenness = nx.betweenness_centrality(G)

In [63]:
# Rank (node, betweenness) pairs from highest to lowest centrality.
betweenness_ordenado = list(betweenness.items())
betweenness_ordenado.sort(key=lambda entry: entry[1], reverse=True)

In [64]:
# Keep only the first `top_n` entries of the ranking (the highest betweenness).
top_n = 10
nos_top_betweenness = betweenness_ordenado[:top_n]

In [65]:
# Unpack the selected (node, score) pairs into parallel lists and look up
# each node's label for display.
indices_nos = []
betweenness_nos = []
labels_nos = []
for node, score in nos_top_betweenness:
    indices_nos.append(node)
    betweenness_nos.append(score)
    labels_nos.append(G.nodes[node]['label'])

In [66]:
# Recompute the seeded layout and collect the coordinates of the selected nodes.
pos = nx.spring_layout(G, seed=42)
top_coords = [pos[node] for node in indices_nos]
x_top_nos = [coords[0] for coords in top_coords]
y_top_nos = [coords[1] for coords in top_coords]

In [67]:
# Scatter plot of only the top-betweenness nodes, each labelled with its title.
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=x_top_nos,
    y=y_top_nos,
    mode='markers+text',
    text=labels_nos,
    textposition="top center",
    marker=dict(
        size=15,
        color='orange',
        line=dict(width=2, color='DarkSlateGrey')
    ),
    hoverinfo='text'
))

# Title spelling fixed ("Nódos" -> "Nós") to match the other figures in this
# notebook, which all use "Nós".
fig.update_layout(
    showlegend=False,
    title="Nós com Maior Centralidade de Betweenness",
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)

fig.show()

In [75]:
# Recomputed here so the cell only depends on G.
betweenness = nx.betweenness_centrality(G)

# Scale betweenness into marker sizes. A floor is applied because a marker of
# size 0 is not drawn, which would hide every node whose betweenness is zero
# while its edges are still shown.
MIN_NODE_SIZE = 4
sizes = [max(MIN_NODE_SIZE, 1000 * betweenness[node]) for node in G.nodes()]

# Node positions for the graph layout (same seed as the other figures)
pos = nx.spring_layout(G, seed=42)
x_nos = [pos[node][0] for node in G.nodes()]
y_nos = [pos[node][1] for node in G.nodes()]
labels_nos = [G.nodes[node]['label'] for node in G.nodes()]

# Collect every edge into ONE pair of coordinate lists, with None separating
# the segments, instead of creating one trace per edge. This matches how the
# first graph figure in this notebook is built and keeps the figure light.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

# Build the Plotly figure
fig = go.Figure()

# Edges go first so the lines are drawn underneath the node markers
fig.add_trace(go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    line=dict(width=0.5, color='grey'),
    hoverinfo='none'
))

# Nodes, sized by betweenness
fig.add_trace(go.Scatter(
    x=x_nos,
    y=y_nos,
    mode='markers',
    text=labels_nos,  # text shown on mouse hover
    marker=dict(
        size=sizes,
        color='skyblue',
        line=dict(width=2, color='DarkSlateGrey')
    ),
    hoverinfo='text'  # show only the title on hover
))

# Layout settings
fig.update_layout(
    showlegend=False,
    title="Grafo com Tamanho dos Nós Proporcional à Centralidade de Betweenness",
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)

# Show the chart
fig.show()

## Clustering

In [74]:
# Clustering coefficient of every node, keyed by node id.
clustering = nx.clustering(G)

# Rank (node, coefficient) pairs from highest to lowest coefficient
clustering_ordenado = list(clustering.items())
clustering_ordenado.sort(key=lambda entry: entry[1], reverse=True)

# Keep only the highest-ranked nodes (here: top 10)
top_n = 10
nos_top_clustering = clustering_ordenado[:top_n]

# Unpack node ids, coefficients and display labels of the selected nodes
indices_nos = []
clustering_nos = []
labels_nos = []
for node, coefficient in nos_top_clustering:
    indices_nos.append(node)
    clustering_nos.append(coefficient)
    labels_nos.append(G.nodes[node]['label'])

# Coordinates of the selected nodes in the seeded layout
pos = nx.spring_layout(G, seed=42)
top_coords = [pos[node] for node in indices_nos]
x_top_nos = [coords[0] for coords in top_coords]
y_top_nos = [coords[1] for coords in top_coords]

# Markers for the selected nodes, each labelled with its title
top_clustering_trace = go.Scatter(
    x=x_top_nos,
    y=y_top_nos,
    mode='markers+text',
    text=labels_nos,
    textposition="top center",
    hoverinfo='text',  # show the title on hover
    marker=dict(
        size=15,
        color='lightgreen',
        line=dict(width=2, color='DarkSlateGrey'),
    ),
)
fig = go.Figure(data=[top_clustering_trace])

# Layout settings
fig.update_layout(
    title="Nós com Maior Coeficiente de Clustering",
    showlegend=False,
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
)

# Show the chart
fig.show()

## Eigenvector

In [71]:
# Eigenvector centrality of every node, with an explicit iteration cap and tolerance.
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=500, tol=1e-4)

# Rank (node, centrality) pairs from highest to lowest centrality
eigenvector_ordenado = list(eigenvector_centrality.items())
eigenvector_ordenado.sort(key=lambda entry: entry[1], reverse=True)

# Keep only the highest-ranked nodes
top_n = 10
nos_top_eigenvector = eigenvector_ordenado[:top_n]

# Unpack node ids, centrality values and display labels of the selected nodes
indices_nos = []
eigenvector_nos = []
labels_nos = []
for node, centrality in nos_top_eigenvector:
    indices_nos.append(node)
    eigenvector_nos.append(centrality)
    labels_nos.append(G.nodes[node]['label'])

# Coordinates of the selected nodes in the seeded layout
pos = nx.spring_layout(G, seed=42)
top_coords = [pos[node] for node in indices_nos]
x_top_nos = [coords[0] for coords in top_coords]
y_top_nos = [coords[1] for coords in top_coords]

# Markers for the selected nodes, each labelled with its title
top_eigenvector_trace = go.Scatter(
    x=x_top_nos,
    y=y_top_nos,
    mode='markers+text',
    text=labels_nos,
    textposition="top center",
    hoverinfo='text',
    marker=dict(
        size=15,
        color='lightcoral',
        line=dict(width=2, color='DarkSlateGrey'),
    ),
)
fig = go.Figure(data=[top_eigenvector_trace])

fig.update_layout(
    title="Nós com Maior Centralidade de Autovetor (Eigenvector)",
    showlegend=False,
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
)

fig.show()