# Parte 8 - Redes

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
from itertools import combinations
from collections import Counter
import re
import pandas as pd

# Load the pre-tokenized hymns table. Forward slashes keep the relative path
# portable: they resolve on Windows, Linux and macOS, unlike a
# backslash-separated path.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# produced by this project.
hinos_analise: pd.DataFrame = pd.read_pickle("../assets/hinos_analise_tokens.pkl")

def build_word_cooccurrence_network(
    df,
    window_size: int = 2,
    min_cooccurrence: int = 2,
    column: str = "tokens_no_stops",
):
    """Build an undirected word co-occurrence graph from tokenized documents.

    Two tokens co-occur when they appear in the same document at most
    ``window_size - 1`` positions apart. Each pair of positions is counted
    exactly once, so overlapping windows do not inflate the counts of
    adjacent words, and documents shorter than ``window_size`` still
    contribute the pairs they contain.

    Args:
        df: Table (e.g. a pandas DataFrame) whose ``column`` entry yields one
            sliceable sequence of tokens per document.
        window_size: Number of consecutive tokens considered to be in the
            same context. Values below 2 produce no pairs.
        min_cooccurrence: Minimum number of co-occurrences a pair needs to
            become an edge.
        column: Name of the column holding the token sequences.

    Returns:
        A ``networkx.Graph`` with one edge per retained pair; the edge
        attribute ``weight`` stores the co-occurrence count. Only words that
        take part in a retained pair appear as nodes. A word repeated inside
        the window produces a self-loop.
    """
    # How many following tokens each token is paired with.
    reach = max(window_size - 1, 0)
    cooccurrences = Counter()

    for tokens in df[column]:
        for position, left in enumerate(tokens):
            # Pairing each token only with the tokens that follow it visits
            # every in-window pair of positions once.
            for right in tokens[position + 1 : position + 1 + reach]:
                # Sort so that (a, b) and (b, a) share the same counter key.
                cooccurrences[tuple(sorted((left, right)))] += 1

    # Build the graph, keeping only sufficiently frequent pairs.
    G = nx.Graph()
    for (w1, w2), freq in cooccurrences.items():
        if freq >= min_cooccurrence:
            G.add_edge(w1, w2, weight=freq)

    return G


# Build the word graph: tokens inside a 3-token window count as co-occurring,
# and only pairs seen at least 3 times become edges.
G = build_word_cooccurrence_network(hinos_analise, window_size=3, min_cooccurrence=3)

# Simple visualization
plt.figure(figsize=(10, 8))
# A fixed seed makes the force-directed layout reproducible; without it every
# execution of the cell draws the nodes in different positions.
pos = nx.spring_layout(G, k=0.5, seed=42)
nx.draw_networkx_nodes(G, pos, node_size=100, node_color="lightblue")
nx.draw_networkx_edges(G, pos, alpha=0.3)
nx.draw_networkx_labels(G, pos, font_size=9)
plt.title("Rede de Coocorrência de Palavras")
plt.axis("off")
plt.show()

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Re-join each hymn's "tokens_no_stops" list into a single string, because
# TfidfVectorizer works on raw text documents by default.
# NOTE(review): the default vectorizer re-tokenizes and lowercases the joined
# text; confirm that this matches the preprocessing already applied.
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(hinos_analise["tokens_no_stops"].apply(" ".join))

# Pairwise cosine similarity between the TF-IDF vectors of the hymns
similarity = cosine_similarity(X)

n_hinos = len(hinos_analise)

# One node per hymn, identified by its row position; the "label" attribute
# carries a 1-based display name.
G_docs = nx.Graph()
for i in range(n_hinos):
    G_docs.add_node(i, label=f"Hino {i+1}")

# Add edges only between hymns whose similarity exceeds the threshold
threshold = 0.3
for i, j in combinations(range(n_hinos), 2):
    score = similarity[i, j]
    if score > threshold:
        # Store a plain Python float rather than a NumPy scalar so the edge
        # attribute serializes cleanly when the graph is exported.
        G_docs.add_edge(i, j, weight=float(score))

# Visualization
plt.figure(figsize=(10, 8))
# A fixed seed keeps the layout identical between runs.
pos = nx.spring_layout(G_docs, k=0.6, seed=42)
# with_labels=True draws the node identifiers (0-based row positions), not the
# "label" attribute set above.
nx.draw(G_docs, pos, with_labels=True, node_color="lightgreen", node_size=600)
plt.title("Rede de Similaridade entre Hinos")
plt.show()

In [None]:
# Export the word co-occurrence graph to a GEXF file in the working directory
palavras_gexf = "rede_palavras.gexf"
nx.write_gexf(G, palavras_gexf)

# Export the hymn similarity graph the same way
hinos_gexf = "rede_hinos.gexf"
nx.write_gexf(G_docs, hinos_gexf)