<a href="https://colab.research.google.com/github/gskumlehn/autism-info-llm-comparison/blob/main/network_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Instalar pacotes necessários
install.packages(c("tidytext", "dplyr", "ggplot2", "igraph", "ggraph", "readr"))

# Carregar bibliotecas
library(tidytext)
library(dplyr)
library(igraph)
library(ggraph)
library(ggplot2)
library(readr)

# Read the CSV file holding the questions and one answer column per source.
data <- read_csv("data.csv")

# Keep the question column plus the answer columns of interest, then reshape
# to long format: one row per (question, source) pair, with the source name in
# `network` and the answer in `text`.
# pivot_longer() is a tidyr function and tidyr is never attached in this
# notebook, so it is called through its namespace to avoid a
# "could not find function" error.
# NOTE(review): assumes tidyr is installed (it normally arrives as a dependency
# of the packages installed for this notebook) -- confirm on a fresh runtime.
responses <- data %>%
  select(
    Questions, WHO, `ChatGPT 4.0`, `ChatGPT Vision`, ScholarGPT, Gemini,
    Llama3, `Bing AI (Copilot)`, `Google Palm`, Claude, `ReKa Core`,
    `Solar Mini`
  ) %>%
  tidyr::pivot_longer(
    cols = -Questions,
    names_to = "network",
    values_to = "text"
  )

# Show the reshaped data
print(responses)

Installing packages into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘RcppArmadillo’, ‘graphlayouts’




In [None]:
# Extract the 15 most frequent words for each network.
# The intermediate tables are kept inside local() so that only
# `frequent_words` is created in the calling environment.
frequent_words <- local({
  # One row per word token of every answer text.
  tokens <- unnest_tokens(responses, word, text)

  # Number of occurrences of each word within each network, largest first.
  token_counts <- count(tokens, network, word, sort = TRUE)

  # Keep the 15 highest counts per network, then drop the grouping.
  ungroup(top_n(group_by(token_counts, network), 15, n))
})

# Show the most frequent words
print(frequent_words)

In [None]:
# Build the edge list: one edge from each network (answer source) to each of
# its most frequent words.
edges <- frequent_words %>%
  select(network, word) %>%
  rename(from = network, to = word)

# Create the undirected graph; vertices are the distinct network names and
# words found in the edge list.
graph <- graph_from_data_frame(edges, directed = FALSE)

# Classify every node as a network (its name appears in responses$network)
# or as a word.
node_attributes <- data.frame(
  name = V(graph)$name,
  type = ifelse(V(graph)$name %in% responses$network, "Network", "Word")
)

# Attach the node type to the graph
V(graph)$type <- node_attributes$type

# One colour per network. "ChatGPT 4.0" previously shared "#1f77b4" with
# "WHO", which made the two nodes indistinguishable by colour; it now has its
# own value.
network_colors <- c(
  "WHO" = "#1f77b4", "ChatGPT 4.0" = "#bcbd22", "ChatGPT Vision" = "#2ca02c",
  "ScholarGPT" = "#d62728", "Gemini" = "#9467bd", "Llama3" = "#e377c2",
  "Bing AI (Copilot)" = "#8c564b", "Google Palm" = "#17becf",
  "Claude" = "#7f7f7f", "ReKa Core" = "#aec7e8", "Solar Mini" = "#ff7f0e"
)

# Single colour shared by all word nodes
word_color <- "darkorange"

# Resolve the final colour of every node: the network's own colour when the
# node is a known network, the shared word colour otherwise.
node_colors <- ifelse(
  V(graph)$name %in% names(network_colors),
  network_colors[V(graph)$name],
  word_color
)

# Store the resolved colours on the graph so the plot can use them directly.
# Mapping colour to the node name with a manual scale left every word node
# without a matching scale value (only a literal "word" key existed), so word
# nodes were drawn in the NA colour instead of `word_color`.
V(graph)$node_color <- unname(node_colors)

# Plot the graph
ggraph(graph, layout = "fr") +
  # Fixed transparency is set as a parameter; inside aes() the constant was
  # treated as data and passed through an alpha scale.
  geom_edge_link(alpha = 0.5, color = "gray") +
  geom_node_point(aes(color = node_color), size = 10, show.legend = FALSE) +
  geom_node_text(aes(label = name), repel = TRUE, size = 5) +
  # node_color already holds literal colour values, so use them as-is.
  scale_color_identity() +
  theme_void() +
  labs(
    title = "Grafo de Co-ocorrência de Palavras",
    subtitle = "Redes e Palavras Mais Comuns"
  )

In [None]:
# Load libraries
library(tidytext)
library(dplyr)

# Load the English stop-word list
data("stop_words")

# Extract the 15 most frequent words for each network after removing stop
# words. The intermediate tables are kept inside local() so that only
# `frequent_words` is created in the calling environment.
frequent_words <- local({
  # One row per word token of every answer text.
  tokens <- unnest_tokens(responses, word, text)

  # Drop every token that appears in the stop-word list.
  content_tokens <- anti_join(tokens, stop_words, by = "word")

  # Number of occurrences of each remaining word per network, largest first.
  token_counts <- count(content_tokens, network, word, sort = TRUE)

  # Keep the 15 highest counts per network, then drop the grouping.
  ungroup(top_n(group_by(token_counts, network), 15, n))
})

# Show the most frequent words after stop-word removal
print(frequent_words)

In [None]:
# Build the edge list: one edge from each network (answer source) to each of
# its most frequent words.
edges <- frequent_words %>%
  select(network, word) %>%
  rename(from = network, to = word)

# Create the undirected graph; vertices are the distinct network names and
# words found in the edge list.
graph <- graph_from_data_frame(edges, directed = FALSE)

# Classify every node as a network (its name appears in responses$network)
# or as a word.
node_attributes <- data.frame(
  name = V(graph)$name,
  type = ifelse(V(graph)$name %in% responses$network, "Network", "Word")
)

# Attach the node type to the graph
V(graph)$type <- node_attributes$type

# One colour per network. "ChatGPT 4.0" previously shared "#1f77b4" with
# "WHO", which made the two nodes indistinguishable by colour; it now has its
# own value.
network_colors <- c(
  "WHO" = "#1f77b4", "ChatGPT 4.0" = "#bcbd22", "ChatGPT Vision" = "#2ca02c",
  "ScholarGPT" = "#d62728", "Gemini" = "#9467bd", "Llama3" = "#e377c2",
  "Bing AI (Copilot)" = "#8c564b", "Google Palm" = "#17becf",
  "Claude" = "#7f7f7f", "ReKa Core" = "#aec7e8", "Solar Mini" = "#ff7f0e"
)

# Single colour shared by all word nodes
word_color <- "darkorange"

# Resolve the final colour of every node: the network's own colour when the
# node is a known network, the shared word colour otherwise.
node_colors <- ifelse(
  V(graph)$name %in% names(network_colors),
  network_colors[V(graph)$name],
  word_color
)

# Store the resolved colours on the graph so the plot can use them directly.
# Mapping colour to the node name with a manual scale left every word node
# without a matching scale value (only a literal "word" key existed), so word
# nodes were drawn in the NA colour instead of `word_color`.
V(graph)$node_color <- unname(node_colors)

# Plot the graph
ggraph(graph, layout = "fr") +
  # Fixed transparency is set as a parameter; inside aes() the constant was
  # treated as data and passed through an alpha scale.
  geom_edge_link(alpha = 0.5, color = "gray") +
  geom_node_point(aes(color = node_color), size = 10, show.legend = FALSE) +
  geom_node_text(aes(label = name), repel = TRUE, size = 5) +
  # node_color already holds literal colour values, so use them as-is.
  scale_color_identity() +
  theme_void() +
  labs(
    title = "Grafo de Co-ocorrência de Palavras",
    subtitle = "Redes e Palavras Mais Comuns (sem stop words)"
  )