# Author Analysis

In [None]:
import os
from collections import Counter
from itertools import combinations

import community as community_louvain
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd


# Load the preprocessed paper table from CSV. The code below reads its
# 'authors' column, which it splits on ';' to get one name per author.
df = pd.read_csv("preprocessing/processed_generative_ai_data_with_kmeans.csv")


def build_author_network(df, min_papers=2):
    """Build a weighted co-authorship graph from the ``authors`` column of *df*.

    Each non-null ``authors`` cell is treated as one paper whose author names
    are separated by ``';'``. Names are stripped of surrounding whitespace and
    empty entries are ignored. A name repeated inside a single cell counts
    once for that paper, so it can neither inflate the author's paper count
    nor create a self-loop edge.

    Args:
        df: Table with an ``authors`` column of ';'-separated name strings.
        min_papers: Minimum number of papers an author must appear on to be
            kept in the network.

    Returns:
        An undirected ``networkx.Graph`` whose nodes are author names and
        whose edge attribute ``weight`` is the number of papers the two
        authors share. Authors without any retained co-author are absent.
    """
    # Parse every paper once. dict.fromkeys() drops duplicate names while
    # keeping their first-seen order, so edge insertion order stays stable.
    papers = [
        list(dict.fromkeys(
            name.strip() for name in authors_str.split(';') if name.strip()
        ))
        for authors_str in df['authors'].dropna()
    ]

    # Number of papers per author (each paper contributes at most 1).
    author_counts = Counter(author for authors in papers for author in authors)
    prolific_authors = {
        author for author, count in author_counts.items() if count >= min_papers
    }

    G = nx.Graph()

    for authors in papers:
        filtered_authors = [a for a in authors if a in prolific_authors]

        # One unit of weight per paper for every unordered pair of co-authors.
        for author1, author2 in combinations(filtered_authors, 2):
            if G.has_edge(author1, author2):
                G[author1][author2]['weight'] += 1
            else:
                G.add_edge(author1, author2, weight=1)

    # Defensive: nodes are only ever created through edges above, so this is
    # normally a no-op, but it guarantees the "no isolated authors" contract.
    G.remove_nodes_from(list(nx.isolates(G)))

    return G


## Network of authors with at least 2 articles

In [None]:
# Build the co-authorship graph, keeping only authors with at least two
# papers, then report its size (node and edge counts).
author_network = build_author_network(df, min_papers=2)
print(
    "Réseau d'auteur : "
    f"{author_network.number_of_nodes()} noeuds et "
    f"{author_network.number_of_edges()} edges"
)

## Community detection

In [None]:
# Partition the co-authorship graph into communities with the Louvain method.
# The result maps each author (node) to an integer community id.
# Louvain evaluates nodes in a random order, so without a fixed random_state
# the community ids (and every ranking, colour and sub-plot derived from them)
# would change from one run to the next.
communities = community_louvain.best_partition(author_network, random_state=42)
# Store the community id on each node so it travels with the graph.
nx.set_node_attributes(author_network, communities, 'community')

## Top authors based on centrality

In [None]:
# Score every author by degree centrality and keep the score on the node
# under the 'centrality' attribute.
degree_centrality = nx.degree_centrality(author_network)
nx.set_node_attributes(author_network, degree_centrality, 'centrality')

# Rank the (author, centrality) pairs from most to least central and keep
# the first 20.
top_authors = sorted(
    degree_centrality.items(),
    key=lambda item: item[1],
    reverse=True,
)[:20]

print("\nTop 20 des auteurs centraux dans le réseau de collaboration:")
for rank, (author, centrality) in enumerate(top_authors, 1):
    community_id = communities[author]
    print(f"{rank}. {author} - Centralité: {centrality:.4f} - Communauté: {community_id}")

## Largest research communities

In [None]:
# Count how many authors were assigned to each community id.
community_counts = Counter(communities.values())
# The five most populated communities as (community_id, author_count) pairs,
# largest first.
largest_communities = community_counts.most_common(5)

print("\nLes communautés d\'auteurs les plus larges:")
for community_id, member_count in largest_communities:
    print(f"Communauté {community_id}: {member_count} auteurs")


In [None]:
# Draw the collaboration network. When the graph is split into several
# connected components only the largest one is drawn, so the layout is not
# dominated by small disconnected groups.
# The component list is computed once; this also copes with an empty graph,
# for which nx.is_connected() would raise.
components = list(nx.connected_components(author_network))
if len(components) > 1:
    largest_cc = max(components, key=len)
    G_vis = author_network.subgraph(largest_cc)
else:
    G_vis = author_network

# Fixed seed so the node positions are the same on every run.
pos = nx.spring_layout(G_vis, k=0.3, iterations=50, seed=42)

# Node area grows with degree centrality; the +20 keeps peripheral authors visible.
node_sizes = [4000 * G_vis.nodes[node]['centrality'] + 20 for node in G_vis.nodes]

# Edge width grows with the number of co-authored papers.
edge_widths = [0.5 * weight for _, _, weight in G_vis.edges(data='weight')]

# Community id of each node, mapped to a colour through the colormap below.
node_colors = [communities[node] for node in G_vis.nodes]

# A single figure for the drawing (creating one earlier and another here
# would leave a stray empty figure behind).
plt.figure(figsize=(24, 24))
nx.draw_networkx(
    G_vis,
    pos=pos,
    with_labels=True,
    node_size=node_sizes,
    node_color=node_colors,
    width=edge_widths,
    edge_color='lightgrey',
    font_size=10,
    font_weight='bold',
    alpha=0.8,
    cmap=plt.cm.rainbow
)

plt.axis('off')
plt.title('Réseau de collaboration des auteurs en recherches sur IA Générative\n(Taille du Noeud = Centralité, La largeur des edges = fréquence de collaboration, couleur = communauté)', fontsize=20)
plt.tight_layout()
# savefig does not create missing directories; make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/author_network.png', dpi=300, bbox_inches='tight')
plt.show()

## Research areas for each community

In [None]:
# Draw up to four of the largest communities, one per panel of a 2x2 grid.
plt.figure(figsize=(24, 18))

for i, (community_id, _) in enumerate(largest_communities[:4], 1):

    # The communities were detected (and ranked by size) on the full
    # author_network, so the members are taken from there. Looking them up
    # only in the largest connected component would silently drop any top
    # community that lives in another component and leave its panel blank.
    community_nodes = [
        node for node in author_network.nodes if communities[node] == community_id
    ]
    community_graph = author_network.subgraph(community_nodes)

    # Very small communities are skipped; their panel stays empty.
    if community_graph.number_of_nodes() > 5:
        plt.subplot(2, 2, i)

        # Fixed seed so each panel's layout is the same on every run.
        comm_pos = nx.spring_layout(community_graph, k=0.3, seed=42)

        # Node area grows with the author's degree centrality in the whole
        # network; the +100 keeps low-centrality authors visible.
        comm_node_sizes = [
            3000 * community_graph.nodes[node]['centrality'] + 100
            for node in community_graph.nodes
        ]

        # Edge width equals the number of co-authored papers.
        comm_edge_widths = [
            weight for _u, _v, weight in community_graph.edges(data='weight')
        ]

        nx.draw_networkx(
            community_graph,
            pos=comm_pos,
            with_labels=True,
            node_size=comm_node_sizes,
            node_color='lightblue',
            width=comm_edge_widths,
            edge_color='grey',
            font_size=12,
            font_weight='bold',
            alpha=0.8
        )

        plt.title(f'Communauté de recherches {community_id}', fontsize=16)
        plt.axis('off')

plt.tight_layout()
# savefig does not create missing directories; make sure the target exists.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/author_communities.png', dpi=300, bbox_inches='tight')
plt.show()