In [None]:
!pip install pandas networkx matplotlib

: 

In [None]:
# Import Packages
import pandas as pd
import networkx as nx
import itertools
import matplotlib.pyplot as plt

: 

In [None]:
# Read the scraped records from the Excel workbook, then preview the first rows
data = pd.read_excel(io='scraped_text_v1.xlsx')
data.head(n=5)

: 

## Co-Authorship Network Analysis ##

In [None]:
def create_analyze_visualize_coauthorship_graph(dataframe, authors_column, layout_seed=42):
    """Build, draw, and score a co-authorship network from a column of author lists.

    Each cell of ``dataframe[authors_column]`` is read as a comma-separated
    list of author names. Every pair of distinct names found in the same cell
    is joined by an undirected edge. Names enter the graph only through edges,
    so a cell containing a single name contributes nothing to the network.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table containing the author column.
    authors_column : str
        Name of the column holding the comma-separated author names.
    layout_seed : int or None, default 42
        Seed forwarded to ``nx.spring_layout`` so the drawing is the same on
        every run. Pass ``None`` to get a different random layout each time.

    Returns
    -------
    tuple
        ``(G_coauthorship, centrality_df)`` where ``G_coauthorship`` is the
        ``networkx.Graph`` and ``centrality_df`` has one row per author with
        the columns ``'Author'``, ``'Degree Centrality'``,
        ``'Betweenness Centrality'`` and ``'Closeness Centrality'``.

    Side effects
    ------------
    Prints the node/edge counts and the ten authors with the highest degree
    centrality, and shows a matplotlib figure of the network.
    """
    G_coauthorship = nx.Graph()

    for cell in dataframe[authors_column]:
        # A missing cell has no authors to pair up; skipping it also avoids
        # calling .split on a float NaN.
        if pd.isna(cell):
            continue
        stripped = (name.strip() for name in str(cell).split(','))
        # Drop empty tokens (e.g. from a trailing comma) and repeated names so
        # that neither a '' node nor a self-loop is created. dict.fromkeys
        # removes duplicates while keeping first-seen order.
        author_list = list(dict.fromkeys(name for name in stripped if name))
        G_coauthorship.add_edges_from(itertools.combinations(author_list, 2))

    # Display the number of nodes and edges in the co-authorship network
    print(f"Co-authorship Network: {G_coauthorship.number_of_nodes()} nodes, "
          f"{G_coauthorship.number_of_edges()} edges")

    # Visualize the network on an explicit Axes so the title belongs to the
    # same axes nx.draw paints on and tight_layout can account for it.
    fig, ax = plt.subplots(figsize=(12, 12))
    pos = nx.spring_layout(G_coauthorship, k=0.3, seed=layout_seed)
    nx.draw(
        G_coauthorship,
        pos,
        ax=ax,
        with_labels=True,
        node_size=50,
        font_size=8,
        font_color='darkblue',
        node_color='skyblue',
        edge_color='gray',
    )
    ax.set_title('Co-authorship Network')
    fig.tight_layout()
    plt.show()

    # Compute centrality measures for the co-authorship network
    degree_centrality = nx.degree_centrality(G_coauthorship)
    betweenness_centrality = nx.betweenness_centrality(G_coauthorship)
    closeness_centrality = nx.closeness_centrality(G_coauthorship)

    # Look every score up by node instead of relying on the three result
    # dicts happening to share the same iteration order.
    authors = list(G_coauthorship.nodes)
    centrality_df = pd.DataFrame({
        'Author': authors,
        'Degree Centrality': [degree_centrality[a] for a in authors],
        'Betweenness Centrality': [betweenness_centrality[a] for a in authors],
        'Closeness Centrality': [closeness_centrality[a] for a in authors],
    })

    top_authors = centrality_df.sort_values(by='Degree Centrality', ascending=False).head(10)
    print("Top Authors by Degree Centrality:")
    print(top_authors)

    return G_coauthorship, centrality_df

: 

In [None]:
# Run the co-authorship pipeline on the 'Author' column; keep the graph and
# the per-author centrality table for later cells
coauthorship_graph, centrality_df = create_analyze_visualize_coauthorship_graph(
    dataframe=data,
    authors_column='Author',
)

: 

## Keyword Co-occurrence Network Analysis ##

In [None]:
def create_analyze_visualize_keyword_network(dataframe, keywords_column, layout_seed=42):
    """Build, draw, and score a keyword co-occurrence network.

    Each non-missing cell of ``dataframe[keywords_column]`` is read as a
    comma-separated list of keywords. Every pair of distinct keywords found in
    the same cell is joined by an undirected edge. Keywords enter the graph
    only through edges, so a cell containing a single keyword contributes
    nothing to the network.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table containing the keyword column.
    keywords_column : str
        Name of the column holding the comma-separated keywords.
    layout_seed : int or None, default 42
        Seed forwarded to ``nx.spring_layout`` so the drawing is the same on
        every run. Pass ``None`` to get a different random layout each time.

    Returns
    -------
    tuple
        ``(G_keywords, centrality_keywords_df)`` where ``G_keywords`` is the
        ``networkx.Graph`` and ``centrality_keywords_df`` has one row per
        keyword with the columns ``'Keyword'``, ``'Degree Centrality'``,
        ``'Betweenness Centrality'`` and ``'Closeness Centrality'``.

    Side effects
    ------------
    Prints the node/edge counts and the ten keywords with the highest degree
    centrality, and shows a matplotlib figure of the network.
    """
    G_keywords = nx.Graph()

    for cell in dataframe[keywords_column]:
        # Rows without keywords contribute nothing to the network.
        if pd.isna(cell):
            continue
        stripped = (term.strip() for term in str(cell).split(','))
        # Drop empty tokens (e.g. from a trailing comma) and repeated keywords
        # so that neither a '' node nor a self-loop is created. dict.fromkeys
        # removes duplicates while keeping first-seen order.
        keyword_list = list(dict.fromkeys(term for term in stripped if term))
        G_keywords.add_edges_from(itertools.combinations(keyword_list, 2))

    # Display the number of nodes and edges in the keyword co-occurrence network
    print(f"Keyword Co-occurrence Network: {G_keywords.number_of_nodes()} nodes, "
          f"{G_keywords.number_of_edges()} edges")

    # Visualize the network on an explicit Axes so the title belongs to the
    # same axes nx.draw paints on and tight_layout can account for it.
    fig, ax = plt.subplots(figsize=(12, 12))
    pos = nx.spring_layout(G_keywords, k=0.3, seed=layout_seed)
    nx.draw(
        G_keywords,
        pos,
        ax=ax,
        with_labels=True,
        node_size=50,
        font_size=8,
        font_color='darkgreen',
        node_color='lightgreen',
        edge_color='gray',
    )
    ax.set_title('Keyword Co-occurrence Network')
    fig.tight_layout()
    plt.show()

    # Compute centrality measures for the keyword network
    degree_centrality_keywords = nx.degree_centrality(G_keywords)
    betweenness_centrality_keywords = nx.betweenness_centrality(G_keywords)
    closeness_centrality_keywords = nx.closeness_centrality(G_keywords)

    # Look every score up by node instead of relying on the three result
    # dicts happening to share the same iteration order.
    keywords = list(G_keywords.nodes)
    centrality_keywords_df = pd.DataFrame({
        'Keyword': keywords,
        'Degree Centrality': [degree_centrality_keywords[k] for k in keywords],
        'Betweenness Centrality': [betweenness_centrality_keywords[k] for k in keywords],
        'Closeness Centrality': [closeness_centrality_keywords[k] for k in keywords],
    })

    top_keywords = centrality_keywords_df.sort_values(by='Degree Centrality', ascending=False).head(10)
    print("Top Keywords by Degree Centrality:")
    print(top_keywords)

    return G_keywords, centrality_keywords_df

: 

In [None]:
# Run the keyword co-occurrence pipeline on the 'Keywords' column; keep the
# graph and the per-keyword centrality table for later cells
keyword_network, centrality_keywords_df = create_analyze_visualize_keyword_network(
    dataframe=data,
    keywords_column='Keywords',
)

: 