In [None]:
import pandas as pd
import json

# Load the Scopus export. The path is kept in a single variable and passed to
# read_csv so that editing it actually changes which file is read.
file_path = "data_scopus.csv"  # Replace with the actual path to your file
data = pd.read_csv(file_path)

# Keep only rows that carry every column the network build reads below;
# a missing value in any of these would break the string splitting.
filtered_data = data.dropna(subset=['Authors', 'Year', 'Authors with affiliations'])

# Containers filled while walking the rows:
#   nodes - author name -> node record (dict keeps one record per author)
#   links - one record per co-author pair per publication
nodes = {}
links = []

# Function to extract the country from the affiliation string
def extract_country(affiliation):
    """Return the country named at the end of an affiliation string.

    The country is taken to be the text after the last comma, with
    surrounding whitespace removed.

    Args:
        affiliation: Affiliation text, e.g. "Dept. X, Univ. Y, City, Country".

    Returns:
        The trimmed text after the last comma, or "Unknown" when the input
        is not a string, contains no comma, or has nothing but whitespace
        after the last comma.
    """
    # Missing cells arrive as NaN/None rather than text; treat them as unknown
    # instead of failing on the string method below.
    if not isinstance(affiliation, str):
        return "Unknown"

    _, comma, tail = affiliation.rpartition(",")
    country = tail.strip()

    # No comma at all, or a trailing comma with nothing after it: there is no
    # usable country, so do not hand back an empty label.
    if not comma or not country:
        return "Unknown"
    return country

# Walk the publications once, registering every author as a node and every
# pair of co-authors on the same publication as a link.
for _, row in filtered_data.iterrows():
    authors = row['Authors'].split(", ")  # Split authors into a list
    # Affiliation entries are assumed to be listed in the same order as Authors.
    affiliations = row['Authors with affiliations'].split("; ")

    # pandas hands Year back as a float when the column is stored as floats
    # (which happens when the CSV has blank Year cells), and that would be
    # written to JSON as e.g. 2019.0. Emit whole-number years as plain ints.
    year = row['Year']
    if isinstance(year, float) and year.is_integer():
        year = int(year)

    # Match authors to affiliations by position
    for i, author in enumerate(authors):
        if i < len(affiliations):  # Ensure matching exists
            affiliation = affiliations[i]
            country = extract_country(affiliation)
        else:
            # More authors than affiliation entries: no details for this one.
            affiliation = "Unknown"
            country = "Unknown"

        # Create or update nodes
        known = nodes.get(author)
        if known is None:
            nodes[author] = {
                "id": author,
                "name": author,
                "country": country,
                "affiliation": affiliation,
            }
        elif known["affiliation"] == "Unknown" and affiliation != "Unknown":
            # The author was first seen without an affiliation; fill in the
            # details now that a later publication provides them.
            known["country"] = country
            known["affiliation"] = affiliation

    # Create links between co-authors: pair each author only with the authors
    # listed after them, so every unordered pair is emitted exactly once.
    for i, source in enumerate(authors):
        for target in authors[i + 1:]:
            links.append({"source": source, "target": target, "year": year})

# Safety net: every endpoint referenced by a link must have a node record.
# Any name not yet registered gets a placeholder with unknown details.
for link in links:
    for endpoint in (link['source'], link['target']):
        if endpoint in nodes:
            continue
        nodes[endpoint] = {"id": endpoint, "name": endpoint, "country": "Unknown", "affiliation": "Unknown"}

# Flatten the author lookup into a plain list of node records
nodes_list = [*nodes.values()]

# Assemble the graph payload: node records plus co-authorship links
author_network_data = {"nodes": nodes_list, "links": links}

# Serialize the payload and write it to disk
output_path = "author_network_with_details_fixed.json"  # Replace with your desired file path
with open(output_path, "w") as json_file:
    json_file.write(json.dumps(author_network_data, indent=4))

print(f"JSON file created at {output_path}")


JSON file created at author_network_with_details_fixed.json
