In [1]:
import requests
import urllib.parse
import networkx as nx
from pyvis.network import Network
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import json
from IPython.display import display, JSON

ModuleNotFoundError: No module named 'pyvis'

In [None]:
# Function to get work data from OpenAlex API
def get_openalex_data(work_id_or_doi, timeout=30):
    """Fetch a single work record from the OpenAlex API.

    Parameters
    ----------
    work_id_or_doi : str
        One of:
        * a bare DOI such as ``'10.1167/10.3.4'``;
        * a DOI written as ``doi:...`` or as a ``doi.org`` / ``dx.doi.org`` URL;
        * any other ``http(s)://`` URL, which is requested exactly as given.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to
        ``requests.get``). Defaults to 30.

    Returns
    -------
    dict or None
        The decoded JSON body on HTTP 200; ``None`` (after printing a
        diagnostic) on any other status, on a transport error, or when the
        body is not valid JSON.
    """
    identifier = work_id_or_doi.strip()

    # A DOI supplied as a resolver URL must not be fetched directly: that
    # would hit the publisher landing page, not OpenAlex. Reduce it to the
    # bare DOI so it is routed through the works endpoint below.
    doi_prefixes = (
        'https://doi.org/',
        'http://doi.org/',
        'https://dx.doi.org/',
        'http://dx.doi.org/',
        'doi:',
    )
    lowered = identifier.lower()
    for prefix in doi_prefixes:
        if lowered.startswith(prefix):
            identifier = identifier[len(prefix):]
            break

    if identifier.startswith(('https://', 'http://')):
        # Any other absolute URL is requested exactly as given.
        url = identifier
    else:
        encoded_doi = urllib.parse.quote(identifier)
        url = f'https://api.openalex.org/works/https://doi.org/{encoded_doi}'

    try:
        # Without a timeout a stalled connection would block the kernel forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError) as exc:
        # Transport failure or a 200 response whose body is not JSON.
        print(f'Error: {exc}')
        return None

    print(f'Error: {response.status_code}')
    print(response.text)
    return None


In [None]:
# Function to get citing works from OpenAlex API
def get_cited_by_data(cited_by_api_url, timeout=30):
    """Fetch the list of works that cite a given work.

    Parameters
    ----------
    cited_by_api_url : str
        URL to request; callers in this notebook pass the
        ``cited_by_api_url`` field of a work record.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to
        ``requests.get``). Defaults to 30.

    Returns
    -------
    dict or None
        The decoded JSON body on HTTP 200; ``None`` (after printing a
        diagnostic) on any other status, on a transport error, or when the
        body is not valid JSON.

    Notes
    -----
    Only the single response at ``cited_by_api_url`` is retrieved.
    NOTE(review): if the API paginates its results, later pages are not
    followed here -- confirm whether that is intended.
    """
    try:
        # Without a timeout a stalled connection would block the kernel forever.
        response = requests.get(cited_by_api_url, timeout=timeout)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError) as exc:
        # Transport failure or a 200 response whose body is not JSON; report
        # it the same way as a bad status so graph building can continue.
        print(f'Error fetching cited_by_api_url: {exc}')
        return None

    print(f'Error fetching cited_by_api_url: {response.status_code}')
    print(response.text)
    return None

In [None]:
# Function to build the citation graph and assign concepts
def _primary_concept_name(work):
    """Return the display name of the first entry in ``work['concepts']``.

    Falls back to ``'Unknown concept'`` when the key is missing, ``None``,
    an empty list, or the first entry has no ``display_name``.
    """
    concepts = work.get('concepts') or []
    if not concepts:
        return 'Unknown concept'
    return (concepts[0] or {}).get('display_name', 'Unknown concept')


def build_citation_graph(work_data, graph=None, depth=0, max_depth=2):
    """Recursively build a directed "cited-by" graph rooted at ``work_data``.

    Each work becomes a node keyed by ``work_data['id']``. For every work
    returned by ``get_cited_by_data(work_data['cited_by_api_url'])`` an edge
    ``citing_work -> work`` is added and the citing work is expanded in turn
    until ``max_depth`` is reached.

    Node attributes written:
    * every key/value of the work record itself;
    * ``depth`` -- the smallest depth at which the work has been reached;
    * ``primary_concept`` -- display name of the first listed concept;
    * ``subfield`` / ``field`` -- display names read from ``primary_topic``;
    * ``concept`` -- (citing works only) same value as ``primary_concept``,
      kept for backward compatibility.

    Parameters
    ----------
    work_data : dict
        Work record; must contain ``'id'``.
    graph : networkx.DiGraph, optional
        Graph to extend. A new ``nx.DiGraph`` is created when ``None``.
    depth : int, optional
        Depth of ``work_data`` relative to the root (root is 0).
    max_depth : int, optional
        Works at this depth are added but not expanded further.

    Returns
    -------
    The graph that was passed in (or the newly created one).
    """
    # `is None` rather than truthiness: an empty graph must still be reused.
    if graph is None:
        graph = nx.DiGraph()
    work_id = work_data['id']

    # Missing/None/empty concept and topic data fall back to placeholders
    # instead of raising.
    primary_concept = _primary_concept_name(work_data)
    primary_topic = work_data.get('primary_topic') or {}
    subfield = (primary_topic.get('subfield') or {}).get('display_name', 'Unknown subfield')
    field = (primary_topic.get('field') or {}).get('display_name', 'Unknown field')

    if work_id not in graph:
        graph.add_node(work_id)
    node_attrs = graph.nodes[work_id]

    # Remember the depth recorded by earlier visits before merging raw data,
    # then write derived attributes last so record keys cannot clobber them.
    previous_depth = node_attrs.get('depth', float('inf'))
    node_attrs.update(work_data)
    node_attrs['depth'] = min(depth, previous_depth)
    node_attrs['primary_concept'] = primary_concept
    node_attrs['subfield'] = subfield
    node_attrs['field'] = field

    if depth >= max_depth:
        return graph

    cited_by_api_url = work_data.get('cited_by_api_url')
    if not cited_by_api_url:
        return graph

    citing_works_data = get_cited_by_data(cited_by_api_url)
    if not citing_works_data:
        return graph

    for citing_work_data in citing_works_data.get('results') or []:
        citing_work_id = citing_work_data['id']

        if citing_work_id not in graph:
            graph.add_node(citing_work_id)
        graph.nodes[citing_work_id]['concept'] = _primary_concept_name(citing_work_data)

        # Edge direction: citer -> cited.
        graph.add_edge(citing_work_id, work_id)

        # The recursive call records depth and the remaining attributes for
        # the citing work, and expands it if max_depth allows.
        build_citation_graph(citing_work_data, graph, depth + 1, max_depth)

    return graph

In [None]:
#doi = '10.1038/nphys1170'
#doi = '10.1017/CBO9781107415324'
doi = '10.1167/10.3.4' # mine psych
#doi = '10.17638/datacat.liverpool.ac.uk/1311' # Liverpool 1
#doi = '10.1016/j.heliyon.2024.e30685' # Liverpool 2

# Elsevier
#doi = '10.7488/ds/1369'  # Edinburgh
#doi = '10.17864/1947.256' # Reading
#doi = '10.15131/shef.data.11567910' # Sheffield
#doi = '10.5523/bris.2fahpksont1zi26xosyamqo8rr' # Bristol

# Fetch root work data (None when the lookup failed)
root_work_data = get_openalex_data(doi)

if root_work_data:
    # Only touch the record once we know the fetch succeeded; indexing a
    # None result would raise before the failure message below is reached.
    print(root_work_data['title'])
    print(root_work_data['cited_by_count'])

    display(JSON(root_work_data))

    # Build the citation graph
    citation_graph = build_citation_graph(root_work_data, max_depth=2)

    # Create a PyVis network for Jupyter notebook
    net = Network(
        height='750px',
        width='100%',
        directed=True,
        notebook=True,
        cdn_resources='in_line'
    )

    # Populate the network with the NetworkX graph
    net.from_nx(citation_graph)

    # Extract all unique concepts / fields and map them to positions on a
    # continuous colormap. The same fallbacks are used here and in the node
    # loop below so every looked-up key is guaranteed to be in the colour map.
    concepts = sorted(set(
        data.get('primary_concept', 'Unknown concept')
        for _, data in citation_graph.nodes(data=True)
    ))
    fields = sorted(set(
        data.get('field', 'Unknown field')
        for _, data in citation_graph.nodes(data=True)
    ))
    cmap = plt.get_cmap('rainbow')

    # Assign each concept / field a unique color based on its position in the list.
    # concept_colors is not used further in this cell; kept in case other
    # cells rely on it.
    concept_colors = {concept: mcolors.rgb2hex(cmap(i / len(concepts))) for i, concept in enumerate(concepts)}
    field_colors = {field: mcolors.rgb2hex(cmap(i / len(fields))) for i, field in enumerate(fields)}

    # Get the depths of all nodes (min/max are not used further in this cell;
    # kept in case other cells rely on them)
    depths = [data['depth'] for _, data in citation_graph.nodes(data=True)]
    min_depth = min(depths)
    max_depth = max(depths)

    # Colour nodes by field, size them by depth, and build their tooltips
    for node_id, node_data in citation_graph.nodes(data=True):
        field = node_data.get('field', 'Unknown field')
        color = field_colors[field]
        depth = node_data['depth']

        # `or` fallbacks also cover keys that are present but set to None.
        node_doi = node_data.get('doi') or 'No DOI'
        title = node_data.get('display_name') or 'No Title'
        publication_year = node_data.get('publication_year') or 'Unknown Year'
        keywords = node_data.get('keywords') or []
        keywords_text = ', '.join([keyword['display_name'] for keyword in keywords]) if keywords else 'No keywords'

        vis_node = net.get_node(node_id)
        vis_node['color'] = color
        vis_node['title'] = f"{title} ({publication_year})\nField: {field}\nKeywords: {keywords_text}\nDOI: {node_doi}"
        vis_node['label'] = field #title
        vis_node['size'] = 100 / (depth+1)  # Root is largest; size shrinks with depth

    # Enable physics menu
    net.show_buttons()  #filter_=['physics'])

    # Generate the HTML content
    html_content = net.generate_html()

    # Display the network in the notebook
    display(HTML(html_content))

else:
    print('Failed to retrieve root work data.')