In [1]:
__author__ = "Jon Ball"
__version__ = "June 2024"

In [2]:
import re
import json
import numpy as np
import pandas as pd
import networkx as nx
from scipy import stats
from collections import defaultdict
from numpy import array # needed for eval()

In [3]:
with open("data/issns_active_2022.txt", "r") as infile:
    issnsActive = list(line.strip() for line in infile.readlines())
issnsActive = {issn: 1 for issn in issnsActive if issn}
print(len(issnsActive))

60709


In [4]:
def get_edges(row):
    # for each unique publication in the dimensions data
    pub_id = row["publication_id"]
    # if the ISSN indicates a 2022 active OJS context
    if row["issn"] in issnsActive or row["eissn"] in issnsActive:
        # get publication_id for each document it references
        ref_ids =  list(set([s.strip("'") for s in row["reference_ids"].strip("[]").split()])) if "[]" not in row["reference_ids"] else []
        # get publication_id for each document that cites it
        cite_ids = list(set([d["id"] for d in [eval(s) for s in re.split(r"\.*\n+\.*", row["citing_ids"].strip("[]"))]])) if "[]" not in row["citing_ids"] else []
        # define edges in the citation graph
        V = [(pub_id, ref_id) for ref_id in ref_ids if ref_id]
        V += [(cite_id, pub_id) for cite_id in cite_ids if cite_id]
        return V
    else:
        return None

In [5]:
def get_issns(row):
    # if an ISSN is provided
    issn = row["issn"]
    issn = issn.strip() if isinstance(issn, str) else ""
    # if an E-ISSN is provided
    eissn = row["eissn"]
    eissn = eissn.strip() if isinstance(eissn, str) else ""
    # join ISSN and E-ISSN if both are present, else take either
    if issn and eissn:
        issns = f"{issn}\n{eissn}"
    elif issn:
        issns = issn
    elif eissn:
        issns = eissn
    else:
        issns = None
    return issns


In [6]:
def get_id_issn_map(row):
    # for each unique publication in the dimensions data
    pub_id = row["publication_id"]
    # return a mapping of publication_id to ISSN(s)
    issns = row["issns"]
    return pub_id, issns

In [7]:
def get_field_specific_issns(df, field):
    mask = (df["category_for"].notnull()) & (df["category_for"].str.contains(field))
    issns = set(df[mask]["issn"].dropna()) | set(df[mask]["eissn"].dropna())
    return issns

In [8]:
fields = {
    "agriculture": "Agricultural, Veterinary and Food Sciences",
    "bio": "Biological Sciences",
    "biomed": "Biomedical and Clinical Sciences",
    "design": "Built Environment and Design",
    "chem": "Chemical Sciences",
    "commerce": "Commerce, Management, Tourism and Services",
    "art": "Creative Arts and Writing",
    "earth": "Earth Sciences",
    "econ": "Economics",
    "ed": "Education",
    "engineering": "Engineering",
    "envisci": "Environmental Sciences",
    "health": "Health Sciences",
    "hist": "History, Heritage AND Archaeology",
    "soc": "Human Society",
    "indig": "Indigenous Studies",
    "infocomp": "Information and Computing Sciences",
    "comm": "Language, Communication and Culture",
    "law": "Law and Legal Studies",
    "math": "Mathematical Sciences",
    "phil": "Philosophy and Religious Studies",
    "phys": "Physical Sciences",
    "psych": "Psychology"  
}

## references_of_all_pubs.csv - 2022 active ISSNs

In [9]:
ref_df = pd.read_csv("data/references_of_all_pubs.csv", header=None)
ref_df.columns = ["index", "publication_id", "reference_ids", "citing_ids", "doi", "issn", "eissn", "type", "date", "category_for", "times_cited", "research_org_cities", "ur_id", "source", "research_org_country_names", "altmetrics", "title", "abstract", "concepts", "idk"]
ref_df["issns"] = ref_df.apply(get_issns, axis=1)
ref_df.head()

Unnamed: 0,index,publication_id,reference_ids,citing_ids,doi,issn,eissn,type,date,category_for,...,research_org_cities,ur_id,source,research_org_country_names,altmetrics,title,abstract,concepts,idk,issns
0,0,pub.1120627557,[],"[{'id': 'pub.1158208917', 'year': 2023}]",10.35673/ajmpi.v4i1.215,2406-8802,2685-550X,article,2019-07-31,"{'first_level': {'codes': array(['43', '50'], ...",...,[],['ur.011573102374.79'],"{'id': 'jour.1377569', 'title': 'Al-Adalah Jur...",[],"{'id': None, 'score': None}",{'preferred': 'PRAKSIS POLITIK NABI MUHAMMAD S...,"{'preferred': ""This paper examines the praxis ...","[{'concept': 'political action', 'relevance': ...",[],2406-8802\n2685-550X
1,1,pub.1016805418,['pub.1060450759' 'pub.1060527901' 'pub.106052...,"[{'id': 'pub.1138140594', 'year': 2021}\n {'id...",10.1038/294139a0,0028-0836,1476-4687,article,1981-11-01,"{'first_level': {'codes': array(['34', '51'], ...",...,['grid.482271.a'],['ur.0770644175.43' 'ur.01133403503.58' 'ur.01...,"{'id': 'jour.1018957', 'title': 'Nature', 'iss...",['United Kingdom'],"{'id': None, 'score': None}",{'preferred': 'Near-edge X-ray absorption spec...,{'preferred': 'The measurement of X-ray absorp...,[{'concept': 'X-ray absorption fine structure'...,[],0028-0836\n1476-4687
2,2,pub.1144573636,['pub.1029003997' 'pub.1134779582' 'pub.113470...,"[{'id': 'pub.1162918892', 'year': 2023}\n {'id...",10.3390/w14020151,,2073-4441,article,2022-01-07,"{'first_level': {'codes': array(['37', '40'], ...",...,[],['ur.011257637763.81' 'ur.016141117352.37' 'ur...,"{'id': 'jour.1398516', 'title': 'Water', 'issn...",[],"{'id': None, 'score': None}",{'preferred': 'Development of a Distributed Ma...,{'preferred': 'The article is devoted to the p...,"[{'concept': 'mathematical model', 'relevance'...",[],2073-4441
3,3,pub.1142327767,['pub.1001503523' 'pub.1002062994' 'pub.100313...,"[{'id': 'pub.1150797038', 'year': 2022}\n {'id...",10.1016/j.seta.2021.101661,2213-1388,2213-1396,article,2022-02-01,"{'first_level': {'codes': array(['33'], dtype=...",...,['grid.1004.5' 'grid.5037.1'],['ur.011201747573.76' 'ur.01166110143.28'],"{'id': 'jour.1144532', 'title': 'Sustainable E...",['Australia' 'Sweden'],"{'id': '119980489', 'score': 8}",{'preferred': 'A comparative review on the app...,{'preferred': 'Radiant low-temperature heating...,"[{'concept': 'high-temperature cooling', 'rele...",[],2213-1388\n2213-1396
4,4,pub.1037712872,['pub.1069651355' 'pub.1058573268' 'pub.100770...,"[{'id': 'pub.1134429328', 'year': 2021}\n {'id...",10.1016/0304-3878(81)90004-3,0304-3878,1872-6089,article,1981-08-01,"{'first_level': {'codes': array(['38', '44'], ...",...,['grid.17635.36'],['ur.010256274757.27' 'ur.012552575265.02'],"{'id': 'jour.1122871', 'title': 'Journal of De...",['United States'],"{'id': '64389583', 'score': 15}",{'preferred': 'The measurement and sources of ...,{'preferred': 'Production function models are ...,"[{'concept': 'firm attributes', 'relevance': 0...",[],0304-3878\n1872-6089


In [10]:
activeDF = ref_df[(ref_df["issn"].isin(issnsActive.keys())) | (ref_df["eissn"].isin(issnsActive.keys()))].copy()
activeDF.count()

index                         1215121
publication_id                1215121
reference_ids                 1215121
citing_ids                    1215121
doi                           1187300
issn                          1077002
eissn                         1119079
type                          1215121
date                          1214844
category_for                   797000
times_cited                   1215107
research_org_cities           1215121
ur_id                         1215121
source                        1215121
research_org_country_names    1215121
altmetrics                    1215121
title                         1215121
abstract                      1005616
concepts                      1215121
idk                           1215121
issns                         1215121
dtype: int64

In [11]:
activeDF["edges"] = activeDF.apply(get_edges, axis=1)
print(activeDF.iloc[0]["edges"])

[('pub.1158208917', 'pub.1120627557')]


In [12]:
edge_list = [e for el in activeDF["edges"].tolist() for e in el]
print(len(edge_list))
print(edge_list[0])

39646526
('pub.1158208917', 'pub.1120627557')


In [13]:
unique_contexts = activeDF[activeDF["issns"].notnull()]["issns"].unique().tolist()
print(len(unique_contexts))

24309


In [14]:
id2issns = ref_df.apply(get_id_issn_map, axis=1).tolist()
print(id2issns[0])

('pub.1120627557', '2406-8802\n2685-550X')


In [15]:
field_issns = {field: get_field_specific_issns(ref_df, pattern) for field, pattern in fields.items()}
del ref_df

## citations_of_all_pubs.csv - 2022 active ISSNs

In [16]:
cite_df = pd.read_csv("data/citations_of_all_pubs.csv", header=None)
cite_df.columns = ["index", "publication_id", "reference_ids", "citing_ids", "doi", "issn", "eissn", "type", "date", "category_for", "times_cited", "research_org_cities", "ur_id", "source", "research_org_country_names", "altmetrics", "title", "abstract", "concepts"]
cite_df["issns"] = cite_df.apply(get_issns, axis=1)
cite_df.head()

Unnamed: 0,index,publication_id,reference_ids,citing_ids,doi,issn,eissn,type,date,category_for,times_cited,research_org_cities,ur_id,source,research_org_country_names,altmetrics,title,abstract,concepts,issns
0,0,pub.1011627231,['pub.1000621744' 'pub.1000722723' 'pub.100103...,"[{'id': 'pub.1115166073', 'year': 2016}\n {'id...",10.1152/ajpendo.90306.2008,0193-1849,1522-1555,article,2008-05-20,"{'first_level': {'codes': array(['31', '42'], ...","{'times_cited': 100, 'recent_citations': 6, 'f...",['grid.6451.6'],['ur.07527627403.94' 'ur.01276472107.59'],"{'id': 'jour.1327387', 'title': 'AJP Endocrino...",['Israel'],"{'id': None, 'score': None}",{'preferred': 'Transcriptional regulation of t...,{'preferred': 'The insulin-responsive glucose ...,[],0193-1849\n1522-1555
1,1,pub.1011182182,['pub.1013726283' 'pub.1018900088' 'pub.102673...,"[{'id': 'pub.1102285413', 'year': 2010}\n {'id...",10.1353/apa.0.0007,0360-5949,1533-0699,article,2008-03-01,"{'first_level': {'codes': array(['43', '44', '...","{'times_cited': 7, 'recent_citations': 2, 'fie...",['grid.266515.3'],['ur.011637600145.05'],"{'id': 'jour.1143161', 'title': 'Transactions ...",['United States'],"{'id': '56626804', 'score': 9}",{'preferred': 'Genus quid est?: Roman Scholars...,{'preferred': 'From at least as early as Varro...,[],0360-5949\n1533-0699
2,2,pub.1000357800,['pub.1008741958' 'pub.1040358033' 'pub.105278...,"[{'id': 'pub.1055082006', 'year': 2015}\n {'id...",10.1016/j.cryobiol.2008.09.005,0011-2240,1090-2392,article,2008-09-19,"{'first_level': {'codes': array(['30', '32'], ...","{'times_cited': 29, 'recent_citations': 1, 'fi...",['grid.29980.3a'],['ur.013616340112.49' 'ur.01310020772.09' 'ur....,"{'id': 'jour.1001351', 'title': 'Cryobiology',...",['New Zealand'],"{'id': None, 'score': None}",{'preferred': 'Characterization of a family of...,{'preferred': 'Five genes coding for ice-activ...,[],0011-2240\n1090-2392
3,3,pub.1007126901,['pub.1006612903' 'pub.1010028023' 'pub.100433...,"[{'id': 'pub.1035493063', 'year': 2009}\n {'id...",10.1080/02513625.2008.10557013,0251-3625,2166-8604,article,2008-01-01,"{'first_level': {'codes': array(['33'], dtype=...","{'times_cited': 2, 'recent_citations': 0, 'fie...",[],['ur.016213633215.53' 'ur.016420304310.79'],"{'id': 'jour.1053460', 'title': 'disP - The Pl...",[],"{'id': None, 'score': None}","{'preferred': 'Die Zentralität war schon da!',...",,[],0251-3625\n2166-8604
4,4,pub.1007032109,['pub.1102845096' 'pub.1113140804' 'pub.106505...,"[{'id': 'pub.1046862372', 'year': 2015}\n {'id...",10.1080/14623940701816709,1462-3943,1470-1103,article,2008-02-01,"{'first_level': {'codes': array(['50'], dtype=...","{'times_cited': 3, 'recent_citations': 0, 'fie...",['grid.410319.e'],['ur.01313550775.24'],"{'id': 'jour.1139399', 'title': 'Reflective Pr...",['Canada'],"{'id': None, 'score': None}",{'preferred': 'Performing responsibility: ethi...,"{'preferred': 'I explore, and reflect on, the ...",[],1462-3943\n1470-1103


In [17]:
activeDF = cite_df[(cite_df["issn"].isin(issnsActive.keys())) | (cite_df["eissn"].isin(issnsActive.keys()))].copy()
activeDF.count()

index                         992926
publication_id                992926
reference_ids                 992926
citing_ids                    992926
doi                           987473
issn                          869269
eissn                         926256
type                          992926
date                          992414
category_for                  746954
times_cited                   992918
research_org_cities           992926
ur_id                         992926
source                        992926
research_org_country_names    992926
altmetrics                    992926
title                         992926
abstract                      889261
concepts                      992926
issns                         992926
dtype: int64

In [18]:
unique_contexts += activeDF[activeDF["issns"].notnull()]["issns"].unique().tolist()
print(len(unique_contexts))

37575


In [19]:
activeDF["edges"] = activeDF.apply(get_edges, axis=1)
print(activeDF.iloc[0]["edges"])

[('pub.1034132371', 'pub.1022711941'), ('pub.1034132371', 'pub.1002326098'), ('pub.1034132371', 'pub.1052271780'), ('pub.1034132371', 'pub.1026913579'), ('pub.1034132371', 'pub.1003604100'), ('pub.1034132371', 'pub.1045799580'), ('pub.1043886108', 'pub.1034132371'), ('pub.1019258875', 'pub.1034132371'), ('pub.1017412483', 'pub.1034132371'), ('pub.1031903195', 'pub.1034132371'), ('pub.1080561215', 'pub.1034132371'), ('pub.1020250538', 'pub.1034132371'), ('pub.1020310430', 'pub.1034132371'), ('pub.1045952946', 'pub.1034132371'), ('pub.1032937461', 'pub.1034132371'), ('pub.1022062989', 'pub.1034132371'), ('pub.1082366114', 'pub.1034132371'), ('pub.1008432868', 'pub.1034132371'), ('pub.1039318119', 'pub.1034132371'), ('pub.1081321795', 'pub.1034132371'), ('pub.1020675711', 'pub.1034132371'), ('pub.1000087098', 'pub.1034132371'), ('pub.1043495094', 'pub.1034132371'), ('pub.1012545893', 'pub.1034132371'), ('pub.1081150687', 'pub.1034132371'), ('pub.1008448320', 'pub.1034132371'), ('pub.10286

In [20]:
edge_list += [e for el in activeDF["edges"].tolist() for e in el]
print(len(edge_list))
print(edge_list[-1])

78578459
('pub.1140573500', 'pub.1058345146')


In [21]:
id2issns += cite_df.apply(get_id_issn_map, axis=1).tolist()
print(id2issns[-1])

('pub.1032068684', '0001-8791\n1095-9084')


In [22]:
for field, pattern in fields.items():
    field_issns[field] |= get_field_specific_issns(cite_df, pattern)
# Save the field-specific ISSN sets
with open("data/field_issns.json", "w") as outfile:
    json.dump({k: list(v) for k, v in field_issns.items()}, outfile)

In [23]:
del cite_df

## Map publication_id's to ISSNs in the citation graph

In [24]:
id2issns_dict = dict(id2issns)
issnEdges = defaultdict(list)
for citer_id, citee_id in edge_list:
    try:
        issnEdges[id2issns_dict[citer_id]].append(id2issns_dict[citee_id])
    except KeyError:
        continue
print(len(issnEdges))

57006


In [25]:
print(len(issnEdges))

57006


In [26]:
with open("data/issnEdges.json", "w") as outfile:
    json.dump(issnEdges, outfile)

In [27]:
with open("data/unique_contexts.json", "w") as outfile:
    json.dump(unique_contexts, outfile)

## Calculate a ratio of within-OJS citations and references, to out-citations and references, in the citation graph

In [1]:
import json
import numpy as np
import pandas as pd
import networkx as nx
from scipy import stats
from collections import deque

In [2]:
def is_publisher_journal(issn, issn_lookup):
    if issn is None:
        return False
    if isinstance(issn, str):
        if issn in issn_lookup:
            return True
        if "\n" in issn:
            return any(i in issn_lookup for i in issn.split("\n"))
    return False

In [3]:
# Load the ISSN edges dictionary
with open("data/issnEdges.json", "r") as f:
    issnEdges = json.load(f)
print(len(issnEdges))

57006


In [4]:
# Load the list of publisher ISSNs
with open("data/issns_active_2022.txt", "r") as f:
    publisher_issns = set(line.strip() for line in f)
print(len(publisher_issns))

60709


In [5]:
# Create a lookup set for faster membership testing
publisher_issn_lookup = set()
for issn in publisher_issns:
    publisher_issn_lookup.add(issn)
    if "\n" in issn:
        publisher_issn_lookup.update(issn.split("\n"))
print(len(publisher_issn_lookup))

60709


In [6]:
# Load the field-specific ISSN sets
with open("data/field_issns.json", "r") as f:
    field_issns = {k: set(v) for k, v in json.load(f).items()}
for field in field_issns:
    print(f"{field}: {len(field_issns[field])}")

agriculture: 23304
bio: 29346
biomed: 40839
design: 16981
chem: 14367
commerce: 36349
art: 21390
earth: 18827
econ: 17338
ed: 33035
engineering: 34164
envisci: 17960
health: 31736
hist: 0
soc: 43902
indig: 0
infocomp: 35852
comm: 30551
law: 24092
math: 12319
phil: 32670
phys: 10944
psych: 21045


In [7]:
# Load Indonesian IDs
with open("data/beacon.csv", "r") as infile:
    beacon = pd.read_csv(infile)
beaconID = beacon[(beacon["record_count_2022"] >= 5) &
                  (beacon["country_tld"].notnull()) & 
                  (beacon["country_tld"].str.contains("ID"))]
issnsID = beaconID[beaconID["issn"].notnull()].issn.tolist()
issnsID = set([sub for issn in issnsID for sub in issn.split("\n") if sub])
print(len(issnsID))

26102


In [8]:
def build_citation_graph(issnEdges):
    G = nx.DiGraph()
    for citing_journal, cited_journals in issnEdges.items():
        if citing_journal is not None:
            for cited_journal in cited_journals:
                if cited_journal is not None:
                    G.add_edge(citing_journal, cited_journal)
    return G

In [9]:
def get_top_central_journals(G, issn_set, top_n=50):
    # Calculate in-degree and out-degree centrality
    in_degree = nx.in_degree_centrality(G)
    out_degree = nx.out_degree_centrality(G)
    
    # Combine in-degree and out-degree
    total_degree = {node: in_degree.get(node, 0) + out_degree.get(node, 0) 
                    for node in set(in_degree) | set(out_degree)}
    
    # Filter for journals in the given ISSN set
    filtered_degree = {k: v for k, v in total_degree.items() if k is not None and is_publisher_journal(k, issn_set)}
    
    # Sort and return top N journals
    top_journals = sorted(filtered_degree.items(), key=lambda x: x[1], reverse=True)[:top_n]
    return top_journals

In [10]:
def calculate_ratios(issn_set, name, field_issns=None):
    confidence = 0.95
    internal_citations = 0
    external_citations = 0
    
    # Create a set of ISSNs that are both in the field and in the publisher set
    field_publisher_issns = issn_set & field_issns if field_issns else issn_set
    
    for citing_journal, cited_journals in issnEdges.items():
        if is_publisher_journal(citing_journal, field_publisher_issns):
            for cited_journal in cited_journals:
                if is_publisher_journal(cited_journal, field_publisher_issns):
                    internal_citations += 1
                elif field_issns is None or is_publisher_journal(cited_journal, field_issns):
                    external_citations += 1

    total_citations = internal_citations + external_citations
    internal_ratio = internal_citations / total_citations
    
    n = total_citations
    p = internal_ratio
    z = stats.norm.ppf((1 + confidence) / 2)
    denominator = 1 + z**2/n
    p_tilde = (p + z**2/(2*n)) / denominator
    stderr_tilde = z * np.sqrt(p*(1-p)/n + z**2/(4*n**2)) / denominator
    lower_bound = max(0, p_tilde - stderr_tilde)
    upper_bound = min(1, p_tilde + stderr_tilde)

    print(f"\n{name}")
    print(f"Internal citations: {internal_citations}")
    print(f"External citations: {external_citations}")
    print(f"Internal ratio: {internal_ratio:.4f}")
    print(f"95% Wilson score interval: ({lower_bound:.4f}, {upper_bound:.4f})")

    # Build the citation graph
    G = build_citation_graph(issnEdges)

    # Get top central journals
    top_journals = get_top_central_journals(G, issn_set)

    print(f"\nTop 50 most central journals for {name}:")
    for i, (journal, centrality) in enumerate(top_journals, 1):
        print(f"{i}. ISSN: {journal}, Centrality: {centrality:.4f}")

In [11]:
calculate_ratios(publisher_issns & publisher_issn_lookup, f"OJS (overall in Dimensions)")
calculate_ratios(publisher_issns & issnsID, f"Indonesian OJS (overall in Dimensions)")


OJS (overall in Dimensions)
Internal citations: 8056513
External citations: 19424042
Internal ratio: 0.2932
95% Wilson score interval: (0.2930, 0.2933)

Top 50 most central journals for OJS (overall in Dimensions):
1. ISSN: 0036-8075
1095-9203, Centrality: 0.5481
2. ISSN: 0028-0836
1476-4687, Centrality: 0.5271
3. ISSN: 1412-4262
2620-7389, Centrality: 0.1878
4. ISSN: 1857-9655
1857-5773, Centrality: 0.1744
5. ISSN: 1807-5932
1980-5322, Centrality: 0.1621
6. ISSN: 1937-8688, Centrality: 0.1620
7. ISSN: 1682-024X
1681-715X, Centrality: 0.1533
8. ISSN: 1680-6905
1729-0503, Centrality: 0.1498
9. ISSN: 0034-8910
1518-8787, Centrality: 0.1364
10. ISSN: 1424-7860
1424-3997, Centrality: 0.1312
11. ISSN: 1745-3674
1745-3682, Centrality: 0.1252
12. ISSN: 1548-7660, Centrality: 0.1205
13. ISSN: 2074-1804
2074-1812, Centrality: 0.1182
14. ISSN: 0104-1169
1518-8345, Centrality: 0.1157
15. ISSN: 0392-4203
2531-6745, Centrality: 0.1142
16. ISSN: 0301-2212
1179-6391, Centrality: 0.1130
17. ISSN: 000

^ The most central publications in the non-Indonesian OJS graph are Science (0036-8075) and Nature (0028-0836), followed by Jurnal Agriculture.<br>
In the Indonesian OJS citation graph, the most central are Jurnal Agriculture (1412-4262, 2620-7389), Personnel Review, & Ekonomika.

## Describe subgraphs for specific medical publications

In [12]:
medIssns = {
    "Ethiopian Journal of Health Sciences": "1029-1857\n2413-7170",
    "Sultan Qaboos University Medical Journal": "2075-051X\n2075-0528",
    "Upsala Journal of Medical Sciences": "0300-9734\n2000-1967",
    "International Journal of Health Sciences": "1658-3639\n1658-7774",
    "Acta Dermato-Venereologica": "0001-5555\n1651-2057",
    "Revista do Instituto de Medicina Tropical de São Paulo": "0036-4665\n1678-9946",
    "EXCLI Journal: Experimental and Clinical Sciences": "1611-2156",
    "The Southern African Journal of Critical Care": "1562-8264\n2078-676X",
    "Acta Odontologica Scandinavica": "0001-6357\n1502-3850",
    "Acta Biomedica Atenei Parmensis": "0392-4203\n2531-6745",
    "Iranian Red Crescent Medical Journal": "2074-1804\n2074-1812",
    "Acta Orthopaedica": "1745-3674\n1745-3682",
    "African Health Sciences": "1680-6905\n1729-0503",
    "Pakistan Journal of Medical Sciences": "1682-024X\n1681-715X",
    "Open Access Macedonian Journal of Medical Sciences": "1857-9655\n1857-5773",
    "Scandinavian Journal of Urology": "2168-1805\n2168-1813",
    "Biomedica": "0120-4157\n2590-7379",
    "Journal of Evolution of Dental and Medical Sciences": "2278-4748\n2278-4802",
    "Journal of Rehabilitation Medicine": "1650-1977\n1651-2081",
    "Biomolecules and Biomedicine": "2831-0896\n2831-090X"
}

In [13]:
otherIssns = {
    "Journal of Statistical Software": "1548-7660",
    "Jurnal Agriculture": "1412-4262\n2620-7389"
}

In [14]:
def analyze_journal_network(name, issn, issn_edges, publisher_issns):
    G = nx.MultiGraph()  # Use a MultiGraph to allow multiple edges

    # Add the central node
    G.add_node(issn)
    
    # Add direct connections (one-hop neighbors)
    if issn in issn_edges:
        for connected_issn in issn_edges[issn]:
            if connected_issn is not None:
                G.add_edge(issn, connected_issn)
    
    # Add reverse connections
    for connecting_issn, connected_issns in issn_edges.items():
        if connecting_issn is not None and issn in connected_issns:
            for _ in range(connected_issns.count(issn)):
                G.add_edge(connecting_issn, issn)

    if issn not in G:
        print(f"ISSN {issn} not found in the network.")
        return None

    # Calculate metrics
    degree = sum(dict(G.degree()).values())
    
    total_nodes = G.number_of_nodes()
    total_edges = G.number_of_edges()
    
    # Calculate internal and external connections
    internal_connections = sum(1 for u, v in G.edges(issn) if v in publisher_issns)
    external_connections = sum(1 for u, v in G.edges(issn) if v not in publisher_issns)
    
    total_connections = internal_connections + external_connections
    internal_ratio = internal_connections / total_connections if total_connections > 0 else 0
    external_ratio = external_connections / total_connections if total_connections > 0 else 0
    
    self_connections = G.number_of_edges(issn, issn)
    within_pub_ratio = self_connections / degree if degree > 0 else 0
    
    # Count connections for each neighbor
    neighbors = {n: G.number_of_edges(issn, n) for n in G.neighbors(issn)}
    
    top_connected = sorted(neighbors.items(), key=lambda x: x[1], reverse=True)[:10]
    
    density = nx.density(G)
    
    print(f"Analysis for Journal: {name}")
    print(f"ISSN: {issn}")
    print(f"Total nodes in network: {total_nodes}")
    print(f"Total edges in network: {total_edges}")
    print(f"Degree (total connections): {degree}")
    print(f"OJS-Internal connections: {internal_connections}")
    print(f"OJS-External connections: {external_connections}")
    print(f"OJS-Internal connection ratio: {internal_ratio:.2f}")
    print(f"OJS-External connection ratio: {external_ratio:.2f}")
    print(f"Within-publication connection ratio: {within_pub_ratio:.2f}")
    print(f"Network density: {density:.4f}")
    
    print("\nTop 10 connected journals:")
    for journal, count in top_connected:
        print(f"  ISSN: {journal}, Connections: {count}")

    print("\n___________________________\n")
    
    return G

In [15]:
for name, issn in medIssns.items():
    analyze_journal_network(name, issn, issnEdges, publisher_issn_lookup)

Analysis for Journal: Ethiopian Journal of Health Sciences
ISSN: 1029-1857
2413-7170
Total nodes in network: 6041
Total edges in network: 50059
Degree (total connections): 100118
OJS-Internal connections: 1837
OJS-External connections: 48222
OJS-Internal connection ratio: 0.04
OJS-External connection ratio: 0.96
Within-publication connection ratio: 0.02
Network density: 0.0027

Top 10 connected journals:
  ISSN: 1029-1857
2413-7170, Connections: 1772
  ISSN: 1932-6203, Connections: 1678
  ISSN: 1471-2458, Connections: 704
  ISSN: 1937-8688, Connections: 656
  ISSN: 1471-2393, Connections: 625
  ISSN: 0140-6736
1474-547X, Connections: 579
  ISSN: 1021-6790
2309-7388, Connections: 560
  ISSN: 1756-0500, Connections: 470
  ISSN: 0014-1755
2415-2420, Connections: 469
  ISSN: 1680-6905
1729-0503, Connections: 355

___________________________

Analysis for Journal: Sultan Qaboos University Medical Journal
ISSN: 2075-051X
2075-0528
Total nodes in network: 6036
Total edges in network: 42330
De

In [20]:
def analyze_journal_network_directed(name, issn, issn_edges, publisher_issns):
    G = nx.MultiDiGraph()  # Use a MultiDiGraph for directed edges with multiple connections

    # Add the central node
    G.add_node(issn)
    
    # Add outgoing edges (citations made by the journal)
    if issn in issn_edges:
        for cited_issn in issn_edges[issn]:
            if cited_issn is not None:
                G.add_edge(issn, cited_issn)
    
    # Add incoming edges (citations received by the journal)
    for citing_issn, cited_issns in issn_edges.items():
        if citing_issn is not None:
            for cited_issn in cited_issns:
                if cited_issn == issn:
                    G.add_edge(citing_issn, issn)

    if issn not in G:
        print(f"ISSN {issn} not found in the network.")
        return None

    # Calculate metrics
    in_degree = G.in_degree(issn)
    out_degree = G.out_degree(issn)
    total_degree = in_degree + out_degree
    
    total_nodes = G.number_of_nodes()
    total_edges = G.number_of_edges()
    
    # Calculate internal and external connections
    internal_citations_made = sum(1 for _, v, _ in G.out_edges(issn, keys=True) if v in publisher_issns)
    external_citations_made = sum(1 for _, v, _ in G.out_edges(issn, keys=True) if v not in publisher_issns)
    internal_citations_received = sum(1 for u, _, _ in G.in_edges(issn, keys=True) if u in publisher_issns)
    external_citations_received = sum(1 for u, _, _ in G.in_edges(issn, keys=True) if u not in publisher_issns)
    
    total_citations_made = internal_citations_made + external_citations_made
    total_citations_received = internal_citations_received + external_citations_received
    
    internal_ratio_made = internal_citations_made / total_citations_made if total_citations_made > 0 else 0
    external_ratio_made = external_citations_made / total_citations_made if total_citations_made > 0 else 0
    internal_ratio_received = internal_citations_received / total_citations_received if total_citations_received > 0 else 0
    external_ratio_received = external_citations_received / total_citations_received if total_citations_received > 0 else 0
    
    self_citations = G.number_of_edges(issn, issn)
    self_citation_ratio = self_citations / total_degree if total_degree > 0 else 0
    
    # Count connections for each neighbor
    cited_journals = {n: G.number_of_edges(issn, n) for n in G.successors(issn)}
    citing_journals = {n: G.number_of_edges(n, issn) for n in G.predecessors(issn)}
    
    top_cited = sorted(cited_journals.items(), key=lambda x: x[1], reverse=True)[:5]
    top_citing = sorted(citing_journals.items(), key=lambda x: x[1], reverse=True)[:5]
    
    density = nx.density(G)
    
    print(f"Analysis for Journal: {name}")
    print(f"ISSN: {issn}")
    print(f"Total nodes in network: {total_nodes}")
    print(f"Total edges in network: {total_edges}")
    print(f"In-degree (citations received): {in_degree}")
    print(f"Out-degree (citations made): {out_degree}")
    print(f"Total degree: {total_degree}")
    print(f"OJS-Internal citations made: {internal_citations_made}")
    print(f"OJS-External citations made: {external_citations_made}")
    print(f"OJS-Internal citations received: {internal_citations_received}")
    print(f"OJS-External citations received: {external_citations_received}")
    print(f"OJS-Internal citation ratio (made): {internal_ratio_made:.2f}")
    print(f"OJS-External citation ratio (made): {external_ratio_made:.2f}")
    print(f"OJS-Internal citation ratio (received): {internal_ratio_received:.2f}")
    print(f"OJS-External citation ratio (received): {external_ratio_received:.2f}")
    print(f"Self-citation ratio: {self_citation_ratio:.2f}")
    print(f"Network density: {density:.4f}")
    
    print("\nTop 5 cited journals:")
    for journal, count in top_cited:
        print(f"  ISSN: {journal}, Citations: {count}")
    
    print("\nTop 5 citing journals:")
    for journal, count in top_citing:
        print(f"  ISSN: {journal}, Citations: {count}")

    print("\n___________________________\n")
    
    return G

In [21]:
for name, issn in medIssns.items():
    analyze_journal_network_directed(name, issn, issnEdges, publisher_issn_lookup)

Analysis for Journal: Ethiopian Journal of Health Sciences
ISSN: 1029-1857
2413-7170
Total nodes in network: 6041
Total edges in network: 50059
In-degree (citations received): 20611
Out-degree (citations made): 31220
Total degree: 51831
OJS-Internal citations made: 1095
OJS-External citations made: 30125
OJS-Internal citations received: 742
OJS-External citations received: 19869
OJS-Internal citation ratio (made): 0.04
OJS-External citation ratio (made): 0.96
OJS-Internal citation ratio (received): 0.04
OJS-External citation ratio (received): 0.96
Self-citation ratio: 0.03
Network density: 0.0014

Top 5 cited journals:
  ISSN: 1029-1857
2413-7170, Citations: 1772
  ISSN: 0140-6736
1474-547X, Citations: 572
  ISSN: 1932-6203, Citations: 570
  ISSN: 1021-6790
2309-7388, Citations: 560
  ISSN: 0014-1755
2415-2420, Citations: 469

Top 5 citing journals:
  ISSN: 1029-1857
2413-7170, Citations: 1772
  ISSN: 1932-6203, Citations: 1108
  ISSN: 1471-2393, Citations: 368
  ISSN: 1937-8688, Citat