In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Load the citation table. Only three ID columns are used below: the focal
# paper, a paper citing it, and a reference of that citing paper
# (presumably one row per such triple -- TODO confirm against the CSV).
df = pd.read_csv('with_pmid_v1.csv')

# Cast every ID column to str so set membership / equality tests compare
# like with like.
# NOTE(review): astype(str) may turn a missing ID into the literal string
# 'nan', which would then behave like a real, shared reference -- confirm the
# CSV has no missing IDs.
for id_column in (
    'Focal Paper Pubmed ID',
    'Citing Paper Pubmed ID',
    'Citing paper references Pubmed ID',
):
    df[id_column] = df[id_column].astype(str)

focal_paper_groups = df.groupby('Focal Paper Pubmed ID')

results = []
for focal_paper_id, group in focal_paper_groups:
    citing_papers = set(group['Citing Paper Pubmed ID'])

    # NOTE(review): `references` is the union of the *citing papers'*
    # references, not the focal paper's own reference list. Every citing
    # paper's references are therefore a non-empty subset of it, the overlap
    # test below is always true, NB equals the number of citing papers and NF
    # stays 0 -- so DI can never be positive (the saved results are all
    # negative). None of the three columns used here holds the focal paper's
    # references; supply them here to obtain a meaningful NF/NB split.
    references = set(group['Citing paper references Pubmed ID'])

    # Collect each citing paper's references in one pass over the group,
    # instead of re-filtering the whole group once per citing paper.
    refs_by_citing_paper = {}
    for citing_paper, reference in zip(
        group['Citing Paper Pubmed ID'],
        group['Citing paper references Pubmed ID'],
    ):
        refs_by_citing_paper.setdefault(citing_paper, set()).add(reference)

    # NB: citing papers sharing at least one entry with `references`;
    # NF: the remaining citing papers.
    NB = sum(
        1 for paper_refs in refs_by_citing_paper.values()
        if paper_refs & references
    )
    NF = len(citing_papers) - NB

    # NR: papers anywhere in the table that cite one of `references` but are
    # not among this focal paper's citing papers.
    cites_a_reference = df['Citing paper references Pubmed ID'].isin(references)
    citing_references = set(df.loc[cites_a_reference, 'Citing Paper Pubmed ID'])
    NR = len(citing_references - citing_papers)

    #https://www.sciencedirect.com/science/article/pii/S1751157721000900
    total = NF + NB + NR
    DI = (NF - NB) / total if total != 0 else 0  # guard against 0/0
    results.append({
        'Focal Paper Pubmed ID': focal_paper_id,
        'DI': DI
    })

# One row per focal paper, in groupby (sorted string key) order.
results_df = pd.DataFrame(results)
results_df.to_csv('disruption_indicator_results.csv', index=False)

In [9]:
# Show the DI table (one row per focal paper) as this cell's output.
results_df

Unnamed: 0,Focal Paper Pubmed ID,DI
0,10455412,-0.052786
1,10767559,-1.000000
2,11035115,-0.056995
3,11166368,-0.019763
4,11295236,-0.100000
...,...,...
86,9445265,-0.157895
87,9454902,-0.021277
88,9518670,-0.023669
89,9684800,-0.046154


In [10]:
import pandas as pd

# Load the dataset
# NOTE(review): this cell repeats the computation of the first cell (In [1])
# and overwrites the same output CSV; keep only one copy.
file_path = 'with_pmid_v1.csv'
df = pd.read_csv(file_path)

# Convert columns to strings for consistent processing
# NOTE(review): astype(str) may turn a missing ID into the literal string
# 'nan' -- confirm the CSV has no missing IDs.
df['Focal Paper Pubmed ID'] = df['Focal Paper Pubmed ID'].astype(str)
df['Citing Paper Pubmed ID'] = df['Citing Paper Pubmed ID'].astype(str)
df['Citing paper references Pubmed ID'] = df['Citing paper references Pubmed ID'].astype(str)


def disruption_index(group, all_citations):
    """Return the disruption indicator DI for a single focal paper.

    Parameters
    ----------
    group : pandas.DataFrame
        The rows of the citation table that belong to one focal paper.
    all_citations : pandas.DataFrame
        The complete citation table; used to find papers that cite the same
        references without citing the focal paper.

    Returns
    -------
    float or int
        ``(NF - NB) / (NF + NB + NR)``, or ``0`` when the denominator is 0.

    Notes
    -----
    NOTE(review): ``references`` is the union of the *citing papers'*
    references, not the focal paper's own reference list. Each citing paper's
    references are a non-empty subset of it, so the overlap test is always
    true: NB equals the number of citing papers, NF is 0, and the returned
    value can never be positive. None of the three columns used here holds
    the focal paper's references; they must be supplied for NF/NB to be
    meaningful.
    """
    citing_col = 'Citing Paper Pubmed ID'
    reference_col = 'Citing paper references Pubmed ID'

    # Get unique citing papers and references for the focal paper
    citing_papers = set(group[citing_col])
    references = set(group[reference_col])

    # Collect each citing paper's references in one pass over the group,
    # instead of re-filtering the whole group once per citing paper.
    refs_by_citing_paper = {}
    for citing_paper, reference in zip(group[citing_col], group[reference_col]):
        refs_by_citing_paper.setdefault(citing_paper, set()).add(reference)

    # NB: citing papers sharing at least one entry with `references`;
    # NF: the remaining citing papers.
    NB = sum(
        1 for paper_refs in refs_by_citing_paper.values()
        if paper_refs & references
    )
    NF = len(citing_papers) - NB

    # Determine NR (papers citing the references but not the focal paper)
    cites_a_reference = all_citations[reference_col].isin(references)
    citing_references = set(all_citations.loc[cites_a_reference, citing_col])
    NR = len(citing_references - citing_papers)

    # Compute DI, guarding against a zero denominator
    total = NF + NB + NR
    return (NF - NB) / total if total != 0 else 0


# Group by Focal Paper Pubmed ID to process each focal paper
focal_paper_groups = df.groupby('Focal Paper Pubmed ID')

# One record per focal paper, in groupby (sorted string key) order
results = [
    {'Focal Paper Pubmed ID': focal_paper_id, 'DI': disruption_index(group, df)}
    for focal_paper_id, group in focal_paper_groups
]

# Convert results to DataFrame
results_df = pd.DataFrame(results)


# Save the results to a CSV file
results_df.to_csv('disruption_indicator_results.csv', index=False)


In [11]:
# Print every DI value on its own line, in results_df row order.
di_values = results_df['DI'].tolist()
for di_value in di_values:
    print(di_value)

-0.05278592375366569
-1.0
-0.05699481865284974
-0.019762845849802372
-0.1
-1.0
-0.031055900621118012
-0.11715481171548117
-0.08853118712273642
-0.09230769230769231
-0.09433962264150944
-0.1146278870829769
-0.04880694143167028
-0.11389128559102675
-0.03254437869822485
-0.025
-0.2765957446808511
-0.03128911138923655
-0.2535211267605634
-0.041666666666666664
-0.020942408376963352
-0.07977736549165121
-0.018606024808033077
-0.31517183570829843
-0.19348327566320647
-0.3189806678383128
-0.08433734939759036
-0.025974025974025976
-0.1553398058252427
-0.04019053289669545
-0.07894736842105263
-0.09107373868046571
-0.0577485380116959
-0.004862236628849271
-0.04317180616740088
-0.00477815699658703
-0.016877637130801686
-0.02072538860103627
-0.1111759556463379
-0.01448382126348228
-0.12194412107101281
-0.04766107678729038
-0.005
-0.03363636363636364
-0.012017710309930424
-0.001771479185119575
-0.03914590747330961
-0.07790663098424026
-0.014975550122249388
-0.03953968722336972
-0.012115258677144728
