# Title and Abstract Review
**Author:** Jack Galbraith-Edge

In [2]:
from msc_code.scripts.helpers import *
from msc_code.scripts.notebook_setup import *

Notebook setup complete.


In [3]:
# Define exclusion criteria:
exclusion_criteria = {
    1: "Full text not available in English.",
    2: "Studies not focusing on intentional ingestion of foreign object via the oral cavity (mouth).",
    3: "Studies focussing solely on accidental ingestion.",
    4: "Non-Human/Animal studies.",
    5: "Reviews, editorials, commentaries, and opinion pieces without original empirical data.",
    6: "Duplicate publications or studies with overlapping data sets (the most comprehensive or recent study will be included).",
    7: "Studies focusing on ingestion of substances (e.g. poisons, medications) rather than physical foreign objects.",
    8: "Ingestions undertaken in controlled environment as part of voluntary study."
}

In [4]:
# Import google scholar and database search data
google_df = pd.read_csv("/".join([RAW_DATA_DIR, "google_scholar", "cleaned_google_scholar_title_abstract_screen_end_1.csv"]))
database_df = pd.read_csv("/".join([RAW_DATA_DIR, "database_search", "database_search_title_abstract_screen_end.csv"]))

In [5]:
# remove duplicates from google dataframe that were identified in title and abstract review
google_df = google_df[google_df['Duplicate'] != True]

In [6]:
# Merge all Google Scholar and Database search results
all_df = pd.merge(
    database_df, google_df,
    how='outer',
    on=["Publication Year", 
        "First Author", 
        "Authors",
        "Publication Title",
        "Title",
        "Abstract",
        "Database",
        "Exclude",
        "Reason ID",
        "DOI"]
)

# Specify Desired Column Order
desired_column_order = ["Publication Year", "First Author", "Authors", "Publication Title", "Summary", "Database", "DOI", "URL", "Title", "Abstract", "Abstract2", "Exclude", "Reason ID"]

# Apply to dataframe
all_df = all_df[desired_column_order]

# Fix capitalisation on of fields
all_df["Title"] = all_df["Title"].str.title()
all_df["First Author"] = all_df["First Author"].str.title()
all_df["Authors"] = all_df["Authors"].str.title()
all_df["Abstract"] = all_df["Abstract"].str.title()

# Sort by Publication Year ascending.
all_df = all_df.sort_values(by="Publication Year", ascending=True).reset_index(drop=True)

all_df.index.name = 'id' # Name the index 'id', giving each paper a unique ID.

In [7]:
# Export all JGE title and abstract reviewed articles
all_df.to_csv("/".join([RAW_DATA_DIR, "title_abstract_review", "all_results_title_abstract_reviewed_jge_end.csv"]))

In [8]:
# Create clean dataframe to export for other author review
clean_df = all_df

# Remove reasons for exlusion and previously marked exclusions for clean export and second author review.
clean_df['Exclude'] = None
clean_df['Reason ID'] = None

# Export to CSV
clean_df.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "all_results_title_abstract_start.csv"]))

In [9]:
# Create sample of 10% of papers
clean_sample_df = create_results_sample(clean_df, 0.1, 42) # '42' argument specificies random number to great sample from

# export clean data to raw data directory as ms_title_abstract review start.csv
clean_sample_df.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "ms_title_abstract_review_start.csv"]))

Given sample size: 492
Calculating desired sample size... 10% of 492 = 49
Creating Sample Dataframe


In [10]:
# Import second author (MS) Title Abstract Review Data
ms_reviewed_df = pd.read_csv("/".join([RAW_DATA_DIR, "title_abstract_review", "ms_title_abstract_review_end.csv"]))

In [11]:
# Import title and abstract review results from JGE.
jge_reviewed_df = pd.read_csv("/".join([RAW_DATA_DIR, "title_abstract_review", "all_results_title_abstract_reviewed_jge_end.csv"]))

# Rename columns before merge with second author (MS) review data.
jge_reviewed_df = jge_reviewed_df.rename(columns={
    'Exclude': 'Exclude_JGE',
    'Reason ID': 'Reason ID_JGE'
})

In [12]:
# Drop columns in ms_reviewed_df that are redundant in jge_reviewed_df (except for columns to add)
ms_reviewed_df = ms_reviewed_df[['id', 'Exclude', 'Reason ID']]

# Rename columns in the reviewed dataset
ms_reviewed_df.rename(columns={
    'Exclude': 'Exclude_MS', 
    'Reason ID': 'Reason ID_MS'
    }, 
    inplace=True
)

# Merge the datasets rightwards, so only papers review by MS and JGE are together.
jge_ms_reviewed_df = pd.merge(jge_reviewed_df, ms_reviewed_df, on='id', how='right')

In [13]:
# See how many papers JGE and MS agree on.
jge_ms_agree = jge_ms_reviewed_df[jge_ms_reviewed_df['Exclude_JGE'] == jge_ms_reviewed_df['Exclude_MS']] # create JGE/MS agreement dataframe
print(f"JGE and MS agree on {len(jge_ms_agree)}/{len(jge_ms_reviewed_df)} reviewed by MS.")

jge_ms_disagree = jge_ms_reviewed_df[jge_ms_reviewed_df['Exclude_JGE'] != jge_ms_reviewed_df['Exclude_MS']] # create JGE/MS disagreement dataframe
print(f"JGE and MS disagree on {len(jge_ms_disagree)}/{len(jge_ms_reviewed_df)} review by MS.") 

# Export to CSV
jge_ms_disagree.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "jge_ms_disagree_title_abstract.csv"]), index=False) # Export JGE/MS disagreements
jge_ms_agree.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "jge_ms_agree_title_abstract.csv"]), index=False) # Export JGE/MS aggreements

# Calculate Cohen's Kappa
jge_include = jge_ms_reviewed_df[jge_ms_reviewed_df['Exclude_JGE'] == False] # create dataframe of ms and jge reviewed articles that jge wishes to include
jge_exclude = jge_ms_reviewed_df[jge_ms_reviewed_df['Exclude_JGE'] == True] # create dataframe of ms and jge reviewed articles that jge wishes to exclude
ms_include = jge_ms_reviewed_df[jge_ms_reviewed_df['Exclude_MS'] == False]  # create dataframe of ms and jge reviewed articles that ms wishes to include
ms_exclude = jge_ms_reviewed_df[jge_ms_reviewed_df['Exclude_MS'] == True]   # create dataframe of ms and jge reviewed articles that ms wishes to exclude

ms_reviewed_count = len(ms_reviewed_df) # calculate number of papers review by JGE and MS
jge_ms_agree_count = len(jge_ms_agree) # calculate number of papers JGE and MS agree on

jge_include_count = len(jge_include) # calculate number of JGE inclusions
jge_exclude_count = len(jge_exclude) # calculate number of JGE exclusions
ms_include_count = len(ms_include) # calculate number of MS inclusion
ms_exclude_count = len(ms_exclude) # calculate number of MS exclusions

# Observer agreement
P_o = jge_ms_agree_count / ms_reviewed_count

# Expected agreement
P_e =   ((jge_include_count / ms_reviewed_count) * (ms_include_count / ms_reviewed_count)) + \
        ((jge_exclude_count / ms_reviewed_count) * (ms_exclude_count / ms_reviewed_count))

# Cohen's Kappa
kappa = (P_o - P_e) / (1 - P_e)

print(f"Cohen's Kappa: {kappa:.3f}")

JGE and MS agree on 30/50 reviewed by MS.
JGE and MS disagree on 20/50 review by MS.
Cohen's Kappa: 0.299


At this point, the disagreements were then sent to my supervisor (GC) for a third review

In [14]:
gc_reviewed_df = pd.read_csv("/".join([RAW_DATA_DIR, "title_abstract_review", "gc_review_title_abstract_review_end.csv"]))

gc_reviewed_df = gc_reviewed_df[["id", "Exclude_GC", "Reason ID_GC", "Review_GC", "Comments_GC"]]

In [15]:
# Merge all JGE reviewed articles and all MS review articles
reviewed_df = pd.merge(
    jge_reviewed_df,
    ms_reviewed_df,
    on='id',
    how='left'
)

# Merge GC reviewed articles with these
reviewed_df = pd.merge(
    reviewed_df,
    gc_reviewed_df,
    on='id',
    how='left'
)

In [16]:
# Export to CSV
reviewed_df.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "title_abstract_review_complete.csv"]))

In [17]:
# Merge the datasets rightwards, so only papers review by GC and JGE are together.
jge_gc_reviewed_df = pd.merge(jge_reviewed_df, gc_reviewed_df, on='id', how='right')

# Calculate Cohen's Kappa between JGE and GC

# Calculate agreement
jge_gc_agree = jge_gc_reviewed_df[jge_gc_reviewed_df['Exclude_JGE'] == jge_gc_reviewed_df['Exclude_GC']]
print(f"JGE and GC agree on {len(jge_gc_agree)}/{len(jge_gc_reviewed_df)} results reviewed by GC.")

# Calculate disagreement
jge_gc_disagree = jge_gc_reviewed_df[jge_gc_reviewed_df['Exclude_JGE'] != jge_gc_reviewed_df['Exclude_GC']]
print(f"JGE and GC disagree on {len(jge_gc_disagree)}/{len(jge_gc_reviewed_df)} results reviewed by GC.")

# Export to CSV
jge_gc_agree.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "jge_gc_agree_title_abstract.csv"]), index=False)
jge_gc_disagree.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "jge_gc_disagree_title_abstract.csv"]), index=False)

# Calculate Cohen's Kappa
jge_include = jge_gc_reviewed_df[jge_gc_reviewed_df['Exclude_JGE'] == False] 
jge_exclude = jge_gc_reviewed_df[jge_gc_reviewed_df['Exclude_JGE'] == True]
gc_include = jge_gc_reviewed_df[jge_gc_reviewed_df['Exclude_GC'] == False]
gc_exclude = jge_gc_reviewed_df[jge_gc_reviewed_df['Exclude_GC'] == True]

gc_reviewed_count = len(gc_reviewed_df) # Calculate number of results GC reviewed.
jge_gc_agree_count = len(jge_gc_agree) # Calculate number of results JGE and GC agree on.

jge_include_count = len(jge_include) # Calculate number of JGE inclusions
jge_exclude_count = len(jge_exclude) # Calculate number of JGE exclusions
gc_include_count = len(gc_include) # Cacluate number of GC inclusions
gc_exclude_count = len(gc_exclude) # Calculate number of GC exclusions

# Observer agreement
P_o = jge_gc_agree_count / gc_reviewed_count
print(f"Observed agreement (P_o): {P_o:.3f}")

# Expected agreement 
P_e =   ((jge_include_count / gc_reviewed_count) * (gc_include_count / gc_reviewed_count)) + \
        ((jge_exclude_count / gc_reviewed_count) * (gc_exclude_count / gc_reviewed_count))
print(f"Expected agreement (P_e): {P_e:.3f}")

# Cohen's Kappa
kappa = (P_o - P_e) / (1 - P_e)

print(f"Cohen's Kappa: {kappa:.3f}")

print(f"""As part of the review process, disputed cases (n={len(jge_gc_reviewed_df)}) between 
      the primary author (JGE) and a second reviewer (MS) were adjudicated by the supervisor (GC). 
      Agreement between JGE and GC was achieved in {(jge_gc_agree_count / gc_reviewed_count):.0%} of the cases ({jge_gc_agree_count}/{gc_reviewed_count}). 
      Cohen’s Kappa was {kappa:.3f}, reflecting the small sample size and the inherent bias in 
      focusing solely on disputed cases. 
      The primary objective of this step was to reach consensus and ensure consistency in decision-making 
      rather than measure interrater reliability across the entire dataset.
      """)

JGE and GC agree on 6/14 results reviewed by GC.
JGE and GC disagree on 8/14 results reviewed by GC.
Observed agreement (P_o): 0.429
Expected agreement (P_e): 0.551
Cohen's Kappa: -0.273
As part of the review process, disputed cases (n=14) between 
      the primary author (JGE) and a second reviewer (MS) were adjudicated by the supervisor (GC). 
      Agreement between JGE and GC was achieved in 43% of the cases (6/14). 
      Cohen’s Kappa was -0.273, reflecting the small sample size and the inherent bias in 
      focusing solely on disputed cases. 
      The primary objective of this step was to reach consensus and ensure consistency in decision-making 
      rather than measure interrater reliability across the entire dataset.
      


In [18]:
# Merge the datasets rightwards, so only papers review by MS and JGE are together.
ms_gc_reviewed_df = pd.merge(ms_reviewed_df, gc_reviewed_df, on='id', how="right")

# Calculate Cohen's Kappa between JGE and GC

# Calculate agreement
ms_gc_agree = ms_gc_reviewed_df[ms_gc_reviewed_df['Exclude_MS'] == ms_gc_reviewed_df['Exclude_GC']]
print(f"MS and GC agree on {len(ms_gc_agree)}/{len(ms_gc_reviewed_df)} results reviewed by GC.")

# Calculate disagreement
ms_gc_disagree = ms_gc_reviewed_df[ms_gc_reviewed_df['Exclude_MS'] != ms_gc_reviewed_df['Exclude_GC']]
print(f"MS and GC disagree on {len(ms_gc_disagree)}/{len(ms_gc_reviewed_df)} results reviewed by GC.")

# Export to CSV
ms_gc_agree.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "ms_gc_agree_title_abstract.csv"]), index=False)
ms_gc_disagree.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "ms_gc_disagree_title_abstract.csv"]), index=False)

# Calculate Cohen's Kappa
ms_include = ms_gc_reviewed_df[ms_gc_reviewed_df['Exclude_MS'] == False] 
ms_exclude = ms_gc_reviewed_df[ms_gc_reviewed_df['Exclude_MS'] == True]
gc_include = ms_gc_reviewed_df[ms_gc_reviewed_df['Exclude_GC'] == False]
gc_exclude = ms_gc_reviewed_df[ms_gc_reviewed_df['Exclude_GC'] == True]

gc_reviewed_count = len(gc_reviewed_df) # Calculate number of results GC reviewed.
ms_gc_agree_count = len(ms_gc_agree) # Calculate number of results MS and GC agree on.

ms_include_count = len(ms_include) # Calculate number of MS inclusions
ms_exclude_count = len(ms_exclude) # Calculate number of MS exclusions
gc_include_count = len(gc_include) # Cacluate number of GC inclusions
gc_exclude_count = len(gc_exclude) # Calculate number of GC exclusions

# Observer agreement
P_o = ms_gc_agree_count / gc_reviewed_count
print(f"Observed agreement (P_o): {P_o:.3f}")

# Expected agreement 
P_e =   ((ms_include_count / gc_reviewed_count) * (gc_include_count / gc_reviewed_count)) + \
        ((ms_exclude_count / gc_reviewed_count) * (gc_exclude_count / gc_reviewed_count))
print(f"Expected agreement (P_e): {P_e:.3f}")

# Cohen's Kappa
kappa = (P_o - P_e) / (1 - P_e)

print(f"Cohen's Kappa: {kappa:.3f}")

print(f"""As part of the review process, disputed cases (n={len(jge_gc_reviewed_df)}) between 
      the primary author (JGE) and a second reviewer (MS) were adjudicated by the supervisor (GC). 
      Agreement between MS and GC was achieved in {(jge_gc_agree_count / gc_reviewed_count):.0%} of the cases ({jge_gc_agree_count}/{gc_reviewed_count}). 
      Cohen’s Kappa was {kappa:.3f}.
      """)

MS and GC agree on 7/14 results reviewed by GC.
MS and GC disagree on 7/14 results reviewed by GC.
Observed agreement (P_o): 0.500
Expected agreement (P_e): 0.439
Cohen's Kappa: 0.109
As part of the review process, disputed cases (n=14) between 
      the primary author (JGE) and a second reviewer (MS) were adjudicated by the supervisor (GC). 
      Agreement between MS and GC was achieved in 43% of the cases (6/14). 
      Cohen’s Kappa was 0.109.
      


In response to the adjucation of the JGE-MS title and abstract review by GC, GC comments:

"Just gone through them. I’m afraid I’ve probably muddied the waters still further. I’ve put ‘unclear’ for a few of them, that might be resolved by looking at the full text. A key question for your exclusion criteria is: are you including children who swallow things like coins and magnets? It’s debatable whether these are ‘intentional’ – children often just put them in their mouths and accidentally swallow them. Even when they do intentionally swallow, it’s unlikely to be DSH. So it depends a bit on what you mean by ‘intentional’. Intending to put it in their mouth? Intending to swallow? Intending to cause themselves harm?"

In [19]:
# Create Exclude_FINAL column in reviewed dataframe to store final decision
reviewed_df['Exclude_FINAL'] = None

In [20]:
# Ensure 'Exclude_FINAL' exists in the DataFrame
reviewed_df['Exclude_FINAL'] = np.nan

reviewed_df['Exclude_FINAL'] = reviewed_df.apply(
    lambda row: row['Exclude_JGE']
    if pd.isna(row['Exclude_MS'])  # If MS is NaN, take JGE's decision
    else (row['Exclude_JGE']
          if row['Exclude_JGE'] == row['Exclude_MS']  # If JGE and MS agree
          else row['Exclude_GC']),  # If JGE and MS disagree, take GC's decision
    axis=1
)

In [21]:
# Create dataframe containing only reviewer decisions, not paper information, just paper ID.
reviewer_decisions_df = reviewed_df[['id', 'Exclude_JGE', 'Reason ID_JGE', 'Exclude_MS', 'Reason ID_MS', 'Exclude_GC', 'Reason ID_GC', 'Exclude_FINAL']]

# make index equal to paper id
reviewer_decisions_df.index = reviewer_decisions_df['id']

# Export decisions to CSV
reviewer_decisions_df.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "title_abstract_reviewer_decisions_final.csv"]))

In [22]:
# Create dataframes of included and excluded results
title_abstract_include_final_df = reviewed_df[reviewed_df['Exclude_FINAL'] == False] # Included
title_abstract_exclude_final_df = reviewed_df[reviewed_df['Exclude_FINAL'] == True] # Excluded

# Calculate number of inclusions and exclusions
title_abstract_inclusion_count = len(title_abstract_include_final_df) # Inclusions
title_abstract_exclusion_count = len(title_abstract_exclude_final_df) # Exclusions
print(f"{title_abstract_exclusion_count} results were excluded during title and abstract screening.")
print(f"{title_abstract_inclusion_count} results were included during title and abstract screening.")

# Export results to CSV
title_abstract_include_final_df.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "title_abstract_review_include_final.csv"]), index=False)
title_abstract_exclude_final_df.to_csv("/".join([PROC_DATA_DIR, "title_abstract_review", "title_abstract_review_exclude_final.csv"]), index=False)

175 results were excluded during title and abstract screening.
311 results were included during title and abstract screening.


In [23]:
# Export to RIS format for import into Zotero reference manager for full text review
output_file = os.path.join(*[PROC_DATA_DIR, "title_abstract_review", "title_abstract_results.ris"])

# Export dataframe to RIS
dataframe_to_ris(title_abstract_include_final_df, output_file)


Unnamed: 0,id,Publication Year,First Author,Authors,Publication Title,Summary,Database,DOI,URL,Title,...,Abstract2,Exclude_JGE,Reason ID_JGE,Exclude_MS,Reason ID_MS,Exclude_GC,Reason ID_GC,Review_GC,Comments_GC,Exclude_FINAL
0,0,0,"Listed, N.",Yee A,,A Yee,Google Scholar,,https://scholar.google.com/scholar?hl=en&as_sd...,K9 Tactical Emergency Casualty Care,...,Abstract not available,True,4.0,True,2.0,,,,,True
1,1,1849,"Ri, Q.",Q Ri,,Q RI - search.proquest.com,Google Scholar,,https://www.proquest.com/openview/eb41c2191c53...,Quarterly Summary Of The Improvements And Disc...,...,Abstract not available,True,4.0,,,,,,,True
2,2,1883,"Howe, A.",Aj Howe,,AJ Howe - 1883 - books.google.com,Google Scholar,,https://books.google.lk/books?hl=en&lr=&id=Ytb...,A Practical And Systematic Treatise On Fractur...,...,Abstract not available,True,2.0,True,2.0,,,,,True
5,5,1895,"Morrison, W.",Wa Morrison,The Boston Medical And Surgical Journal,WA MORRISON - The Boston Medical and Surgical ...,Google Scholar,10.1056/NEJM189502071320604,https://www.nejm.org/doi/pdf/10.1056/nejm18950...,The Value Of The Stomach-Tube In Feeding After...,...,Abstract not available,True,2.0,,,,,,,True
6,6,1901,"Roger, H.",H Roger,,H Roger - 1901 - books.google.com,Google Scholar,,https://books.google.com/books?hl=en&lr=&id=x7...,Introduction To The Study Of Medicine,...,Abstract not available,True,5.0,,,,,,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473,473,2024,Medina G.,"Medina Gontier, Juli Celina; Wienandts, Lea; E...",American Journal Of Case Reports,,PubMed,10.12659/ajcr.943514,,Unintentional Plastic Blister Ingestion Leadin...,...,,True,3.0,,,,,,,True
479,479,2024,"Jaison, V.","Jaison Varghese J., Aithal V.U., Sharan K., Ma...",Preventive Medicine Reports,,Embase,10.1016/j.pmedr.2024.102713,,Profiling Of Swallowing Function In Head And N...,...,,True,2.0,,,,,,,True
481,481,2024,"Gontier, J.","Gontier, Jcm; Wienandts, L; Endermann, S",American Journal Of Case Reports,,Web of Science,10.12659/a1cr.943514,,Unintentional Plastic Blister Ingestion Leadin...,...,,True,3.0,,,,,,,True
490,490,2024,"Marano, M.","Marano M., Goffredo B.M., Faraci S., Torroni F...",Toxicology Reports,,Embase,10.1016/j.toxrep.2024.101683,,Pharmacokinetic Effects Of Endoscopic Gastric ...,...,,True,7.0,,,,,,,True


#