In [425]:
# Bibliography Search

In [426]:
# Setup notebook
from msc_code.scripts.notebook_setup import *

In [427]:
input_path = os.path.join(RAW_DATA_DIR, 'bibliography_search')
output_path = os.path.join(PROC_DATA_DIR, 'bibliography_search')

In [428]:
# import included studies
import_path = os.path.join(PROC_DATA_DIR, 'full_text_screen', 'full_text_screen_end_final.csv')
ft_data = pd.read_csv(import_path)
included_ids = list(ft_data[ft_data['Exclude_FINAL'] == False]['Study_ID'])

In [429]:
# Create dataframe for bibliography search
bib_search_df = pd.DataFrame(columns=['Study_ID', 'Bib_Search_Complete'])

bib_search_df['Study_ID'] = included_ids

file_path = os.path.join(output_path, 'bib_search_start.csv')
bib_search_df.to_csv(file_path, index=False)

In [430]:
# Reimport data from bibliography search
file_path = os.path.join(input_path, 'bib_search_end.xlsx')
bib_search_df = pd.read_excel(file_path)

In [431]:
# Filter out excluded papers
bib_search_df = bib_search_df[bib_search_df['Study_ID'].isin(included_ids)]

In [432]:
bib_search_df['Bib_Search_Complete'] = bib_search_df['Bib_Search_Complete'].astype(str)

status_map = {
    'nan': 'Awaiting Search',
    'Y': 'Search Complete',
    'N': 'For second review'
}

bib_search_df['Bib_Search_Complete'] = bib_search_df['Bib_Search_Complete'].map(status_map)

In [433]:
bib_search_df['Bib_Search_Complete'].value_counts()

Bib_Search_Complete
Search Complete    96
Name: count, dtype: int64

In [434]:
# Import bibliography search results
import_path = os.path.join(input_path, 'bib_search_results.csv')
bib_search_df = pd.read_csv(import_path)

# Sort Results by Publication Year, Author, Title ascending.
bib_search_df = bib_search_df.sort_values(by=['Publication Year', 'Author', 'Title'], ascending=True).reset_index(drop=True)

# Import initial results to
import_path = os.path.join(PROC_DATA_DIR, "title_abstract_review", "all_results_title_abstract_start.csv")
ta_start = pd.read_csv(import_path)

# Rename id column
ta_start = ta_start.rename(columns={
    'id': 'Study_ID'
})

# Find max study ID
starting_index = ta_start['Study_ID'].max() + 1

# Create Study_ID for items in bibliography search
bib_search_df['Study_ID'] = range(starting_index, starting_index + len(bib_search_df))

# Export to CSV
file_path = os.path.join(output_path, 'bib_search_results.csv')
bib_search_df.to_csv(file_path)

In [435]:
bib_search_df.columns

Index(['Key', 'Item Type', 'Publication Year', 'Author', 'Title',
       'Publication Title', 'ISBN', 'ISSN', 'DOI', 'Url', 'Abstract Note',
       'Date', 'Date Added', 'Date Modified', 'Access Date', 'Pages',
       'Num Pages', 'Issue', 'Volume', 'Number Of Volumes',
       'Journal Abbreviation', 'Short Title', 'Series', 'Series Number',
       'Series Text', 'Series Title', 'Publisher', 'Place', 'Language',
       'Rights', 'Type', 'Archive', 'Archive Location', 'Library Catalog',
       'Call Number', 'Extra', 'Notes', 'File Attachments', 'Link Attachments',
       'Manual Tags', 'Automatic Tags', 'Editor', 'Series Editor',
       'Translator', 'Contributor', 'Attorney Agent', 'Book Author',
       'Cast Member', 'Commenter', 'Composer', 'Cosponsor', 'Counsel',
       'Interviewer', 'Producer', 'Recipient', 'Reviewed Author',
       'Scriptwriter', 'Words By', 'Guest', 'Number', 'Edition',
       'Running Time', 'Scale', 'Medium', 'Artwork Size', 'Filing Date',
       'Applicatio

In [436]:
# Compare to title abstract screening data

import_path = os.path.join(PROC_DATA_DIR, 'title_abstract_review', 'title_abstract_review_FINAL.csv')
ta_review = pd.read_csv(import_path)

print(f'{len(bib_search_df)} items in bib_search_results before duplicate removal')
print(f'{len(bib_search_df)} items in bib_search_results after duplicate titles removed')
duplicates = pd.concat([bib_search_df[bib_search_df['DOI'].isin(ta_review['DOI'])],
                       bib_search_df[bib_search_df['Title'].isin(ta_review['Title'])]])

duplicate_ids = list(duplicates['Study_ID'])
bib_search_df = bib_search_df[~bib_search_df['Study_ID'].isin(duplicate_ids)]

print(f'{len(bib_search_df)} items in bib_search_results after duplicate DOI removal')

204 items in bib_search_results before duplicate removal
204 items in bib_search_results after duplicate titles removed
156 items in bib_search_results after duplicate DOI removal


In [437]:
ta_review = ta_review.rename(columns={
    'id': 'Study_ID'
})

In [438]:
import_path = os.path.join(PROC_DATA_DIR, 'full_text_screen', 'full_text_screen_end_final.csv')
ft_review = pd.read_csv(import_path) 

ft_review = pd.merge(
    ft_review,
    ta_review[['Study_ID', 'DOI']],
    how='left',
    on='Study_ID'
)

# Create list of excluded papers
ft_excluded = ft_review[ft_review['Exclude_FINAL'] == True]

# Remove duplicates that have a doi in the list of excluded papers
print(f"{len(bib_search_df)} items in bib_search_results before comparison with full_text_screen.")
exclusions = bib_search_df[bib_search_df['DOI'].isin(ft_excluded['DOI'])]

bib_search_df = bib_search_df[~bib_search_df.isin(exclusions)]
print(f"{len(bib_search_df)} items in bib_search_results after DOI comparison")

156 items in bib_search_results before comparison with full_text_screen.
156 items in bib_search_results after DOI comparison


In [448]:
pre_screen_exclusions = pd.concat([
    duplicates,
    exclusions
])

# Export pre-screening exclusions (Duplicate papers and items already excluded).
export_path = os.path.join(output_path, 'pre_screen_exclusions.csv')
pre_screen_exclusions.to_csv(export_path)

In [439]:
bib_search_df = bib_search_df.rename(columns={
    'Access Date': 'Accessed'
})

In [440]:
screening_columns = ['Study_ID', 'Publication Year', 'Author', 'Title',	'Publication Title', 'Database', 'Exclude', 'Reason ID', 'Paediatric', 'Intention Reported', 'Deliberate intention', 'Unclear', 'Accessed', 'Comments']

In [441]:
bib_search_df.columns

Index(['Key', 'Item Type', 'Publication Year', 'Author', 'Title',
       'Publication Title', 'ISBN', 'ISSN', 'DOI', 'Url', 'Abstract Note',
       'Date', 'Date Added', 'Date Modified', 'Accessed', 'Pages', 'Num Pages',
       'Issue', 'Volume', 'Number Of Volumes', 'Journal Abbreviation',
       'Short Title', 'Series', 'Series Number', 'Series Text', 'Series Title',
       'Publisher', 'Place', 'Language', 'Rights', 'Type', 'Archive',
       'Archive Location', 'Library Catalog', 'Call Number', 'Extra', 'Notes',
       'File Attachments', 'Link Attachments', 'Manual Tags', 'Automatic Tags',
       'Editor', 'Series Editor', 'Translator', 'Contributor',
       'Attorney Agent', 'Book Author', 'Cast Member', 'Commenter', 'Composer',
       'Cosponsor', 'Counsel', 'Interviewer', 'Producer', 'Recipient',
       'Reviewed Author', 'Scriptwriter', 'Words By', 'Guest', 'Number',
       'Edition', 'Running Time', 'Scale', 'Medium', 'Artwork Size',
       'Filing Date', 'Application Number',

In [442]:
bib_search_screening = pd.DataFrame(columns=screening_columns)
bib_search_screening

Unnamed: 0,Study_ID,Publication Year,Author,Title,Publication Title,Database,Exclude,Reason ID,Paediatric,Intention Reported,Deliberate intention,Unclear,Accessed,Comments


In [443]:
# Create a DataFrame with all screening columns, indexed the same as bib_search_df
bib_search_screening = pd.DataFrame(index=bib_search_df.index, columns=screening_columns)

# Fill in matching columns from bib_search_df
for col in screening_columns:
    if col in bib_search_df.columns:
        bib_search_screening[col] = bib_search_df[col]

bib_search_screening['Database'] = "Bibliography Search"

bib_search_screening['Publication Year'] = bib_search_screening['Publication Year'].astype(int)
bib_search_screening['Study_ID'] = bib_search_screening['Study_ID'].astype(int)

bib_search_screening

Unnamed: 0,Study_ID,Publication Year,Author,Title,Publication Title,Database,Exclude,Reason ID,Paediatric,Intention Reported,Deliberate intention,Unclear,Accessed,Comments
0,492,1941,"Macmanus, Joseph E.",Perforations of the intestine by ingested fore...,The American Journal of Surgery,Bibliography Search,,,,,,,2025-04-14 17:11:45,
3,495,1969,"Johnson, Wilbur E.",On Ingestion of Razor Blades,JAMA,Bibliography Search,,,,,,,2025-04-14 14:35:26,
4,496,1969,"Schechter, D. C.; Gilbert, L.",Injuries of the heart and great vessels due to...,Thorax,Bibliography Search,,,,,,,,
5,497,1971,"Balch, Charles M.; Silver, Donald",Foreign Bodies in the Appendix,Archives of Surgery,Bibliography Search,,,,,,,2025-04-14 16:07:04,
6,498,1974,"Witzel, L.; Scheurer, U.; Mühlemann, A.; Halte...",Removal of razor blades from stomach with fibr...,British Medical Journal,Bibliography Search,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191,683,2022,"Yan, Tyler D.; Leung, Philemon H. Y.; Zwirewic...",An unusual cause of pericardial effusion: A ca...,International Journal of Surgery Case Reports,Bibliography Search,,,,,,,2025-04-14 14:09:48,
192,684,2023,"Anand, Madhur; Topno, Noor; Lynrah, Kyrshanlan...",Doormatobezoar: first case report of a bezoar ...,International Surgery Journal,Bibliography Search,,,,,,,2025-04-14 12:39:27,
194,686,2023,"Jin, Shengjian; Horiguchi, Taigo; Ma, Xiaolong...",Metallic foreign bodies ingestion by schizophr...,Annals of Medicine and Surgery,Bibliography Search,,,,,,,2025-03-30 10:22:52,
195,687,2023,"Ngu, Natalie Lee Yee; Karp, Jadon; Taylor, Kir...","Patient characteristics, outcomes and hospital...",BMJ Open Gastroenterology,Bibliography Search,,,,,,,2025-03-30 10:21:11,


In [444]:
# Export to CSV
export_path = os.path.join(output_path, 'bib_search_screen_start.csv')
bib_search_screening.to_csv(export_path, index=False)

In [445]:
exclusion_criteria

{1: 'Full text not available in English.',
 2: 'Studies not focusing on intentional self-ingestion (into the gastrointestinal tract) of foreign object via the oral cavity (mouth) or where unclear if ingested.',
 3: 'Studies focussing solely on accidental ingestion.',
 4: 'Non-Human/ animal studies.',
 5: 'Reviews, editorials, commentaries, and opinion pieces without original empirical data.',
 6: 'Duplicate publications or studies with overlapping data sets (the most comprehensive or recent study will be included).',
 7: 'Studies focusing on ingestion or co-ingestion of substances (e.g. poisons, medications) rather than physical foreign objects.',
 8: 'Ingestions undertaken in controlled environment as part of voluntary study.',
 9: 'Ingestions not explicitly stated to be intentional and history not suggestive of deliberate ingestion (i.e. Age < 8, no history of previous ingestions, no psychiatric co-morbidities, not a prisoner/detainee/vulnerable group).',
 10: 'Does not meet inclusio