In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import random
import numpy as np
from bibtexparser.bibdatabase import BibDatabase
from bibtexparser.bparser import BibTexParser
import bibtexparser

# Workbook that holds both the data extraction form and the cited-by list.
file_path = "/mnt/c/Users/Charlotte/Nextcloud2/Uni/Masterarbeit/Fulltext screening/dataExtractionForm_v2.7_2025-12-06.xlsx"

# Both sheets are read from the same workbook.
df_DEF = pd.read_excel(file_path, sheet_name="DEF")
df_paper_list = pd.read_excel(file_path, sheet_name="Full cited-by list")


## Cleaning DEF

# Flip the sheet so that its original columns become rows, then remove the
# "Unnamed: 2" row, which contains notes.
df_DEF = df_DEF.T.drop("Unnamed: 2")

# The row at position 1 supplies the question texts used as column names.
new_columns = df_DEF.iloc[1].astype(str)

# Discard the first two rows, renumber the remaining rows from 0 and
# install the question texts as the header.
df_DEF = df_DEF.drop(df_DEF.index[:2]).reset_index(drop=True)
df_DEF.columns = new_columns


In [42]:
# Add the additional purpose as its own column
column = 'Additional interpretations'
new_column_name = 'Visually localizing lesions as part of the final tool'  # name of the new column

# Build the new column: "yes" where the entry matches the phrase exactly,
# NaN everywhere else (including missing entries).
df_DEF[new_column_name] = (
    df_DEF[column]
    .eq("Visually localizing lesions as part of the final tool")
    .map({True: "yes", False: np.nan})
)

In [None]:
# To get bibtex label

def extract_label_and_doi(bib_file_path):
    """Collect the citation key and DOI of every entry in a BibTeX file.

    Parameters
    ----------
    bib_file_path : str or path-like
        Location of the ``.bib`` file; it is opened as UTF-8 text.

    Returns
    -------
    pandas.DataFrame
        One row per BibTeX entry with the columns ``label`` (the entry's
        ``ID``) and ``doi``. Entries without a ``doi`` field get the
        placeholder string ``'No DOI found'``. Both columns are present even
        when the file contains no entries.
    """
    with open(bib_file_path, 'r', encoding='utf-8') as bib_file:
        bib_db = bibtexparser.load(bib_file, parser=BibTexParser())

    records = [
        {'label': entry['ID'], 'doi': entry.get('doi', 'No DOI found')}
        for entry in bib_db.entries
    ]
    # Naming the columns explicitly keeps the schema stable for an empty
    # .bib file, so callers can always select or merge on 'doi'.
    return pd.DataFrame(records, columns=['label', 'doi'])

# BibTeX file to read, given relative to the working directory.
bib_file_path = 'data/Fulltext screening suitable.bib'
# One row per BibTeX entry with the columns 'label' and 'doi'.
df = extract_label_and_doi(bib_file_path=bib_file_path)

In [None]:
# DEF columns that record the stated purpose of the explanation.
columns_purpose = [
    'Model (in)validation',
    'Data (in)validation',
    'Scientific discovery',
    'Finding intervention targets',
    'Visually localizing lesions as part of the final tool',
    'Vague explainability claims',
]

# DEF columns that record assumptions about the highlighted features.
columns_feature_assumptions = [
    'Features “used” by the model',
    'Features statistically associated',
    'Features causally driving the target',
    'Features caused by the target',
    'Features being confounded',
    'Features acting as suppressors',
    'Features representing outliers or reflecting distribution shifts',
    'Salient image features, e.g. edges',
    'Other assumptions',
]

# DEF columns that record the consequences drawn from the explanation.
columns_consequences = [
    'Model rejection, refinement or retraining',
    'Training data (and thus model) rejection',
    'Test data point rejection',
    'Data cleaning, denoising, artifact removal/mitigation etc.',
    'Independent replication due to validity concerns',
    '“Clearing” for subsequent use or next validation stage',
    'Followup-study to test the associational/causal role of identified features',
    'Prospective intervention on a feature to change/improve model output',
    'Prospective intervention on a feature to change/improve target variable in the real world',
    'Other consequences',
]

# Each column is abbreviated by a single letter, assigned in list order
# ("a" for the first column, "b" for the second, and so on).
columns_purpose_replacement = dict(zip(columns_purpose, "abcdef"))

columns_feature_assumptions_replacement = dict(zip(columns_feature_assumptions, "abcdefghi"))

columns_consequences_replacement = dict(zip(columns_consequences, "abcdefghij"))



In [None]:
# Merge the bib data with the DEF data, using the DOI as the bridge

# Attach EID and citation count from the paper list ('doi' in df matches 'DOI'
# in df_paper_list), order by citation count from highest to lowest, and then
# remove the helper columns that are no longer needed.
merged_df = (
    df.merge(
        df_paper_list[['DOI', 'EID', 'Cited by']],
        how='left',
        left_on='doi',
        right_on='DOI',
    )
    .sort_values(by=['Cited by'], ascending=False)
    .drop(columns=['DOI', 'doi', 'Cited by'])
)

# Columns of the DEF that should end up in the table
columns = [
    'Paper EID',
    "Which XAI methods are used?",
    "Which medical imaging modality is used?",
    "Which anatomical regions are shown in the images?",
] + columns_purpose + columns_feature_assumptions + columns_consequences

# Pull the selected DEF columns in via the EID and discard both key columns.
merged_df = (
    merged_df
    .merge(df_DEF[columns], how='left', left_on='EID', right_on='Paper EID')
    .drop(columns=['Paper EID', 'EID'])
)

### Preparing Claim values


In [54]:
# Replacing claim values with letters: every cell that is neither missing nor
# one of the placeholders 'NR' / 'N/A' is overwritten with its column's letter.
for letter_by_column in (
    columns_purpose_replacement,
    columns_feature_assumptions_replacement,
    columns_consequences_replacement,
):
    for column_name, letter in letter_by_column.items():
        has_claim = merged_df[column_name].notna() & ~merged_df[column_name].isin(['NR', 'N/A'])
        merged_df.loc[has_claim, column_name] = letter

In [55]:
def _join_reported_values(row):
    """Join the entries of a row with ', ', skipping missing values and 'NR' / 'N/A'."""
    kept = []
    for value in row:
        if pd.isna(value) or value in ('NR', 'N/A'):
            continue
        kept.append(str(value))
    return ', '.join(kept)


# Collapse each group of claim columns into one summary column
# (Purpose, Feature assumptions, Consequences) and drop the group afterwards.
for combined_name, source_columns in (
    ('Purpose', columns_purpose),
    ('Feature assumptions', columns_feature_assumptions),
    ('Consequences', columns_consequences),
):
    merged_df[combined_name] = merged_df[source_columns].apply(_join_reported_values, axis=1)
    merged_df = merged_df.drop(columns=source_columns)



In [56]:
# Shorten the question-style headers for the final table.
# The anatomical-region question is mapped under two wordings: the one selected
# from the DEF when the table columns are gathered ("Which anatomical regions
# are shown in the images?") and the wording this rename used before. rename()
# ignores keys that are not present, so only the matching wording takes effect.
merged_df = merged_df.rename(
    columns={
        "label": "Article",
        'Which XAI methods are used?': 'Applied XAI methods',
        "Which medical imaging modality is used?": "Imaging Modality",
        "Which anatomical regions are shown in the images?": "Anatomical regions",
        "Which parts of the body are shown in the medical image?": "Anatomical regions",
    }
)

In [57]:
def _as_cite_command(label):
    """Return the LaTeX citation command for a single BibTeX label."""
    return f"\\cite{{{label}}}"


# Turn every entry of the "Article" column into "\cite{<label>}"
merged_df['Article'] = merged_df['Article'].map(_as_cite_command)


In [58]:
# Render the table as a LaTeX longtable without the index column and
# print the source so it can be copied from the cell output.
latex_table = merged_df.to_latex(
    longtable=True,
    index=False,
)
print(latex_table)

\begin{longtable}{lllllll}
\toprule
Article & Applied XAI methods & Imaging Modality & Anatomical regions & Purpose & Feature assumptions & Consequences \\
\midrule
\endfirsthead
\toprule
Article & Applied XAI methods & Imaging Modality & Anatomical regions & Purpose & Feature assumptions & Consequences \\
\midrule
\endhead
\midrule
\multicolumn{7}{r}{Continued on next page} \\
\midrule
\endfoot
\bottomrule
\endlastfoot
\cite{kermany_identifying_2018} & Occlusion Sensitivity & Optical coherence tomography & Eye & a, f & a, b &  \\
\cite{ozturk_automated_2020} & Grad-CAM & x-ray & Chest & a, e & a &  \\
\cite{li_using_2020} & Grad-CAM & CT & Chest & a, f & a, b &  \\
\cite{rajpurkar_deep_2018} & CAM & x-ray & Chest & a, f & a, b, c, h &  \\
\cite{rahman_exploring_2021} & Score-CAM & x-ray & Chest & a, f & a & a \\
\cite{polsinelli_light_2020} & CAM & CT & Chest & a, b, f & a, b & b \\
\cite{gu_ca-net_2021} & Own method & Dermoscopic images, MRI & Skin, Fetal & a, f & a, b &  \\
\cite{wa

In [None]:
# Header names, one per table column, in display order.
columns = [
    "Article",
    "Applied XAI methods",
    "Imaging modality",
    "Anatomical regions",
    "Purposes",
    "Feature assumptions",
    "Suggested Consequences",
]

