# Analyse full screening answers

In [71]:
# imports
import pandas as pd

In [72]:
# Lift pandas' display limits so wide columns and long text cells are shown
# untruncated when inspecting the screening answers.
# (None = unlimited; fixed caps such as 1000 / 199 would also work.)
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [83]:
# Read the exported screening answers into a dataframe.
# The path uses forward slashes: the previous backslash-separated literal
# contained the invalid escape sequences "\d" and "\F" (a SyntaxWarning on
# recent Python versions) and only resolved on Windows. Forward slashes work
# on Windows, Linux and macOS alike.
df = pd.read_excel('../data/Full Screening Questions (1-619).xlsx')

# Remove the 'Name' column, which is not needed for the analysis.
df = df.drop(columns=['Name'])

In [84]:
# Remove tab / newline / carriage-return markers from every string cell.
# The first pattern targets the escaped two-character spellings
# (a backslash followed by t, n or r); the second targets the real control
# characters. Both are replaced by the empty string.
whitespace_patterns = [r"\\t|\\n|\\r", "\t|\n|\r"]
df = df.replace(
    to_replace=whitespace_patterns,
    value=["", ""],
    regex=True,
)

In [85]:
# Strip leading/trailing whitespace from the string cells of object columns.
# `Series.str.strip()` is deliberately avoided here: it turns every non-string
# cell (numbers, booleans, ...) into NaN and raises when a column holds no
# strings at all. Stripping cell-wise, and only for real `str` values, leaves
# all other values (including NaN) untouched.
df_obj = df.select_dtypes(['object'])
df[df_obj.columns] = df_obj.apply(
    lambda col: col.map(lambda cell: cell.strip() if isinstance(cell, str) else cell)
)

In [86]:
# A paper may have been answered more than once: keep only the first answer
# row per 'Number - Author Year' key and discard the later ones.
is_repeated_answer = df.duplicated(subset=['Number - Author Year'], keep='first')
df = df[~is_repeated_answer]

In [87]:
# Drop papers that were excluded during screening. Each question is mapped to
# the answer(s) that mark a paper as excluded:
# - no concrete supervised medical machine learning use case
# - no XAI method provided
# - neither image nor tabular data as input (text or audio instead)
# Rows where the question is unanswered (NaN) are kept.
exclusion_answers = {
    'Concrete medical supervised Machine Learning usecase?': [
        'No -> Exclude from review and submit form',
    ],
    'Which XAI method is used?': [
        'None -> Exclude from review and submit form;',
    ],
    'Tabular or Image data as input?': [
        'Text-> Exclude from review and submit form',
        'Audio-> Exclude from review and submit form',
    ],
}
for question, excluding_answers in exclusion_answers.items():
    df = df[~df[question].isin(excluding_answers)]

In [100]:
# Check for missing values: list every paper whose answer row contains at
# least one NaN/None cell.
# A single vectorised row mask replaces the former per-row Python loop with
# repeated `.loc` lookups. `authors_list` is de-duplicated and stored sorted
# (plain string order, so '#1049' sorts before '#14'), which makes its order
# reproducible between runs.
has_missing_answer = df.isnull().any(axis=1)
authors_list = sorted(set(df.loc[has_missing_answer, 'Number - Author Year']))
authors_list

['#1049 - Slomka 2020',
 '#1107 - Beluzo 2020',
 '#1190 - Paul 2021',
 '#1208 - PalatnikdeSousa 2021',
 '#1209 - Castano-Candamil 2019',
 '#1230 - Xie 2022',
 '#14 - Kermany 2018',
 '#202 - Vivar 2021',
 '#2071 - Torres-Macho 2020',
 '#2316 - Stojanović 2020',
 '#248 - Gutfleisch 2022',
 '#326 - Kim 2021',
 '#378 - Jiménez-Luna 2022',
 '#396 - Gandin 2021',
 '#414 - DeFalco 2022',
 '#794 - Shetty 2021',
 '#844 - García-Carretero 2021']

In [81]:
# Tally the answers to the use-case question; NaN is counted as well so that
# unanswered rows stay visible.
usecase_answers = df['Concrete medical supervised Machine Learning usecase?']
usecase_answers.value_counts(dropna=False)

Yes    443
NaN      8
Name: Concrete medical supervised Machine Learning usecase?, dtype: int64

In [32]:
# Display the raw (semicolon-separated) XAI method answers for inspection.
df.loc[:, 'Which XAI method is used?']

0                                                                                                                                                                                                 NaN
1                                                                                           Model is intrinsic interpretable (i.e., decision tree or linear regression)\n;Partial Dependence Plots\n;
2                                                                                                                      Class Activation Mapping or related (i. e., Grad-CAM);guided back-propagation;
3                                                                                                                                                                                    Shannon entropy;
4                                                                                                  Model is intrinsic interpretable (i.e., decision tree or linear regression)\n;Logistic Regression;
5         