# Supplemental Tables and Figures

In this notebook, we create supplemental tables that describe the type of data withheld in each split or under other conditions.

In [1]:
import pathlib
import pandas as pd
import dataframe_image as dfi

In [2]:
# columns to keep from every summary table
cols = ["injury_type", "n_wells", "n_compounds"]

# input directory: resolved strictly so a missing directory raises here,
# before any table is loaded
data_splits_dir = pathlib.Path("../../results/1.data_splits").resolve(strict=True)

# output directory: this is where the exported PNGs are written, so create it
# when it is absent instead of requiring it to exist beforehand
fig_dir = pathlib.Path("./figures/supplemental").resolve()
fig_dir.mkdir(parents=True, exist_ok=True)

# summary info data paths (strict resolution raises if a CSV is missing)
injury_summary_before_holdout_path = (
    data_splits_dir / "injury_data_summary_before_holdout.csv"
).resolve(strict=True)
injury_summary_after_holdout_path = (
    data_splits_dir / "injury_data_summary_after_holdout.csv"
).resolve(strict=True)
injury_summary_train_split_path = (
    data_splits_dir / "injury_data_summary_train_split.csv"
).resolve(strict=True)
injury_summary_test_split_path = (
    data_splits_dir / "injury_data_summary_test_split.csv"
).resolve(strict=True)

In [3]:
# read each summary CSV and keep only the columns listed in `cols`
injury_summary_before_holdout_df = pd.read_csv(
    injury_summary_before_holdout_path
).loc[:, cols]
injury_summary_after_holdout_df = pd.read_csv(
    injury_summary_after_holdout_path
).loc[:, cols]
injury_summary_train_split_df = pd.read_csv(
    injury_summary_train_split_path
).loc[:, cols]
injury_summary_test_split_df = pd.read_csv(
    injury_summary_test_split_path
).loc[:, cols]

## Display injury data summaries before and after holdout

In [4]:
# write the before-holdout summary table to a PNG, then display it inline
stable_a_png = fig_dir / "stable_A_injury_summary_before_holdout.png"
dfi.export(injury_summary_before_holdout_df, str(stable_a_png))
injury_summary_before_holdout_df

[0524/075019.692258:INFO:config_dir_policy_loader.cc(118)] Skipping mandatory platform policies because no policy file was found at: /etc/opt/chrome/policies/managed
[0524/075019.692317:INFO:config_dir_policy_loader.cc(118)] Skipping recommended platform policies because no policy file was found at: /etc/opt/chrome/policies/recommended
46795 bytes written to file /tmp/tmp8pcby5jh/temp.png


Unnamed: 0,injury_type,n_wells,n_compounds
0,Control,9855,1
1,Cytoskeletal,1472,15
2,Miscellaneous,1304,39
3,Kinase,1104,13
4,Genotoxin,944,22
5,Hsp90,552,3
6,Redox,312,12
7,Saponin,288,11
8,HDAC,168,5
9,Mitochondria,144,4


In [5]:
# write the after-holdout summary table to a PNG, then display it inline
stable_b_png = fig_dir / "stable_B_injury_summary_after_holdout.png"
dfi.export(injury_summary_after_holdout_df, str(stable_b_png))
injury_summary_after_holdout_df

[0524/075020.274957:INFO:config_dir_policy_loader.cc(118)] Skipping mandatory platform policies because no policy file was found at: /etc/opt/chrome/policies/managed
[0524/075020.275044:INFO:config_dir_policy_loader.cc(118)] Skipping recommended platform policies because no policy file was found at: /etc/opt/chrome/policies/recommended
45851 bytes written to file /tmp/tmpc4a9kamx/temp.png


Unnamed: 0,injury_type,n_wells,n_compounds
0,Control,8408,1
1,Cytoskeletal,1102,14
2,Miscellaneous,1007,38
3,Kinase,750,12
4,Genotoxin,737,21
5,Hsp90,418,3
6,Redox,215,11
7,Saponin,164,10
8,HDAC,138,5
9,Proteasome,117,4


## Display data summaries from the train and test splits

In [6]:
# write the train-split summary table to a PNG, then display it inline
stable_c_png = fig_dir / "stable_C_injury_summary_train_split.png"
dfi.export(injury_summary_train_split_df, str(stable_c_png))
injury_summary_train_split_df

[0524/075020.824644:INFO:config_dir_policy_loader.cc(118)] Skipping mandatory platform policies because no policy file was found at: /etc/opt/chrome/policies/managed
[0524/075020.824720:INFO:config_dir_policy_loader.cc(118)] Skipping recommended platform policies because no policy file was found at: /etc/opt/chrome/policies/recommended
46024 bytes written to file /tmp/tmpiynggbra/temp.png


Unnamed: 0,injury_type,n_wells,n_compounds
0,Control,6726,1
1,Cytoskeletal,881,14
2,Miscellaneous,806,38
3,Kinase,600,12
4,Genotoxin,590,21
5,Hsp90,334,3
6,Redox,172,11
7,Saponin,131,10
8,HDAC,110,5
9,Proteasome,94,4


In [7]:
# write the test-split summary table to a PNG, then display it inline
stable_d_png = fig_dir / "stable_D_injury_summary_test_split.png"
dfi.export(injury_summary_test_split_df, str(stable_d_png))
injury_summary_test_split_df

[0524/075021.415509:INFO:config_dir_policy_loader.cc(118)] Skipping mandatory platform policies because no policy file was found at: /etc/opt/chrome/policies/managed
[0524/075021.415576:INFO:config_dir_policy_loader.cc(118)] Skipping recommended platform policies because no policy file was found at: /etc/opt/chrome/policies/recommended
44478 bytes written to file /tmp/tmp8w51bw17/temp.png


Unnamed: 0,injury_type,n_wells,n_compounds
0,Control,1682,1
1,Cytoskeletal,221,14
2,Miscellaneous,201,37
3,Kinase,150,12
4,Genotoxin,147,21
5,Hsp90,84,3
6,Redox,43,11
7,Saponin,33,8
8,HDAC,28,5
9,Mitochondria,23,4
