# Read raw screen data from spreadsheets and reformat

In [1]:
import pandas
# Bare last expression: displays the installed pandas version as the cell output.
pandas.__version__

'1.0.3'

## Read FDA compounds

> The SCREENWELL FDA-approved drug library V2 containing 741
compounds was purchased from Enzo Life Sciences (Hayashi Kasei
Co., Ltd.), and the International Drug Collection (IDC) containing 311
compounds was purchased from MicroSource Discovery Systems, Inc.
(Namiki Shoji Co., Ltd.).

In [2]:
def read_fda_spreadsheet(
    concentration: str,
    path: str = "data/tamai-screen/FDA1_FDA2_Period_24-120.xlsx",
) -> pandas.DataFrame:
    """Read one concentration's block of the FDA screen worksheet.

    The worksheet "FDA_1uM_10uM_DRUGS_GRAPHS" holds two side-by-side column
    blocks: Excel columns B:G for "1 uM" and J:O for "10 uM". This function
    reads the block for the requested concentration together with column T,
    then normalises the column names so both concentrations share one schema.

    Parameters
    ----------
    concentration : str
        Either "1 uM" or "10 uM"; selects which column block is read.
    path : str, optional
        Location of the Excel workbook. Defaults to the screen data file
        used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        The selected columns with
        * "<concentration> AVG" renamed to "Period After",
        * "HITS/non-HITS" (column T; presumably holds the compound names --
          TODO confirm against the workbook) renamed to "compound_name",
        * a constant "concentration" column added,
        * the ".1" duplicate-header suffix removed from column names,
        * rows with a missing "96-well" value dropped.
        The original row index from the sheet is kept.

    Raises
    ------
    KeyError
        If ``concentration`` is not one of the supported values, or if the
        sheet has no "96-well" column.
    """
    concentration_to_cols = {"1 uM": "B:G", "10 uM": "J:O"}
    # Fail early (before touching the file) with a message that lists the
    # valid choices; the exception type is still KeyError, as it was when
    # this was a plain dict lookup.
    try:
        excel_cols = concentration_to_cols[concentration]
    except KeyError:
        raise KeyError(
            f"unknown concentration {concentration!r}; "
            f"expected one of {sorted(concentration_to_cols)}"
        ) from None

    renamer = {
        f"{concentration} AVG": "Period After",
        "HITS/non-HITS": "compound_name",
    }

    def strip_dupe_suffix(label):
        # pandas disambiguates repeated header names by appending ".1" to the
        # second occurrence, so the second block's headers arrive suffixed.
        # Remove that suffix so both blocks yield identical column names.
        # Non-string labels (e.g. numeric headers) pass through unchanged.
        if isinstance(label, str) and label.endswith(".1"):
            return label[:-2]
        return label

    # `mangle_dupe_cols` is intentionally not passed: True is the default, and
    # the keyword no longer exists in recent pandas releases.
    return (
        pandas.read_excel(
            path,
            sheet_name="FDA_1uM_10uM_DRUGS_GRAPHS",
            usecols=f"{excel_cols},T",
        )
        .rename(columns=renamer)
        .assign(concentration=concentration)
        .rename(columns=strip_dupe_suffix)
        .dropna(subset=["96-well"])
    )


In [3]:
# Stack the readings for both screened concentrations into one long table,
# then tag every row with the library it came from.
fda_df = pandas.concat(
    [read_fda_spreadsheet(dose) for dose in ("1 uM", "10 uM")]
)
fda_df["compound_set"] = "fda"
fda_df

Unnamed: 0,96-well,Plate,384-well,Period,Period After,Period Change,compound_name,concentration,compound_set
0,1-A02,1,A4,21.5,21.633333,0.029167,Clindamycin·HCl,1 uM,fda
3,1-A03,1,A7,21.7,21.666667,0.062500,Felbamate,1 uM,fda
6,1-A04,1,A10,21.6,21.700000,0.095833,Cyclosporine A,1 uM,fda
9,1-A05,1,A13,21.4,21.433333,-0.170833,Donepezil·HCl,1 uM,fda
12,1-A06,1,A16,21.5,21.600000,-0.004167,Lincomycin·HCl,1 uM,fda
...,...,...,...,...,...,...,...,...,...
2445,11-B06,7,O13,22.1,22.133333,0.589583,Zaleplon,10 uM,fda
2448,10-H10,7,P4,21.5,21.533333,-0.010417,Blank,10 uM,fda
2451,10-H11,7,P7,21.6,21.600000,0.056250,Trientine Dihydrochloride,10 uM,fda
2454,11-A09,7,P10,21.3,21.433333,-0.110417,Valproate·Na,10 uM,fda


In [4]:
# NOTE(review): this cell is entirely commented out and never executes. It is
# an exploratory read of the 1 uM columns (B:G,T) of the same sheet that
# read_fda_spreadsheet("1 uM") reads earlier in this notebook -- candidate
# for removal.
# path = "data/tamai-screen/FDA1_FDA2_Period_24-120.xlsx"
# fda_1uM_df = (
#     pandas.read_excel(path, sheet_name="FDA_1uM_10uM_DRUGS_GRAPHS", usecols="B:G,T")
#     .dropna(subset=["96-well"])
# )
# fda_1uM_df.head(2)