## Reading Single Cell profiles into the memory
- All the information about single cells are stored in a sqlite file for each plate
- sqlite files are huge (up to 50 GB) and loading them to memory may cause memory errors


#### Here are alternative ways of handling this issue:

- Reading All the Single Cells of a plate

- Reading random images or defind subset of the plate images 

- Reading a subset of wells from the plate 

- Reading a subset of features from the plate 

- Reading a subset of features and a subset of wells of a plate 
   
- Reading a subset of objects from a subset of wells plate    
  
  
** Timing Example **
* SQ00015195  :  11.55 GB

- Reading All the Single Cells of a plate

- Reading random images or defind subset of the plate images 

- Reading a subset of wells from the plate 

- Reading a subset of features from the plate 
   - One feature: 7 mins

- Reading a subset of features and a subset of wells of a plate 
   - One feature and one well: 0.6 mins
   
- Reading a subset of objects from a subset of wells plate    
  

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd 
import time
import sys, os
# from utils import read_data, visualize_data
from utils.read_data import *
import pandas as pd
from sqlalchemy import create_engine
from functools import reduce
import time



In [2]:
# Example dataset:
#     drug rep
meta_lincs=pd.read_csv("/home/ubuntu/bucket/projects/2018_04_20_Rosetta/workspace/results/synth_meta/meta_lincs_repLevel.csv")
rootDirDrug='/home/ubuntu/bucket/projects/2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad/workspace'
batchName='2016_04_01_a549_48hr_batch1'
p,wells="SQ00015195",["A13"]
fileName=rootDirDrug+"/backend/"+batchName+"/"+p+"/"+p+".sqlite"


###### Check file size

In [3]:
sqlFileSizGB=os.stat(fileName).st_size/10e8
print(p,' : ',sqlFileSizGB)

SQ00015195  :  11.553037312


## Reading All the Single Cells of a plate

## Reading random images or defind subset of the plate images 

In [None]:
df_p_s=readSingleCellData_sqlalch_random_image_subset(fileName,50);

## Reading a subset of wells from the plate

In [None]:
df_p_s=readSingleCellData_sqlalch_well_subset(fileName,wells);

## Reading a subset of objects from a subset of wells plate

In [None]:
df_p_s=readSingleCellData_sqlalch_wellAndObject_subset(fileName,wells,50);

## Reading a subset of features from the plate 

In [15]:
selected_features='Cells_Intensity_IntegratedIntensity_DNA'
df_p_s=readSingleCellData_sqlalch_features_subset(fileName,selected_features);


time elapsed: 7.294410037994385


## Reading a subset of features and a subset of wells of a plate 

In [6]:
selected_features='Cells_Intensity_IntegratedIntensity_DNA'
wells=["A13"]

p,wells="SQ00015199", ['P20']
fileName=rootDirDrug+"/backend/"+batchName+"/"+p+"/"+p+".sqlite"
df_p_s=readSingleCellData_sqlalch_FeatureAndWell_subset(fileName,selected_features,wells);

time elapsed: 5.4183234333992  mins


In [33]:
# df_p_s.columns.duplicated()

In [None]:
    sql_file="sqlite:////"+fileName
    engine = create_engine(sql_file)
    conn = engine.connect()
    compartments=["cells", "cytoplasm", "nuclei"]
    # compartments=["Neurites","CellBodies","CellBodiesPlusNeurites","Nuclei","Cytoplasm"]

#     rand_img_num=np.random.choice(range(1,4000), n_rand_ims)
#     rand_img_num=np.array(range(50))
#     list_str="("
#     for i in rand_img_num:
#         list_str=list_str+str(i)+',' 
#     list_str=list_str[:-1]+")"
    
#     rand_img_num=wells[:40]
    rand_img_num=wells.copy()
    list_str="('"
    for i in rand_img_num:
        list_str=list_str+str(i)+"','" 
    list_str=list_str[:-2]+")"

#     compartment_query = "select * from {}".format("Image")
#     compartment_query = "select * from {} WHERE {} IN {};".format("Image","ImageNumber",list_str)
#     compartment_query = "select * from {} WHERE {} IN {};".\
    compartment_query = "select * from {} WHERE {} IN {};".\
    format("Image","Image_Metadata_Well",list_str)
    start1 = time.time()
    plateImageDf= pd.read_sql(sql=compartment_query, con=conn);
#     print(plateImageDf.columns[plateImageDf.columns.str.contains("Metadata_")])
#     print(plateImageDf['Metadata_Well'].unique())
    end1 = time.time()
    print('time elapsed:',(end1 - start1)/60)
    img_nums=plateImageDf.ImageNumber.unique().tolist()
    print(plateImageDf.shape,img_nums)
    list_str2="("
    for i in img_nums:
        list_str2=list_str2+str(i)+',' 
    list_str2=list_str2[:-1]+")"
    start2 = time.time()
    plateDf_list=[]
    for compartment in compartments:
#         compartment_query = "select * from {}".format(compartment)
        compartment_query = "select * from {} WHERE {} IN {};".format(compartment,"ImageNumber",list_str2)
#         compartment_query = "select * from {} WHERE {} IN {};".format(compartment,"Metadata_Well",list_str)
        plateDf_list.append(pd.read_sql(sql=compartment_query, con=conn))

    plateDf = reduce(lambda left,right: pd.merge(left,right,on=["TableNumber", "ImageNumber", "ObjectNumber"]), plateDf_list)
    end = time.time()
    print('time elapsed:',(end - start2)/60)
    
#     print(plateDf.columns[plateDf.columns.str.contains("Metadata_")])
    plateDfwMeta = pd.merge(plateDf, plateImageDf, on=["TableNumber", "ImageNumber"])
    
    del plateDf
#     gc.collect()
    
    plateDfwMeta = plateDfwMeta.loc[:,~plateDfwMeta.columns.duplicated()]
#     print(plateDfwMeta.shape)
#     print(plateDfwMeta.Image_Metadata_ImageSizeX.values[0])
    plateDfwMeta=edgeCellFilter2(plateDfwMeta);  


In [13]:
meta_lincs_repLevel=pd.read_csv("/home/ubuntu/bucket/projects/2018_04_20_Rosetta/\
workspace/results/synth_meta/meta_lincs_repLevel.csv")

rootDirDrug='/home/ubuntu/bucket/projects/2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad/workspace'
batchName='2016_04_01_a549_48hr_batch1'


selected_feature='Cells_Intensity_IntegratedIntensity_DNA'

pert_df=meta_lincs_repLevel[meta_lincs_repLevel['Metadata_pert_id_dose']=='BRD-K61567297_3.33'].reset_index(drop=True)
pert_df_allP0=[]
for j in range(pert_df.shape[0]):
    # p,wells="SQ00015195",["A13"]
    p,wells=pert_df.loc[j,"Metadata_Plate"],[pert_df.loc[j,"Metadata_Well"]]
    fileName=rootDirDrug+"/backend/"+batchName+"/"+p+"/"+p+".sqlite"

    print(p,wells)
    print("readSingleCellData_sqlalch_FeatureAndWell_subset")
    df_p_s=readSingleCellData_sqlalch_FeatureAndWell_subset(fileName,selected_feature,wells);
#     print("readSingleCellData_sqlalch_random_image_subset")
#     df_p_s=readSingleCellData_sqlalch_random_image_subset(fileName,50);
    print("readSingleCellData_sqlalch_well_subset")
    df_p_s=readSingleCellData_sqlalch_well_subset(fileName,wells);
#     print("readSingleCellData_sqlalch_wellAndObject_subset")
#     df_p_s=readSingleCellData_sqlalch_wellAndObject_subset(fileName,wells,50);

    pert_df_allP0.append(df_p_s)

pert_df_allP = pd.concat(pert_df_allP0)


# readSingleCellData_sqlalch_well_subset(fileName,wells):

SQ00015199 ['P20']
readSingleCellData_sqlalch_FeatureAndWell_subset
time elapsed: 5.229200692971547  mins
readSingleCellData_sqlalch_well_subset
time elapsed: 4.095426261425018
(9, 658) [3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, 3420]
time elapsed: 2.020168689886729
SQ00015134 ['P20']
readSingleCellData_sqlalch_FeatureAndWell_subset


KeyboardInterrupt: 

In [3]:
# selected_feature='Cells_RadialDistribution_MeanFrac_mito_tubeness_16of16'
#     selected_feature='Cells_Intensity_IntegratedIntensity_DNA'
# f2='Cells_Intensity_IntegratedIntensity_DNA'

sql_file="sqlite:////"+fileName
engine = create_engine(sql_file)
conn = engine.connect()

######## Query wells from Image table
ls_wells=wells.copy()
list_str="('"
for i in ls_wells:
    list_str=list_str+str(i)+"','" 
list_str=list_str[:-2]+")"

img_query = "select * from {} WHERE {} IN {};".\
format("Image","Image_Metadata_Well",list_str)

plateImageDf= pd.read_sql(sql=img_query, con=conn);
img_nums=plateImageDf.ImageNumber.unique().tolist()

list_str2="("
for i in img_nums:
    list_str2=list_str2+str(i)+',' 
list_str2=list_str2[:-1]+")"
###########################

In [7]:
selected_features='Cells_Intensity_IntegratedIntensity_DNA'
start1 = time.time()
compartments=selected_features.split("_")[0]
query_cols = "TableNumber, ImageNumber, "+selected_features#+", "+f2
compartment_query = "select {} from {} WHERE {} IN {};".format(query_cols,compartments,"ImageNumber",list_str2)
plateDf=pd.read_sql(sql=compartment_query, con=conn)

plateDfwMeta = pd.merge(plateDf, plateImageDf, on=["TableNumber", "ImageNumber"])

end1 = time.time()
print('time elapsed:',(end1 - start1)/60, " mins")

time elapsed: 0.07481953700383505  mins


In [27]:
meta_lincs.columns

Index(['alternative_moa', 'Metadata_Batch_Number', 'Metadata_pert_id_vendor',
       'Batch_Number', 'InChIKey14', 'moa', 'Metadata_pert_vehicle',
       'Metadata_broad_id', 'Metadata_mg_per_ml', 'broad_date', 'broad_id',
       'Metadata_cell_id', 'Metadata_alternative_moa',
       'Metadata_Assay_Plate_Barcode', 'Metadata_pert_well', 'plate_map_name',
       'Metadata_target', 'Metadata_Plate', 'Metadata_pert_type',
       'Metadata_mmoles_per_liter', 'Metadata_broad_sample_type', 'Batch_Date',
       'pert_iname', 'Metadata_moa', 'Metadata_alternative_target',
       'Metadata_broad_sample', 'Metadata_dose_recode', 'Metadata_Well',
       'Metadata_InChIKey14', 'clinical_phase', 'target', 'Metadata_pert_id',
       'Metadata_Batch_Date', 'Metadata_pert_id_dose', 'solvent',
       'alternative_target', 'Metadata_pert_mfc_id', 'Metadata_plate_map_name',
       'Metadata_Plate_Map_Name', 'Metadata_broad_date', 'Metadata_solvent'],
      dtype='object')

In [45]:
meta_lincs.groupby(["Metadata_broad_sample","Metadata_mmoles_per_liter"]).size()

Metadata_broad_sample   Metadata_mmoles_per_liter
BRD-A00147595-001-01-5  0.04                            4
                        0.12                            4
                        0.37                            4
                        1.11                            4
                        3.33                            4
                                                     ... 
BRD-M98279124-300-01-1  0.37                            5
                        1.11                            5
                        3.33                            5
                        10.00                           5
DMSO                    0.00                         3264
Length: 9411, dtype: int64

In [28]:
drug_list_rank

Unnamed: 0.1,Unnamed: 0,Metadata_broad_sample,Metadata_mmoles_per_liter,0,Metadata_moa,Metadata_pert_name,Count_Cells,phenotype_abundance_pval,phenotype_abundance_t
0,3802,BRD-K24576554-001-04-8,0.37,5,Aurora kinase inhibitor|JAK inhibitor,AT-9283,499,5.144916e-07,57.690956
1,1290,BRD-A61221616-001-04-3,0.04,5,progesterone receptor agonist,medroxyprogesterone-acetate,2006,3.914460e-06,30.592161
2,3801,BRD-K24576554-001-04-8,0.12,5,Aurora kinase inhibitor|JAK inhibitor,AT-9283,586,6.954569e-06,30.293511
3,2626,BRD-K07881437-001-03-8,0.37,5,Aurora kinase inhibitor|growth factor receptor...,danusertib,353,1.088438e-05,27.103758
4,3630,BRD-K21718444-001-06-8,3.33,5,Abl kinase inhibitor|Aurora kinase inhibitor|F...,KW-2449,578,1.205541e-05,26.441848
...,...,...,...,...,...,...,...,...,...
9390,9371,BRD-M39350793-334-01-1,0.12,1,adrenergic receptor antagonist,reboxetine,2410,,
9391,9372,BRD-M39350793-334-01-1,0.37,1,adrenergic receptor antagonist,reboxetine,2359,,
9392,9373,BRD-M39350793-334-01-1,1.11,1,adrenergic receptor antagonist,reboxetine,2856,,
9393,9374,BRD-M39350793-334-01-1,3.33,1,adrenergic receptor antagonist,reboxetine,2654,,


In [42]:
# pd.merge(drug_list_rank,meta_lincs,on=["Metadata_broad_sample"],how="inner")
X_Metadata_broad_sample,X_Metadata_mmoles_per_liter="BRD-K24576554-001-04-8", 0.37
meta_lincs[(meta_lincs["Metadata_broad_sample"]==X_Metadata_broad_sample) &\
           (meta_lincs["Metadata_mmoles_per_liter"]==X_Metadata_mmoles_per_liter)]

Unnamed: 0,alternative_moa,Metadata_Batch_Number,Metadata_pert_id_vendor,Batch_Number,InChIKey14,moa,Metadata_pert_vehicle,Metadata_broad_id,Metadata_mg_per_ml,broad_date,...,Metadata_pert_id,Metadata_Batch_Date,Metadata_pert_id_dose,solvent,alternative_target,Metadata_pert_mfc_id,Metadata_plate_map_name,Metadata_Plate_Map_Name,Metadata_broad_date,Metadata_solvent
31100,,1,,1.0,LOLPPWBBNUVNQZ,Aurora kinase inhibitor|JAK inhibitor,DMSO,BRD-K24576554,0.14127,broad_id_20170327,...,BRD-K24576554,2016-03-22,BRD-K24576554_0.37,DMSO,,BRD-K24576554-001-04-8,C-7161-01-LM6-006,C-7161-01-LM6-006,broad_id_20170327,DMSO
31484,,2,,2.0,LOLPPWBBNUVNQZ,Aurora kinase inhibitor|JAK inhibitor,DMSO,BRD-K24576554,0.14127,broad_id_20170327,...,BRD-K24576554,2016-04-05,BRD-K24576554_0.37,DMSO,,BRD-K24576554-001-04-8,C-7161-01-LM6-006,C-7161-01-LM6-006,broad_id_20170327,DMSO
31868,,2,,2.0,LOLPPWBBNUVNQZ,Aurora kinase inhibitor|JAK inhibitor,DMSO,BRD-K24576554,0.14127,broad_id_20170327,...,BRD-K24576554,2016-04-05,BRD-K24576554_0.37,DMSO,,BRD-K24576554-001-04-8,C-7161-01-LM6-006,C-7161-01-LM6-006,broad_id_20170327,DMSO
32252,,2,,2.0,LOLPPWBBNUVNQZ,Aurora kinase inhibitor|JAK inhibitor,DMSO,BRD-K24576554,0.14127,broad_id_20170327,...,BRD-K24576554,2016-04-05,BRD-K24576554_0.37,DMSO,,BRD-K24576554-001-04-8,C-7161-01-LM6-006,C-7161-01-LM6-006,broad_id_20170327,DMSO
32636,,2,,2.0,LOLPPWBBNUVNQZ,Aurora kinase inhibitor|JAK inhibitor,DMSO,BRD-K24576554,0.14127,broad_id_20170327,...,BRD-K24576554,2016-04-05,BRD-K24576554_0.37,DMSO,,BRD-K24576554-001-04-8,C-7161-01-LM6-006,C-7161-01-LM6-006,broad_id_20170327,DMSO


In [43]:
meta_lincs.columns

Index(['alternative_moa', 'Metadata_Batch_Number', 'Metadata_pert_id_vendor',
       'Batch_Number', 'InChIKey14', 'moa', 'Metadata_pert_vehicle',
       'Metadata_broad_id', 'Metadata_mg_per_ml', 'broad_date', 'broad_id',
       'Metadata_cell_id', 'Metadata_alternative_moa',
       'Metadata_Assay_Plate_Barcode', 'Metadata_pert_well', 'plate_map_name',
       'Metadata_target', 'Metadata_Plate', 'Metadata_pert_type',
       'Metadata_mmoles_per_liter', 'Metadata_broad_sample_type', 'Batch_Date',
       'pert_iname', 'Metadata_moa', 'Metadata_alternative_target',
       'Metadata_broad_sample', 'Metadata_dose_recode', 'Metadata_Well',
       'Metadata_InChIKey14', 'clinical_phase', 'target', 'Metadata_pert_id',
       'Metadata_Batch_Date', 'Metadata_pert_id_dose', 'solvent',
       'alternative_target', 'Metadata_pert_mfc_id', 'Metadata_plate_map_name',
       'Metadata_Plate_Map_Name', 'Metadata_broad_date', 'Metadata_solvent'],
      dtype='object')

Unnamed: 0,Metadata_broad_sample,Metadata_mmoles_per_liter,Metadata_Plate,Metadata_Well,0
0,BRD-A00147595-001-01-5,0.04,SQ00015196,B12,1
1,BRD-A00147595-001-01-5,0.04,SQ00015222,B12,1
2,BRD-A00147595-001-01-5,0.04,SQ00015223,B12,1
3,BRD-A00147595-001-01-5,0.04,SQ00015224,B12,1
4,BRD-A00147595-001-01-5,0.12,SQ00015196,B11,1
...,...,...,...,...,...
52218,DMSO,0.00,SQ00015233,J14,1
52219,DMSO,0.00,SQ00015233,J15,1
52220,DMSO,0.00,SQ00015233,J16,1
52221,DMSO,0.00,SQ00015233,J17,1


In [54]:
# drug_list_rank.loc[0,['Metadata_broad_sample','Metadata_mmoles_per_liter']].values

In [56]:
X_Metadata_broad_sample,X_Metadata_mmoles_per_liter=drug_list_rank.loc[1,['Metadata_broad_sample','Metadata_mmoles_per_liter']].values

In [64]:
# X_Metadata_mmoles_per_liter
meta_lincs2

Unnamed: 0,Metadata_broad_sample,Metadata_mmoles_per_liter,Metadata_Plate,Metadata_Well,0
0,BRD-A00147595-001-01-5,0.04,SQ00015196,B12,1
1,BRD-A00147595-001-01-5,0.04,SQ00015222,B12,1
2,BRD-A00147595-001-01-5,0.04,SQ00015223,B12,1
3,BRD-A00147595-001-01-5,0.04,SQ00015224,B12,1
4,BRD-A00147595-001-01-5,0.12,SQ00015196,B11,1
...,...,...,...,...,...
52218,DMSO,0.00,SQ00015233,J14,1
52219,DMSO,0.00,SQ00015233,J15,1
52220,DMSO,0.00,SQ00015233,J16,1
52221,DMSO,0.00,SQ00015233,J17,1


In [66]:
meta_lincs2[(meta_lincs2["Metadata_broad_sample"]==X_Metadata_broad_sample) &\
           (meta_lincs2["Metadata_mmoles_per_liter"]==X_Metadata_mmoles_per_liter)]

Unnamed: 0,Metadata_broad_sample,Metadata_mmoles_per_liter,Metadata_Plate,Metadata_Well,0
18513,BRD-K24576554-001-04-8,0.37,SQ00015130,P22,1
18514,BRD-K24576554-001-04-8,0.37,SQ00015165,P22,1
18515,BRD-K24576554-001-04-8,0.37,SQ00015166,P22,1
18516,BRD-K24576554-001-04-8,0.37,SQ00015167,P22,1
18517,BRD-K24576554-001-04-8,0.37,SQ00015168,P22,1


In [None]:
import pickle
# from funcs.utils import readSingleCellData_sqlalch_well_subset,readSingleCellData_sqlalch_random_image_subset

drug_list_rank=pd.read_excel("/home/ubuntu/bucket/projects/2016_08_01_RadialMitochondriaDistribution_donna/\
workspace/Metadata_drugRep/drugList_20210115_uncorrForSiteAgg.xlsx")
drug_list_rank=drug_list_rank[~drug_list_rank["phenotype_abundance_pval"].isnull()]

drug_list_rank=drug_list_rank.sort_values(by=['phenotype_abundance_t'],ascending=1)

meta_lincs=pd.read_csv("/home/ubuntu/bucket/projects/2018_04_20_Rosetta/\
workspace/results/synth_meta/meta_lincs_repLevel.csv")
meta_lincs.Metadata_mmoles_per_liter=meta_lincs.Metadata_mmoles_per_liter.values.round(2)
meta_lincs2=meta_lincs.groupby(['Metadata_broad_sample','Metadata_mmoles_per_liter','Metadata_Plate','Metadata_Well']).size().reset_index()


rootDirDrug='/home/ubuntu/bucket/projects/2015_10_05_DrugRepurposing_AravindSubramanian_GolubLab_Broad/workspace'
batchName='2016_04_01_a549_48hr_batch1_Mito_Project'

# cp_features=mergProf_treatLevel_lincs.columns[mergProf_treatLevel_lincs.columns.str.contains("Cells_|Cytoplasm_|Nuclei_")].tolist()
              
# pert_plate_well=mergProf_treatLevel_lincs.groupby(['PERT','Metadata_Plate','Metadata_Well']).size().reset_index()
# PERTS=pert_plate_well.PERT.unique().tolist()



for row in range(drug_list_rank.shape[0]):#[1140:]:
    X_Metadata_broad_sample,X_Metadata_mmoles_per_liter=\
    drug_list_rank.loc[row,['Metadata_broad_sample','Metadata_mmoles_per_liter']].values;
    
    pert=X_Metadata_broad_sample+"_"+str(X_Metadata_mmoles_per_liter)
    print(pert)
    pert_df=meta_lincs2[(meta_lincs2["Metadata_broad_sample"]==X_Metadata_broad_sample) &\
           (meta_lincs2["Metadata_mmoles_per_liter"]==X_Metadata_mmoles_per_liter)].reset_index(drop=True)
    start_time = time.time()
    
    pert_df_allP0=[]
    for j in range(pert_df.shape[0]):
        # p,wells="SQ00015195",["A13"]
        p,wells=pert_df.loc[j,"Metadata_Plate"],[pert_df.loc[j,"Metadata_Well"]]
        fileName=rootDirDrug+"/backend/"+batchName+"/"+p+"/"+p+".sqlite"
        
        print(p,wells)
        # df_p_s=readSingleCellData_sqlalch_random_image_subset(fileName,50);
        df_p_s=readSingleCellData_sqlalch_well_subset(fileName,wells);
        pert_df_allP0.append(df_p_s)
    
    pert_df_allP = pd.concat(pert_df_allP0)
        
    perWellData={}
    perWellData['cp_ss']=pert_df_allP

    a_file = open("/home/ubuntu/bucket/projects/2016_08_01_RadialMitochondriaDistribution_donna/workspace/drugSCprofiles/"+pert+".pkl", "wb")
    pickle.dump(perWellData, a_file)
    a_file.close()
#     perWellData.to_pickle('/home/ubuntu/bucket/projects/2018_04_20_Rosetta/workspace/synth_l1k_ssCP_meta/'+pert);
    print("--- %s minutes ---" % ((time.time() - start_time)/60))

BRD-K24576554-001-04-8_0.37
SQ00015130 ['P22']
time elapsed: 3.3348270654678345
(9, 165) [3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438]
time elapsed: 11.871920867760975
SQ00015165 ['P22']
time elapsed: 11.769270992279052
SQ00015166 ['P22']
time elapsed: 3.4306668639183044
(9, 165) [3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438]
time elapsed: 11.729095792770385
SQ00015167 ['P22']
time elapsed: 3.4790203094482424
(9, 165) [3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438]
time elapsed: 12.062717000643412
SQ00015168 ['P22']
time elapsed: 3.4862897713979084
(9, 165) [3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438]
time elapsed: 11.219723665714264
--- 76.20144403378168 minutes ---
BRD-A61221616-001-04-3_0.04
SQ00015134 ['P12']
time elapsed: 3.070054284731547
(9, 165) [3340, 3341, 3342, 3343, 3344, 3345, 3346, 3347, 3348]
time elapsed: 11.92899509270986
SQ00015135 ['P12']
time elapsed: 3.3087177713712057
(9, 165) [3340, 3341, 3342, 3343, 3344, 3345, 3346, 3347, 3348]

In [70]:
row

24

In [73]:
0.8
# https://imaging-platform.s3.us-east-1.amazonaws.com/projects/2016_08_01_RadialMitochondriaDistribution_donna/workspace/Metadata_drugRep/drugList_20210115_uncorrForSiteAgg.xlsx



Unnamed: 0.1,Unnamed: 0,Metadata_broad_sample,Metadata_mmoles_per_liter,0,Metadata_moa,Metadata_pert_name,Count_Cells,phenotype_abundance_pval,phenotype_abundance_t
0,3802,BRD-K24576554-001-04-8,0.37,5,Aurora kinase inhibitor|JAK inhibitor,AT-9283,499,5.144916e-07,57.690956
1,1290,BRD-A61221616-001-04-3,0.04,5,progesterone receptor agonist,medroxyprogesterone-acetate,2006,3.914460e-06,30.592161
2,3801,BRD-K24576554-001-04-8,0.12,5,Aurora kinase inhibitor|JAK inhibitor,AT-9283,586,6.954569e-06,30.293511
3,2626,BRD-K07881437-001-03-8,0.37,5,Aurora kinase inhibitor|growth factor receptor...,danusertib,353,1.088438e-05,27.103758
4,3630,BRD-K21718444-001-06-8,3.33,5,Abl kinase inhibitor|Aurora kinase inhibitor|F...,KW-2449,578,1.205541e-05,26.441848
...,...,...,...,...,...,...,...,...,...
9057,6473,BRD-K61195623-001-01-4,3.33,5,tubulin inhibitor,combretastatin-A-4,292,1.104520e-05,-25.988986
9058,6474,BRD-K61195623-001-01-4,10.00,5,tubulin inhibitor,combretastatin-A-4,277,1.074911e-05,-26.143598
9059,5424,BRD-K47869605-001-32-0,0.12,5,microtubule inhibitor|tubulin inhibitor,podophyllotoxin,204,6.225714e-06,-28.687840
9060,6401,BRD-K59875992-001-01-8,10.00,5,cyclooxygenase inhibitor|lipoxygenase inhibitor,FPL-62064,2504,3.841602e-06,-33.010802


In [76]:
drug_list_rank.sort_values(by=['phenotype_abundance_t'],ascending=1)

Unnamed: 0.1,Unnamed: 0,Metadata_broad_sample,Metadata_mmoles_per_liter,0,Metadata_moa,Metadata_pert_name,Count_Cells,phenotype_abundance_pval,phenotype_abundance_t
9061,6472,BRD-K61195623-001-01-4,1.11,5,tubulin inhibitor,combretastatin-A-4,312,3.348999e-07,-52.218304
9060,6401,BRD-K59875992-001-01-8,10.00,5,cyclooxygenase inhibitor|lipoxygenase inhibitor,FPL-62064,2504,3.841602e-06,-33.010802
9059,5424,BRD-K47869605-001-32-0,0.12,5,microtubule inhibitor|tubulin inhibitor,podophyllotoxin,204,6.225714e-06,-28.687840
9058,6474,BRD-K61195623-001-01-4,10.00,5,tubulin inhibitor,combretastatin-A-4,277,1.074911e-05,-26.143598
9057,6473,BRD-K61195623-001-01-4,3.33,5,tubulin inhibitor,combretastatin-A-4,292,1.104520e-05,-25.988986
...,...,...,...,...,...,...,...,...,...
9390,9371,BRD-M39350793-334-01-1,0.12,1,adrenergic receptor antagonist,reboxetine,2410,,
9391,9372,BRD-M39350793-334-01-1,0.37,1,adrenergic receptor antagonist,reboxetine,2359,,
9392,9373,BRD-M39350793-334-01-1,1.11,1,adrenergic receptor antagonist,reboxetine,2856,,
9393,9374,BRD-M39350793-334-01-1,3.33,1,adrenergic receptor antagonist,reboxetine,2654,,
