In [1]:
import sys
import pathlib
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
import plotly.express as px
from pycytominer.cyto_utils import infer_cp_features
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import pdfkit
import pyarrow as pa
import pyarrow.parquet as pq

# import Union
from typing import Union

sys.path.append("..")
# from ..utils.utils import df_stats
import matplotlib.pyplot as plt

In [2]:
# Define inputs: relative path to the parquet file holding the input feature table.
feature_file = pathlib.Path(
    "../../Extracted_Features_(CSV_files)/SHSY5Y_run_sc_norm.parquet"
)
# Read the parquet file with pyarrow, then convert the Arrow table to a pandas DataFrame.
feature_df = pq.read_table(feature_file).to_pandas()
# Disabled alternative loader (CSV via pandas):
# feature_df = pd.read_csv(feature_file, engine="pyarrow")

In [3]:
# Define the output file path; the processed feature table is written to this
# parquet file later in the notebook (see the pq.write_table call).
feature_df_out_path = pathlib.Path(
    "../../Extracted_Features_(CSV_files)/feature_df_sc_norm.parquet"
)

In [4]:
# Report the (rows, columns) shape of the loaded table, then preview it.
print(feature_df.shape)
feature_df

(597902, 2926)


Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,0.011881,0.003817,0.353469,0.365618,0.378125,0.342100,0.099225,0.096054,0.110432,0.107764
1,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.044113,-0.042322,1.780397,1.603914,1.571446,1.582787,0.020683,0.016122,0.012990,0.013631
2,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.051521,-0.054760,0.019005,0.041005,0.029673,0.007339,0.027519,0.045982,0.037653,0.017281
3,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.058159,-0.059141,-0.005212,-0.010673,0.000258,0.003475,-0.002945,0.000293,0.008048,-0.002151
4,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.034966,-0.030255,-0.044078,-0.052547,-0.042483,-0.029738,0.016846,0.006499,0.014114,0.026206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597897,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,-0.070301,-0.068560,-0.117209,-0.125985,-0.127684,-0.122355,-0.024370,-0.033577,-0.033811,-0.028148
597898,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,0.000763,-0.007537,-0.108656,-0.110885,-0.112064,-0.113660,-0.049855,-0.048752,-0.050176,-0.049417
597899,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,-0.049428,-0.050434,-0.088444,-0.089816,-0.092630,-0.092441,-0.027676,-0.024066,-0.026279,-0.027050
597900,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,0.078521,0.081602,0.098873,0.110850,0.109012,0.112935,0.308702,0.309290,0.311365,0.309627


In [5]:
# Costes features show very high variance across the whole dataset, so exclude them.
# Any column whose name contains "Costes" is dropped.
costes_columns = feature_df.filter(regex="Costes").columns
feature_df = feature_df.drop(columns=costes_columns)
print(feature_df.shape)
feature_df

(597902, 2866)


Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,0.011881,0.003817,0.353469,0.365618,0.378125,0.342100,0.099225,0.096054,0.110432,0.107764
1,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.044113,-0.042322,1.780397,1.603914,1.571446,1.582787,0.020683,0.016122,0.012990,0.013631
2,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.051521,-0.054760,0.019005,0.041005,0.029673,0.007339,0.027519,0.045982,0.037653,0.017281
3,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.058159,-0.059141,-0.005212,-0.010673,0.000258,0.003475,-0.002945,0.000293,0.008048,-0.002151
4,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.034966,-0.030255,-0.044078,-0.052547,-0.042483,-0.029738,0.016846,0.006499,0.014114,0.026206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597897,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,-0.070301,-0.068560,-0.117209,-0.125985,-0.127684,-0.122355,-0.024370,-0.033577,-0.033811,-0.028148
597898,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,0.000763,-0.007537,-0.108656,-0.110885,-0.112064,-0.113660,-0.049855,-0.048752,-0.050176,-0.049417
597899,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,-0.049428,-0.050434,-0.088444,-0.089816,-0.092630,-0.092441,-0.027676,-0.024066,-0.026279,-0.027050
597900,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,0.078521,0.081602,0.098873,0.110850,0.109012,0.112935,0.308702,0.309290,0.311365,0.309627


In [6]:
# Replace every "/" inside string values with "_per_" (e.g. in treatment dosage text)
# to avoid errors when such strings are later interpolated into file names.
# NOTE: this is a regex substring replacement applied to the whole frame,
# not to a single column.
feature_df = feature_df.replace("/", "_per_", regex=True)
feature_df

Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,0.011881,0.003817,0.353469,0.365618,0.378125,0.342100,0.099225,0.096054,0.110432,0.107764
1,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.044113,-0.042322,1.780397,1.603914,1.571446,1.582787,0.020683,0.016122,0.012990,0.013631
2,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.051521,-0.054760,0.019005,0.041005,0.029673,0.007339,0.027519,0.045982,0.037653,0.017281
3,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.058159,-0.059141,-0.005212,-0.010673,0.000258,0.003475,-0.002945,0.000293,0.008048,-0.002151
4,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.034966,-0.030255,-0.044078,-0.052547,-0.042483,-0.029738,0.016846,0.006499,0.014114,0.026206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597897,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,-0.070301,-0.068560,-0.117209,-0.125985,-0.127684,-0.122355,-0.024370,-0.033577,-0.033811,-0.028148
597898,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,0.000763,-0.007537,-0.108656,-0.110885,-0.112064,-0.113660,-0.049855,-0.048752,-0.050176,-0.049417
597899,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,-0.049428,-0.050434,-0.088444,-0.089816,-0.092630,-0.092441,-0.027676,-0.024066,-0.026279,-0.027050
597900,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,0.078521,0.081602,0.098873,0.110850,0.109012,0.112935,0.308702,0.309290,0.311365,0.309627


In [7]:
# Recycled code from: https://github.com/WayScience/NF1_SchwannCell_data/blob/main/5_analyze_data/notebooks/linear_model/fit_linear_model.ipynb
# Count the number of single cells (rows) per well.
# `.size()` counts every row in each group; counting a specific column such as
# "Metadata_Plate" would skip rows where that column is missing and would tie the
# result to an unrelated column.
# Result columns: "Metadata_Well", "Metadata_number_of_singlecells".
cell_count_df = (
    feature_df.groupby("Metadata_Well")
    .size()
    .reset_index(name="Metadata_number_of_singlecells")
)

In [8]:
# Preview the feature table (the notebook renders a truncated head/tail view).
feature_df

Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,0.011881,0.003817,0.353469,0.365618,0.378125,0.342100,0.099225,0.096054,0.110432,0.107764
1,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.044113,-0.042322,1.780397,1.603914,1.571446,1.582787,0.020683,0.016122,0.012990,0.013631
2,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.051521,-0.054760,0.019005,0.041005,0.029673,0.007339,0.027519,0.045982,0.037653,0.017281
3,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.058159,-0.059141,-0.005212,-0.010673,0.000258,0.003475,-0.002945,0.000293,0.008048,-0.002151
4,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.034966,-0.030255,-0.044078,-0.052547,-0.042483,-0.029738,0.016846,0.006499,0.014114,0.026206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597897,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,-0.070301,-0.068560,-0.117209,-0.125985,-0.127684,-0.122355,-0.024370,-0.033577,-0.033811,-0.028148
597898,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,0.000763,-0.007537,-0.108656,-0.110885,-0.112064,-0.113660,-0.049855,-0.048752,-0.050176,-0.049417
597899,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,-0.049428,-0.050434,-0.088444,-0.089816,-0.092630,-0.092441,-0.027676,-0.024066,-0.026279,-0.027050
597900,SH-SY5Y,O23,3555,6,Media ctr,,,media ctr,,,...,0.078521,0.081602,0.098873,0.110850,0.109012,0.112935,0.308702,0.309290,0.311365,0.309627


In [9]:
# Raise the pandas display limit so up to 100 columns are shown when a DataFrame is rendered.
pd.set_option("display.max_columns", 100)

In [10]:
# Treat missing concentrations as 0: fill NaN in each concentration column
# (inducer1, inducer2, inhibitor) one column at a time.
for concentration_column in (
    "Metadata_inducer1_concentration",
    "Metadata_inducer2_concentration",
    "Metadata_inhibitor_concentration",
):
    feature_df[concentration_column] = feature_df[concentration_column].fillna(0)

In [11]:
# List the distinct values found in the second-inducer column.
feature_df["Metadata_inducer2"].unique()

array([None, 'Nigericin'], dtype=object)

In [12]:
# Preview the feature table after filling the missing concentrations.
feature_df

Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,Metadata_inducer2,Metadata_inducer2_concentration,Metadata_inducer2_concentration_unit,Metadata_ImageNumber,Metadata_Plate,Metadata_Cells_Number_Object_Number,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Nuclei_Number_Object_Number,Cytoplasm_AreaShape_Area,Cytoplasm_AreaShape_BoundingBoxArea,Cytoplasm_AreaShape_BoundingBoxMaximum_X,Cytoplasm_AreaShape_BoundingBoxMaximum_Y,Cytoplasm_AreaShape_BoundingBoxMinimum_X,Cytoplasm_AreaShape_BoundingBoxMinimum_Y,Cytoplasm_AreaShape_Center_X,Cytoplasm_AreaShape_Center_Y,Cytoplasm_AreaShape_Compactness,Cytoplasm_AreaShape_ConvexArea,Cytoplasm_AreaShape_Eccentricity,Cytoplasm_AreaShape_EquivalentDiameter,Cytoplasm_AreaShape_EulerNumber,Cytoplasm_AreaShape_Extent,Cytoplasm_AreaShape_FormFactor,Cytoplasm_AreaShape_MajorAxisLength,Cytoplasm_AreaShape_MaxFeretDiameter,Cytoplasm_AreaShape_MaximumRadius,Cytoplasm_AreaShape_MeanRadius,Cytoplasm_AreaShape_MedianRadius,Cytoplasm_AreaShape_MinFeretDiameter,Cytoplasm_AreaShape_MinorAxisLength,Cytoplasm_AreaShape_Orientation,Cytoplasm_AreaShape_Perimeter,Cytoplasm_AreaShape_Solidity,Cytoplasm_AreaShape_Zernike_0_0,Cytoplasm_AreaShape_Zernike_1_1,Cytoplasm_AreaShape_Zernike_2_0,Cytoplasm_AreaShape_Zernike_2_2,Cytoplasm_AreaShape_Zernike_3_1,Cytoplasm_AreaShape_Zernike_3_3,...,Nuclei_Texture_SumEntropy_CorrGasdermin_3_02_256,Nuclei_Texture_SumEntropy_CorrGasdermin_3_03_256,Nuclei_Texture_SumEntropy_CorrMito_3_00_256,Nuclei_Texture_SumEntropy_CorrMito_3_01_256,Nuclei_Texture_SumEntropy_CorrMito_3_02_256,Nuclei_Texture_SumEntropy_CorrMito_3_03_256,Nuclei_Texture_SumEntropy_CorrPM_3_00_256,Nuclei_Texture_SumEntropy_CorrPM_3_01_256,Nuclei_Texture_SumEntropy_CorrPM_3_02_256,Nuclei_Texture_S
umEntropy_CorrPM_3_03_256,Nuclei_Texture_SumVariance_CorrDNA_3_00_256,Nuclei_Texture_SumVariance_CorrDNA_3_01_256,Nuclei_Texture_SumVariance_CorrDNA_3_02_256,Nuclei_Texture_SumVariance_CorrDNA_3_03_256,Nuclei_Texture_SumVariance_CorrER_3_00_256,Nuclei_Texture_SumVariance_CorrER_3_01_256,Nuclei_Texture_SumVariance_CorrER_3_02_256,Nuclei_Texture_SumVariance_CorrER_3_03_256,Nuclei_Texture_SumVariance_CorrGasdermin_3_00_256,Nuclei_Texture_SumVariance_CorrGasdermin_3_01_256,Nuclei_Texture_SumVariance_CorrGasdermin_3_02_256,Nuclei_Texture_SumVariance_CorrGasdermin_3_03_256,Nuclei_Texture_SumVariance_CorrMito_3_00_256,Nuclei_Texture_SumVariance_CorrMito_3_01_256,Nuclei_Texture_SumVariance_CorrMito_3_02_256,Nuclei_Texture_SumVariance_CorrMito_3_03_256,Nuclei_Texture_SumVariance_CorrPM_3_00_256,Nuclei_Texture_SumVariance_CorrPM_3_01_256,Nuclei_Texture_SumVariance_CorrPM_3_02_256,Nuclei_Texture_SumVariance_CorrPM_3_03_256,Nuclei_Texture_Variance_CorrDNA_3_00_256,Nuclei_Texture_Variance_CorrDNA_3_01_256,Nuclei_Texture_Variance_CorrDNA_3_02_256,Nuclei_Texture_Variance_CorrDNA_3_03_256,Nuclei_Texture_Variance_CorrER_3_00_256,Nuclei_Texture_Variance_CorrER_3_01_256,Nuclei_Texture_Variance_CorrER_3_02_256,Nuclei_Texture_Variance_CorrER_3_03_256,Nuclei_Texture_Variance_CorrGasdermin_3_00_256,Nuclei_Texture_Variance_CorrGasdermin_3_01_256,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,B13,3765,6,Media ctr,0.0,,media ctr,0,,,0.0,,1,*70117_20230210MM1_Gasdermin514_CP_BC430856,1,1,3,3,-0.947035,-0.688884,1.399821,-1.742919,1.476026,-1.723436,1.432244,-1.725772,0.948831,-1.007165,1.115044,-1.231865,0.029569,-1.711744,-0.972326,-0.485847,-0.728180,-1.202883,-1.163814,-1.040024,-1.393217,-1.567537,-0.653935,-1.043596,-0.871740,-1.420223,-0.749731,-0.813838,-0.383000,-0.352649,0.143507,...,1.579814,1.533187,2.200225,2.126000,2.255644,2.198033,1.979142,1.840980,2.020992,1.984205,-0.035052,-0.057861,0.005679,0.002817,0.006575,0.015716,0.016641,0.004589,0.003825,0.011069,0.012475,0.001836,0.333028,0.346700,0.381177,0.347100,0.099663,0.090248,0.109975,0.120139,-0.016976,-0.021861,-0.000860,-0.006703,0.010342,0.017295,0.015778,0.006830,0.007037,0.012481,0.011881,0.003817,0.353469,0.365618,0.378125,0.342100,0.099225,0.096054,0.110432,0.107764
1,SH-SY5Y,B13,3765,6,Media ctr,0.0,,media ctr,0,,,0.0,,1,*70117_20230210MM1_Gasdermin514_CP_BC430856,2,2,4,4,-0.913518,-1.056311,0.933086,-1.785479,0.970419,-1.665695,0.942905,-1.721770,-0.085263,-1.008538,0.921843,-1.162893,0.029569,-0.135468,-0.234154,-0.705164,-1.012085,-0.936796,-0.877317,-0.906692,-1.392152,-1.411366,1.528873,-1.305414,-0.561745,-0.793221,-0.490967,-0.925769,0.926085,-1.261681,-0.851192,...,0.575092,0.607704,3.154776,3.067184,3.102794,3.081879,0.856316,0.885179,0.870666,0.831797,-0.022946,-0.012120,0.010344,-0.018577,-0.035799,-0.045468,-0.044845,-0.040345,-0.038451,-0.044510,-0.044607,-0.041363,1.731336,1.483850,1.533487,1.577185,0.023170,0.019382,0.014214,0.016084,-0.015379,-0.012790,0.002863,-0.013019,-0.036163,-0.043404,-0.044215,-0.041627,-0.038627,-0.043031,-0.044113,-0.042322,1.780397,1.603914,1.571446,1.582787,0.020683,0.016122,0.012990,0.013631
2,SH-SY5Y,B13,3765,6,Media ctr,0.0,,media ctr,0,,,0.0,,1,*70117_20230210MM1_Gasdermin514_CP_BC430856,3,3,5,5,-0.501773,-0.564248,0.686092,-1.737811,0.700534,-1.677583,0.697357,-1.714370,-0.566302,-0.700422,0.228820,-0.470830,0.029569,-0.006674,0.335354,-0.442794,-0.731056,-0.657008,-0.575687,-0.522960,-0.587704,-0.345945,1.162336,-0.731367,0.672216,0.371611,0.063930,-0.819243,1.313545,-1.785511,0.218615,...,0.066941,0.031085,0.437458,0.528542,0.463568,0.396539,0.696620,0.813264,0.750366,0.620113,-0.268905,-0.252446,-0.268663,-0.275261,-0.048933,-0.040401,-0.045701,-0.054093,-0.053070,-0.047236,-0.051049,-0.055976,0.008813,0.039253,0.030021,-0.008041,0.024382,0.049886,0.033607,0.004704,-0.269541,-0.261498,-0.269736,-0.269411,-0.048776,-0.042967,-0.046188,-0.051640,-0.053136,-0.049114,-0.051521,-0.054760,0.019005,0.041005,0.029673,0.007339,0.027519,0.045982,0.037653,0.017281
3,SH-SY5Y,B13,3765,6,Media ctr,0.0,,media ctr,0,,,0.0,,1,*70117_20230210MM1_Gasdermin514_CP_BC430856,4,4,6,6,-0.175166,-0.120636,-1.197879,-1.734407,-1.250151,-1.686074,-1.222694,-1.715213,-0.140817,-0.116804,0.614591,-0.038786,0.029569,-0.099833,-0.178647,0.265248,0.088385,-0.064736,-0.268405,-0.172749,-0.294475,-0.133465,1.716842,0.025087,-0.055551,-0.268212,-0.150536,-0.107099,0.214561,-0.592411,0.403406,...,0.268874,0.235097,0.647950,0.626441,0.710587,0.702333,0.515412,0.533001,0.598788,0.494720,-0.141250,-0.120914,-0.100235,-0.152087,-0.066991,-0.066562,-0.064684,-0.066001,-0.059516,-0.058998,-0.057855,-0.058864,-0.010493,-0.016394,0.000874,0.003669,-0.004126,-0.001737,0.005885,-0.003722,-0.130365,-0.120529,-0.104180,-0.135741,-0.067116,-0.067025,-0.065093,-0.066471,-0.059500,-0.059191,-0.058159,-0.059141,-0.005212,-0.010673,0.000258,0.003475,-0.002945,0.000293,0.008048,-0.002151
4,SH-SY5Y,B13,3765,6,Media ctr,0.0,,media ctr,0,,,0.0,,1,*70117_20230210MM1_Gasdermin514_CP_BC430856,5,5,7,7,-0.833762,-0.535665,1.720062,-1.725895,1.759576,-1.689471,1.733447,-1.704966,0.905852,-0.736984,0.433695,-1.008891,0.029569,-1.510952,-0.949705,-0.898175,-0.549749,-0.827239,-0.939033,-1.040024,-1.162478,-1.013888,1.060445,-0.741556,-1.218318,-1.219576,-0.397864,0.015001,-1.389833,0.106419,-1.216382,...,0.582163,0.638808,0.474624,0.420568,0.493785,0.542841,1.100476,1.022466,1.109403,1.221044,-0.281107,-0.259096,-0.277888,-0.286163,-0.036354,-0.042160,-0.036182,-0.028801,-0.035405,-0.039754,-0.035169,-0.029675,-0.045652,-0.055215,-0.041894,-0.027275,0.020176,0.012139,0.014474,0.026012,-0.281334,-0.268747,-0.279088,-0.279470,-0.035965,-0.041147,-0.036168,-0.029990,-0.034789,-0.038793,-0.034966,-0.030255,-0.044078,-0.052547,-0.042483,-0.029738,0.016846,0.006499,0.014114,0.026206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597897,SH-SY5Y,O23,3555,6,Media ctr,0.0,,media ctr,0,,,0.0,,1246,*70117_20230210MM1_Gasdermin514_CP_BC430856,242,242,254,254,-0.062008,-0.523952,-0.759343,1.377191,-0.690124,1.358337,-0.732849,1.387336,-1.104636,-0.407934,0.262974,0.108057,0.027758,1.639502,1.696163,-0.323623,-0.254798,0.534942,0.534341,0.423450,-0.338981,-0.252085,0.213535,-0.598881,1.398633,0.793497,1.642893,0.294837,1.503638,0.155427,-1.100791,...,-0.321217,-0.247681,-1.084179,-1.139108,-1.088619,-1.075569,-0.238309,-0.244925,-0.254599,-0.196033,-0.317161,-0.318129,-0.327788,-0.311405,-0.072488,-0.075932,-0.075135,-0.071220,-0.067889,-0.071164,-0.070410,-0.067168,-0.112699,-0.121413,-0.125394,-0.117095,-0.022616,-0.035296,-0.035448,-0.027368,-0.319038,-0.315293,-0.323182,-0.313105,-0.072933,-0.074633,-0.074855,-0.072779,-0.068538,-0.070206,-0.070301,-0.068560,-0.117209,-0.125985,-0.127684,-0.122355,-0.024370,-0.033577,-0.033811,-0.028148
597898,SH-SY5Y,O23,3555,6,Media ctr,0.0,,media ctr,0,,,0.0,,1247,*70117_20230210MM1_Gasdermin514_CP_BC430856,146,146,159,159,0.120327,0.042647,0.528168,0.059438,0.501173,0.069177,0.509626,0.055531,-0.567248,0.079153,0.974595,0.307410,0.027758,0.400955,0.398058,0.627890,0.202900,0.133778,0.109954,0.131350,-0.086712,-0.460151,-1.101670,0.121373,0.457187,0.252413,-0.797719,0.182644,2.045721,-1.371684,-0.671827,...,1.317695,1.285343,-0.836951,-0.837538,-0.784916,-0.836163,-0.168117,-0.164723,-0.180013,-0.201412,-0.181238,-0.148837,-0.158525,-0.185238,-0.012716,-0.003699,-0.005347,-0.015725,-0.006316,0.003050,0.001651,-0.009862,-0.104498,-0.102764,-0.108054,-0.109865,-0.049415,-0.047082,-0.049425,-0.048455,-0.172663,-0.157357,-0.161094,-0.168201,-0.011726,-0.005788,-0.006666,-0.013906,-0.005016,0.001303,0.000763,-0.007537,-0.108656,-0.110885,-0.112064,-0.113660,-0.049855,-0.048752,-0.050176,-0.049417
597899,SH-SY5Y,O23,3555,6,Media ctr,0.0,,media ctr,0,,,0.0,,1247,*70117_20230210MM1_Gasdermin514_CP_BC430856,221,221,234,234,0.710568,0.798295,0.755476,0.806336,0.713299,0.745345,0.750819,0.797060,-0.362531,0.804987,-0.352303,0.877974,0.027758,0.280130,0.079732,0.507466,0.342725,0.805954,0.431581,0.392697,1.086639,1.346805,0.600319,0.979724,0.427871,0.888729,-0.772545,0.533274,0.317343,0.060171,0.488201,...,0.913287,0.849582,0.324691,0.298674,0.348806,0.300346,0.595559,0.684271,0.632351,0.517274,4.250827,4.268314,4.352385,4.405133,-0.056359,-0.053639,-0.054459,-0.056618,-0.051666,-0.049045,-0.049992,-0.052185,-0.087563,-0.087777,-0.091452,-0.091275,-0.029232,-0.023341,-0.025710,-0.030199,4.275382,4.284463,4.243751,4.230354,-0.055439,-0.052984,-0.053963,-0.054820,-0.050759,-0.048411,-0.049428,-0.050434,-0.088444,-0.089816,-0.092630,-0.092441,-0.027676,-0.024066,-0.026279,-0.027050
597900,SH-SY5Y,O23,3555,6,Media ctr,0.0,,media ctr,0,,,0.0,,1247,*70117_20230210MM1_Gasdermin514_CP_BC430856,288,288,302,302,-0.458549,-0.275185,-1.349664,1.503668,-1.397775,1.561529,-1.394310,1.521632,1.271973,-0.069709,0.938246,-0.383247,0.027758,-0.579257,-1.204379,0.567070,0.347625,-0.119109,-0.384470,-0.671560,-0.384827,-0.429767,1.482063,0.418873,-1.051889,-1.159900,-0.047890,-0.855161,-0.093183,-0.343085,0.654400,...,2.152903,2.248176,1.599785,1.635067,1.539933,1.603372,2.901715,2.906290,2.846213,2.912251,1.466348,1.471782,1.442949,1.480023,0.068465,0.072724,0.072036,0.073975,0.076572,0.081342,0.080445,0.082096,0.101730,0.117995,0.116520,0.122871,0.306754,0.305971,0.316947,0.317828,1.431741,1.429763,1.433085,1.436083,0.069005,0.070542,0.069986,0.073439,0.077174,0.079196,0.078521,0.081602,0.098873,0.110850,0.109012,0.112935,0.308702,0.309290,0.311365,0.309627


In [13]:
# List the unique "<inducer1>_<dose>_<inhibitor>_<dose>" combinations.
# Only the last expression of a cell is displayed, so this cell holds a single
# expression. A longer key that also appended Metadata_inducer2 and its concentration
# was dropped: its result was never stored or shown, and because Metadata_inducer2
# contains None the string concatenation evaluates to NaN for those rows.
(
    feature_df["Metadata_inducer1"]
    + "_"
    + feature_df["Metadata_inducer1_concentration"].astype(str)
    + "_"
    + feature_df["Metadata_inhibitor"]
    + "_"
    + feature_df["Metadata_inhibitor_concentration"].astype(str)
).unique()

array(['media ctr_0_Media ctr_0.0', 'DMSO_0.100_DMSO_1.0',
       'DMSO_0.100_Z-VAD-FMK_100.0', 'DMSO_0.100_Z-VAD-FMK_30.0',
       'DMSO_0.100_DMSO_0.025', 'Thapsigargin_1.000_DMSO_0.025',
       'Thapsigargin_10.000_DMSO_0.025', 'Topotecan_5.000_DMSO_0.025',
       'Topotecan_10.000_DMSO_0.025', 'Topotecan_20.000_DMSO_0.025',
       'LPS_0.010_DMSO_0.025', 'LPS_0.100_DMSO_0.025',
       'LPS_1.000_DMSO_0.025', 'LPS_10.000_DMSO_0.025',
       'LPS_10.000_Disulfiram_0.1', 'LPS_10.000_Disulfiram_1.0',
       'LPS_10.000_Disulfiram_2.5', 'LPS_100.000_DMSO_0.025',
       'Disulfiram_0.100_DMSO_0.025', 'Disulfiram_1.000_DMSO_0.025',
       'Disulfiram_2.500_DMSO_0.025', 'H2O2_100.000_DMSO_0.025',
       'LPS_10.000_Z-VAD-FMK_100.0', 'LPS_1.000_Disulfiram_1.0',
       'LPS_1.000_Z-VAD-FMK_100.0', 'H2O2_100.000_Disulfiram_1.0',
       'H2O2_100.000_Z-VAD-FMK_100.0', 'Flagellin_0.100_DMSO_0.025',
       'Flagellin_1.000_DMSO_0.025', 'Flagellin_1.000_Disulfiram_1.0'],
      dtype=object)

In [14]:
# Disabled: merging the per-well cell counts back onto the feature table.
# feature_df = feature_df.merge(cell_count_df, on="Metadata_Well")

# Add a combined label column "<inducer1>_<inducer1 concentration>".
# `assign` passes the frame being extended to the callable, so the lambda uses its
# own argument (`df`) instead of reaching back to the global `feature_df`.
# The concentration is cast to str so it can be concatenated with the inducer name.
feature_df = feature_df.assign(
    Metadata_Treatment_and_Dose=lambda df: df["Metadata_inducer1"]
    + "_"
    + df["Metadata_inducer1_concentration"].astype(str)
)

We are testing 2847 CellProfiler features
The unique Treatment-Dosages are: media ctr_0, 
DMSO_0.100, 
Thapsigargin_1.000, 
Thapsigargin_10.000, 
Topotecan_5.000, 
Topotecan_10.000, 
Topotecan_20.000, 
LPS_0.010, 
LPS_0.100, 
LPS_1.000, 
LPS_10.000, 
LPS_100.000, 
Disulfiram_0.100, 
Disulfiram_1.000, 
Disulfiram_2.500, 
H2O2_100.000, 
Flagellin_0.100, 
Flagellin_1.000


In [20]:
# Cast the inducer1 concentration column to float, store it back on the frame,
# and list the distinct numeric values.
inducer1_concentration = feature_df["Metadata_inducer1_concentration"].astype(float)
feature_df["Metadata_inducer1_concentration"] = inducer1_concentration
inducer1_concentration.unique()

array([0.0e+00, 1.0e-01, 1.0e+00, 1.0e+01, 5.0e+00, 2.0e+01, 1.0e-02,
       1.0e+02, 2.5e+00])

In [21]:
# Persist the processed feature table as parquet: convert the DataFrame to an
# Arrow table, then write it to the configured output path.
# Disabled CSV alternative:
# feature_df.to_csv(feature_df_out_path, index=False)
feature_df_table = pa.Table.from_pandas(df=feature_df)
pq.write_table(table=feature_df_table, where=feature_df_out_path)