# Perform Two-Sample KS-Test on NF1 Genotype Samples

## Import libraries

In [1]:
import numpy as np
import pandas as pd
import pathlib

import kstest_utils as ks_utils

## Set seed

In [2]:
# Seed NumPy's legacy global random state so any later step that draws from
# np.random is repeatable across runs.
# NOTE(review): it is not visible here whether kstest_utils uses randomness — confirm the seed is needed.
np.random.seed(0)

## Set paths

In [3]:
# NOTE: all paths are relative to the directory the notebook is run from.
# directory with CellProfiler pipeline features
input_dir = pathlib.Path("../../../4_processing_features/data/Plate2/CellProfiler/")
# path to normalized features (joined with the `/` operator instead of
# formatting the Path back into a string)
norm_data = input_dir / "nf1_sc_norm_cellprofileric_cellpose.csv.gz"
# save path for kstest result
save_path = pathlib.Path("data/Plate2/nf1_cellprofileric_cellpose_kstest_results.csv")
# make sure the output directory exists so that writing the results file
# cannot fail on a fresh checkout where data/Plate2/ has not been created yet
save_path.parent.mkdir(parents=True, exist_ok=True)

## Load in NF1 data

In [4]:
# Read the gzip-compressed table of normalized features; the first CSV column
# is used as the row index.
data = pd.read_csv(
    norm_data,
    index_col=0,
    compression="gzip",
)

# Report the (rows, columns) shape, then preview the first rows.
print(data.shape)
data.head()

(1757, 1208)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_number_of_singlecells,Metadata_gene_name,Metadata_genotype,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,A,1,54,NF1,WT,8,1,A1,1,5,...,-0.144344,-0.138047,0.073091,0.034282,0.030681,0.009522,-0.233773,-0.222703,-0.232712,-0.231201
1,A,1,54,NF1,WT,8,1,A1,2,8,...,-0.5786,-0.57414,-0.282463,-0.263345,-0.30744,-0.298484,-0.305637,-0.308507,-0.308217,-0.298329
2,A,1,54,NF1,WT,8,1,A1,3,7,...,-0.281587,-0.258892,-0.409919,-0.399022,-0.390164,-0.407884,-0.3653,-0.366681,-0.372685,-0.368402
3,A,1,54,NF1,WT,8,1,A1,4,9,...,-0.640216,-0.648688,-0.685768,-0.689027,-0.701257,-0.691572,-0.465312,-0.463033,-0.470317,-0.464007
4,A,1,54,NF1,WT,8,1,A1,5,11,...,-0.453436,-0.440061,-0.085631,-0.044371,-0.069082,-0.09385,-0.143716,-0.133815,-0.140179,-0.131277


## Perform two-sample KS-test

In [5]:
# Run the two-sample KS-test helper from kstest_utils on the loaded table.
# The displayed result has the columns statistic, pvalue, statistic_location
# and statistic_sign, with one row per tested feature.
# NOTE(review): presumably the two samples are the NF1 genotype groups
# (Metadata_genotype) — confirm in kstest_utils.
feature_results = ks_utils.nf1_ks_test_two_sample(data)
feature_results

Unnamed: 0,statistic,pvalue,statistic_location,statistic_sign
0,0.180732,1.197950e-12,0.547073,1
1,0.427561,1.831993e-70,0.200427,-1
2,0.357012,8.893414e-49,-0.077152,-1
3,0.087608,2.654036e-03,-0.926146,-1
4,0.061400,7.618998e-02,-1.216991,-1
...,...,...,...,...
1191,0.358144,4.320249e-49,-0.444785,1
1192,0.172069,1.674882e-11,-0.137798,1
1193,0.171823,1.800721e-11,-0.177703,1
1194,0.175868,5.346385e-12,-0.183398,1


In [6]:
# Collect the feature (non-metadata) column names from the data into a
# one-column table named "Features".
# A column counts as a feature when the substring "Metadata" does not appear
# anywhere in its name.
# Building the frame with the column name in one step keeps `feature_names` a
# DataFrame throughout and yields an empty "Features" column (instead of a
# length-mismatch error on renaming) when no feature columns are found.
# NOTE(review): presumably the row order here must match feature_results for
# the later merge — confirm in kstest_utils.
feature_names = pd.DataFrame(
    {"Features": [col_name for col_name in data.columns if "Metadata" not in col_name]}
)

feature_names

Unnamed: 0,Features
0,Cytoplasm_Number_Object_Number
1,Cytoplasm_AreaShape_Area
2,Cytoplasm_AreaShape_BoundingBoxArea
3,Cytoplasm_AreaShape_BoundingBoxMaximum_X
4,Cytoplasm_AreaShape_BoundingBoxMaximum_Y
...,...
1191,Nuclei_Texture_Variance_GFP_3_03_256
1192,Nuclei_Texture_Variance_RFP_3_00_256
1193,Nuclei_Texture_Variance_RFP_3_01_256
1194,Nuclei_Texture_Variance_RFP_3_02_256


## Save the final `csv` file with merged features and results

In [7]:
# Combine the feature names with their KS-test results using the kstest_utils
# helper, passing the destination CSV path; the returned table (a Features
# column followed by the test result columns) is displayed as the cell output.
# NOTE(review): presumably the helper writes the merged table to save_path — confirm in kstest_utils.
ks_utils.merge_features_kstest(feature_results, feature_names, save_path)

Unnamed: 0,Features,statistic,pvalue,statistic_location,statistic_sign
0,Cytoplasm_Number_Object_Number,0.180732,1.197950e-12,0.547073,1
1,Cytoplasm_AreaShape_Area,0.427561,1.831993e-70,0.200427,-1
2,Cytoplasm_AreaShape_BoundingBoxArea,0.357012,8.893414e-49,-0.077152,-1
3,Cytoplasm_AreaShape_BoundingBoxMaximum_X,0.087608,2.654036e-03,-0.926146,-1
4,Cytoplasm_AreaShape_BoundingBoxMaximum_Y,0.061400,7.618998e-02,-1.216991,-1
...,...,...,...,...,...
1191,Nuclei_Texture_Variance_GFP_3_03_256,0.358144,4.320249e-49,-0.444785,1
1192,Nuclei_Texture_Variance_RFP_3_00_256,0.172069,1.674882e-11,-0.137798,1
1193,Nuclei_Texture_Variance_RFP_3_01_256,0.171823,1.800721e-11,-0.177703,1
1194,Nuclei_Texture_Variance_RFP_3_02_256,0.175868,5.346385e-12,-0.183398,1
