# Combine run parquet files into one

## Import libraries

In [1]:
import pandas as pd
import pathlib

from pycytominer.cyto_utils import output

## Set paths

In [2]:
# directory holding the annotated single-cell parquet file for each run
annotated_dir = pathlib.Path("./data/annotated_data")

# directory where the combined parquet file is saved to;
# parents=True creates any missing parent (e.g. ./data) instead of raising
# FileNotFoundError, and exist_ok=True keeps the cell safe to re-run
output_dir = pathlib.Path("./data/combined_data")
output_dir.mkdir(parents=True, exist_ok=True)

# destination of the combined run parquet file
merged_runs_path = output_dir / "SHSY5Y_sc.parquet"

In [3]:
# annotated single-cell parquet file for each individual run (batch 1 and batch 2)
first_run_sc_path = annotated_dir / "SHSY5Y_batch_1_sc.parquet"
second_run_sc_path = annotated_dir / "SHSY5Y_batch_2_sc.parquet"

## Combine the parquet files into one

In [4]:
# load each annotated run into its own pandas dataframe
first_run_sc = pd.read_parquet(first_run_sc_path)
second_run_sc = pd.read_parquet(second_run_sc_path)

# stack the two runs row-wise; ignore_index=True gives the combined frame
# a fresh 0..n-1 index rather than repeating each run's own row labels
SHSY5Y_run_sc = pd.concat((first_run_sc, second_run_sc), ignore_index=True)

# write the combined dataframe to disk as a parquet file
output(df=SHSY5Y_run_sc, output_filename=merged_runs_path, output_type="parquet")
print(f"The runs have been merged into one file called {merged_runs_path.name}!")

The runs have been merged into one file called SHSY5Y_sc.parquet!


In [5]:
# sanity check on the merge: the combined frame should have approximately 600,000 rows
print(SHSY5Y_run_sc.shape)
# preview the first five rows of the combined frame
SHSY5Y_run_sc.head(n=5)

(597902, 2927)


Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_Site,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,B13,3765,1,6,Media ctr,,,media ctr,,...,5.562025,4.98208,9.650958,9.534828,10.401584,9.631431,2.530134,2.475556,2.686235,2.645955
1,SH-SY5Y,B13,3765,1,6,Media ctr,,,media ctr,,...,2.089223,2.160826,37.217708,32.460108,34.302228,34.505932,1.553695,1.473267,1.461804,1.450196
2,SH-SY5Y,B13,3765,1,6,Media ctr,,,media ctr,,...,1.629784,1.400294,3.189469,3.525086,3.42255,2.919814,1.638686,1.847693,1.771707,1.496571
3,SH-SY5Y,B13,3765,1,6,Media ctr,,,media ctr,,...,1.099983,1.055338,1.459082,1.430853,1.583214,1.526601,0.862926,0.842006,0.897397,0.855124
4,SH-SY5Y,B13,3765,1,6,Media ctr,,,media ctr,,...,1.303876,1.19548,3.613733,3.474545,3.494755,3.221265,0.430443,0.448364,0.419315,0.401586
