In [2]:
import pickle
import os
from pathlib import Path

import pandas as pd

In [3]:
# Test well and cycle for the merge step.
# TEST_WELL is interpolated into the source and destination file names of the
# metadata and info tables copied below; TEST_CYCLE is interpolated into the
# SBS metadata file names only.
TEST_WELL = "A1"
TEST_CYCLE = 1

### Copy metadata files

In [4]:
# Copy the phenotype and SBS metadata tables for the test well into the
# analysis tree, converting each pickled table to a TSV file.


def _copy_metadata_pickle(source_fp, dest_fp, column_renames):
    """Load a pickled metadata table, rename its columns, and write it as TSV.

    Args:
        source_fp: Path of the pickle file to read. The unpickled object must
            provide ``rename(columns=...)`` and ``to_csv`` (i.e. behave like a
            pandas DataFrame).
        dest_fp: Path of the tab-separated file to write. Missing parent
            directories are created.
        column_renames: Mapping of current column names to new column names.
            With the default ``errors="ignore"`` of ``DataFrame.rename``,
            names that are absent from the table are skipped silently.

    Returns:
        The renamed table that was written to ``dest_fp``.
    """
    dest_fp = Path(dest_fp)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    dest_fp.parent.mkdir(parents=True, exist_ok=True)

    # NOTE: pickle.load can execute arbitrary code; only read trusted files.
    with open(source_fp, "rb") as f:
        metadata = pickle.load(f)

    # The file handle is no longer needed once the table is in memory.
    metadata = metadata.rename(columns=column_renames)
    metadata.to_csv(dest_fp, sep="\t", index=False)
    return metadata


metadata_source_dir = Path("/lab/barcheese01/screens/denali/metadata")
metadata_dest_dir = Path("analysis_root/preprocess/metadata")

# copy phenotype metadata file
ph_test_metadata = _copy_metadata_pickle(
    metadata_source_dir / f"20X_{TEST_WELL}.metadata.pkl",
    metadata_dest_dir / "phenotype" / f"W{TEST_WELL}__combined_metadata.tsv",
    {
        "field_of_view": "tile",
        "x_data": "x_pos",
        "y_data": "y_pos",
        # NOTE(review): no z entry here. This mapping used to carry the
        # identity entry "z_pos" -> "z_pos", which renames nothing. The SBS
        # mapping renames "z_data"; confirm whether the phenotype table needs
        # "z_data" -> "z_pos" as well.
    },
)

# copy sbs metadata file
sbs_test_metadata = _copy_metadata_pickle(
    metadata_source_dir / f"10X_c{TEST_CYCLE}-SBS-{TEST_CYCLE}_{TEST_WELL}.metadata.pkl",
    metadata_dest_dir / "sbs" / f"W{TEST_WELL}_C{TEST_CYCLE}__combined_metadata.tsv",
    {
        "field_of_view": "tile",
        "x_data": "x_pos",
        "y_data": "y_pos",
        "z_data": "z_pos",
    },
)

### Make minimal info files

In [5]:
# Anchor sites to copy over, given as (phenotype tile, SBS tile) pairs.
ANCHOR_SITES = [
    (5, 0),
    (141, 32),
    (370, 86),
    (896, 212),
    (1163, 270),
    (1599, 376),
]
# Split the pairs into one list of phenotype tiles and one list of SBS tiles,
# keeping the order of ANCHOR_SITES.
ph_tiles = [ph_tile for ph_tile, _ in ANCHOR_SITES]
sbs_tiles = [sbs_tile for _, sbs_tile in ANCHOR_SITES]

In [6]:
def _copy_info_tables(source_dir, dest_dir, info_type, well, tiles):
    """Copy the per-tile ``info_type`` CSV tables of one well to TSV files.

    A file in ``source_dir`` is copied when all of the following hold:

    * its name contains ``info_type``;
    * its name contains ``Tile-<int>`` followed by a ``.``, and that integer
      is in ``tiles``;
    * ``well`` equals one of the underscore-separated fields that precede
      ``Tile-`` (an exact match, so well ``A1`` does not pick up ``A10``).

    Files that do not follow this naming scheme are skipped instead of
    raising, so unrelated files in ``source_dir`` do not abort the copy.

    Args:
        source_dir: Directory holding the source CSV tables.
        dest_dir: Directory to write the TSV files into; created if missing.
        info_type: Table kind, e.g. ``"phenotype_info"``; used both to filter
            source files and as the suffix of the output file names.
        well: Well identifier to match, e.g. ``"A1"``.
        tiles: Iterable of tile numbers to copy.

    Returns:
        List of the TSV paths that were written, in directory-iteration order.
    """
    source_dir = Path(source_dir)
    dest_dir = Path(dest_dir)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    dest_dir.mkdir(parents=True, exist_ok=True)
    wanted_tiles = set(tiles)

    written = []
    for info_fp in source_dir.iterdir():
        file_name = info_fp.name
        if info_type not in file_name:
            continue

        # Expected shape: "<magnification>_<well>_Tile-<tile>.<info_type>.csv"
        prefix, _, remainder = file_name.partition("Tile-")
        try:
            tile = int(remainder.split(".")[0])
        except ValueError:
            # No "Tile-" marker or a non-integer tile field: not a tile table.
            continue

        if tile not in wanted_tiles or well not in prefix.split("_"):
            continue

        print(file_name)
        out_fp = dest_dir / f"W{well}_T{tile}__{info_type}.tsv"
        pd.read_csv(info_fp).to_csv(out_fp, sep="\t", index=False)
        written.append(out_fp)
    return written


# Copy the phenotype info tables of the test well for the phenotype anchor tiles.
phenotype_info_dir = Path("/lab/barcheese01/screens/denali/process_ph/tables/")
_copy_info_tables(
    phenotype_info_dir,
    "analysis_root/phenotype_process/tsvs/",
    "phenotype_info",
    TEST_WELL,
    ph_tiles,
)

# Copy the SBS info tables of the test well for the SBS anchor tiles.
sbs_info_dir = Path("/lab/barcheese01/screens/denali/process_sbs/tables/")
_copy_info_tables(
    sbs_info_dir,
    "analysis_root/sbs_process/tsvs/",
    "sbs_info",
    TEST_WELL,
    sbs_tiles,
)

20X_A1_Tile-1163.phenotype_info.csv
20X_A1_Tile-141.phenotype_info.csv
20X_A1_Tile-5.phenotype_info.csv
20X_A1_Tile-896.phenotype_info.csv
20X_A1_Tile-370.phenotype_info.csv
20X_A1_Tile-1599.phenotype_info.csv
10X_A1_Tile-86.sbs_info.csv
10X_A1_Tile-32.sbs_info.csv
10X_A1_Tile-212.sbs_info.csv
10X_A1_Tile-376.sbs_info.csv
10X_A1_Tile-270.sbs_info.csv
10X_A1_Tile-0.sbs_info.csv
