# COSMX EXTRACT RUN CONFIG AND METADATA FOR SLIDE {{ COSMX_SLIDE_NAME }}
* **Notebook version:** v0.0.1
* **Created by:** NIHR Imperial BRC Genomics Facility
* **Maintained by:** NIHR Imperial BRC Genomics Facility
* **Docker image path:** [Dockerfile](https://github.com/imperial-genomics-facility/igf-dockerfiles/tree/main/cosmx/Dockerfile_v1)
* **Notebook code path:** [Templates](https://github.com/imperial-genomics-facility/igf-dockerfiles/tree/main/cosmx/)
* **Created on:** {{ DATE_TAG }}
* **Contact us:** [NIHR Imperial BRC Genomics Facility - Contact us](https://www.imperial.ac.uk/medicine/research-and-impact/facilities/genomics-facility/contact-us/)
* **License:** [Apache License 2.0](https://github.com/imperial-genomics-facility/igf-dockerfiles/blob/main/LICENSE)


* **Project name:** {{ COSMX_PROJECT_NAME }}
* **COSMX slide name:** {{ COSMX_SLIDE_NAME }}

In [None]:
import os
import json
from pathlib import Path
import pandas as pd
from typing import Dict, List

In [None]:
# Input/output locations for this notebook. The quoted values are template
# placeholders, presumably filled in by the templating step before execution.
# Slide raw files directory; searched for <run dir>/RunSummary/*ExptConfig.txt.
SLIDE_RAW_FILES_DIR = "{{ SLIDE_RAW_FILES_DIR }}"
# Slide flat files directory; searched for *metadata_file.csv.gz.
SLIDE_FLAT_FILES_DIR = "{{ SLIDE_FLAT_FILES_DIR }}"
# Path of the JSON file written by create_slide_metadata_json.
JSON_OUTPUT_PATH = "{{ JSON_OUTPUT_PATH }}"

In [None]:
def find_expt_config_file(slide_raw_files_dir: str) -> str:
    """Locate the ``*ExptConfig.txt`` file inside a slide's raw files directory.

    Every entry of ``slide_raw_files_dir`` other than ``Logs`` is treated as a
    candidate run directory expected to hold a ``RunSummary`` sub-directory.
    Candidates are visited in sorted order so the selection is deterministic,
    and the first candidate that actually contains ``RunSummary`` is used.

    Args:
        slide_raw_files_dir: Path to the slide's raw files directory.

    Returns:
        POSIX-style path of the first (sorted) ``*ExptConfig.txt`` file found
        in the selected ``RunSummary`` directory.

    Raises:
        ValueError: If the directory has no entries besides ``Logs``, or if
            the ``RunSummary`` directory holds no ``*ExptConfig.txt`` file.
        IOError: If none of the candidates contains a ``RunSummary`` path.
    """
    raw_slides_path = Path(slide_raw_files_dir)
    candidate_dirs = [
        raw_slides_path / dir_name / "RunSummary"
        for dir_name in sorted(os.listdir(raw_slides_path))
        if dir_name != "Logs"]
    if not candidate_dirs:
        raise ValueError("No RunSummary path found")
    # Skip stray files or unrelated folders that have no RunSummary inside,
    # instead of failing on whichever entry happens to be listed first.
    existing_dirs = [
        candidate for candidate in candidate_dirs if candidate.exists()]
    if not existing_dirs:
        raise IOError(f"RunSummary path {candidate_dirs[0]} not found")
    run_summary_dir = existing_dirs[0]
    expt_config_files = sorted(run_summary_dir.glob("*ExptConfig.txt"))
    if not expt_config_files:
        raise ValueError(f"No ExptConfig.txt file found in {run_summary_dir}")
    return expt_config_files[0].as_posix()

In [None]:
def read_expt_config_file(expt_config_file: str) -> Dict[str, str]:
    """Parse an ExptConfig text file of ``key: value`` lines into a dict.

    Each non-blank line is split on its first ``:`` only, so values that
    themselves contain colons (for example timestamps) are kept intact.
    A line without any colon is stored as a key with an empty-string value.
    Blank lines are ignored.

    Args:
        expt_config_file: Path to the ExptConfig text file.

    Returns:
        Mapping of each key to its value with surrounding whitespace removed.
        When a key occurs more than once, the last occurrence wins.

    Raises:
        IOError: If ``expt_config_file`` does not exist.
    """
    if not os.path.exists(expt_config_file):
        raise IOError(f"ExptConfig file {expt_config_file} not found")
    run_summary = dict()
    with open(expt_config_file, 'r') as fp:
        for line in fp:
            line = line.strip()
            if not line:
                # A blank line carries no key; do not record an empty entry.
                continue
            # partition splits on the first colon only; when there is no
            # colon the whole line becomes the key and the value is "".
            key, _, value = line.partition(":")
            run_summary[key] = value.lstrip()
    return run_summary

In [None]:
def get_slide_metadata_info(slide_flat_files_dir: str) -> Dict[str, str]:
    """Read slide-level metadata from the flat files metadata CSV.

    Looks for files matching ``*metadata_file.csv.gz`` directly inside
    ``slide_flat_files_dir``, takes the first match in sorted order, and reads
    only its first data row. The values of the ``assay_type``, ``version``,
    ``Run_Tissue_name`` and ``Panel`` columns of that row are returned.

    Args:
        slide_flat_files_dir: Path to the slide's flat files directory.

    Returns:
        Dict with the keys ``assay_type``, ``version``, ``Run_Tissue_name``
        and ``Panel``; values are whatever pandas parsed for the first row.

    Raises:
        IOError: If no ``*metadata_file.csv.gz`` file is found.
        KeyError: If any of the four expected columns is missing.
        ValueError: If the metadata file contains no data rows.
    """
    flat_files_path = Path(slide_flat_files_dir)
    # Sort the matches so the chosen file does not depend on listing order.
    metadata_files = sorted(flat_files_path.glob("*metadata_file.csv.gz"))
    if not metadata_files:
        raise IOError(
            f"Failed to find metadata file in flatFiles dir {slide_flat_files_dir}")
    metadata_file = metadata_files[0]
    # Only the first row is needed: the selected columns are slide-level
    # values (assumed constant across rows - TODO confirm with data owner).
    metadata_df = \
        pd.read_csv(
            metadata_file,
            compression="gzip",
            nrows=1)
    slide_metadata = \
        metadata_df[['assay_type', 'version', 'Run_Tissue_name', 'Panel']].\
        to_dict(orient='records')
    if not slide_metadata:
        raise ValueError(
            f"No metadata info found in file {metadata_file.as_posix()}")
    return slide_metadata[0]

In [None]:
def create_slide_metadata_json(slide_raw_files_dir: str, slide_flat_files_dir: str, json_file_path: str) -> None:
    """Collect run config and slide metadata and write them out as one JSON file.

    The ExptConfig file found under ``slide_raw_files_dir`` is parsed first,
    then the slide metadata read from ``slide_flat_files_dir`` is layered on
    top of it, so metadata values win when both sources share a key.

    Args:
        slide_raw_files_dir: Slide raw files directory holding the run config.
        slide_flat_files_dir: Slide flat files directory holding the metadata.
        json_file_path: Path of the JSON file to write (overwritten if present).
    """
    run_config = read_expt_config_file(
        find_expt_config_file(slide_raw_files_dir))
    metadata_fields = get_slide_metadata_info(slide_flat_files_dir)
    # Fold the metadata into the run config in place; later keys override.
    for field_name, field_value in metadata_fields.items():
        run_config[field_name] = field_value
    with open(json_file_path, 'w') as json_handle:
        json.dump(run_config, json_handle)

In [None]:
# Run the extraction for this slide: combine the run config and the slide
# metadata and write the result to JSON_OUTPUT_PATH.
create_slide_metadata_json(
    slide_raw_files_dir=SLIDE_RAW_FILES_DIR,
    slide_flat_files_dir=SLIDE_FLAT_FILES_DIR,
    json_file_path=JSON_OUTPUT_PATH)