In [1]:
# Resolve the file paths used by the data_import stage
from ovarian_can.infrastructure.path_facade.data_import_path_resolver import DataImportPathResolver
from ovarian_can.data_import.abstract_path_resolver import AbstractDataImportPathResolver

# Depend on the abstract resolver interface; the concrete implementation
# supplies the actual locations.
resolver: AbstractDataImportPathResolver = DataImportPathResolver()

# Fetch every path this notebook needs in one step. The getters are
# evaluated left to right, in the order listed.
input_h5_path, csv_gz_path, decompressed_csv_path, output_path = (
    resolver.get_input_h5_path(),
    resolver.get_cells_csv_gz_path(),
    resolver.get_decompressed_csv_path(),
    resolver.get_output_joblib_path(),
)

In [2]:
import scanpy as sc
import pandas as pd
import joblib
import gzip
import shutil
import os


# Load the h5 expression data into an AnnData object.
adata = sc.read_10x_h5(input_h5_path)
print("AnnData loaded:", adata)

# Read the per-cell metadata directly from the gzip-compressed CSV.
# pandas decompresses on the fly, so no temporary decompressed copy has to be
# written to disk, and nothing is left behind if a later step in this cell fails.
df_cells = pd.read_csv(csv_gz_path, compression="gzip")
print("Cells dataframe loaded:", df_cells.head())

# Safety check: the CSV's cell_id column must match adata.obs_names exactly
# (same IDs, same order) before the metadata is attached.
assert adata.obs_names.to_list() == df_cells["cell_id"].to_list(), \
       "Cell IDs do not match between h5 data and cells.csv!"

# Attach the CSV metadata to adata.obs, indexed by cell_id.
# Rebinding (instead of inplace=True) leaves df_cells in the same final state
# while avoiding in-place mutation of the freshly loaded frame.
df_cells = df_cells.set_index("cell_id")
adata.obs = df_cells.loc[adata.obs_names].copy()



  """ Implements modularity. This quality function is well-defined only for positive edge weights.
  """ Implements Reichardt and Bornholdt's Potts model with a configuration null model.
  """ Class for doing community detection using the Leiden algorithm.
  """ Optimise the given partitions simultaneously.


AnnData loaded: AnnData object with n_obs × n_vars = 247636 × 480
    var: 'gene_ids', 'feature_types', 'genome'
Cells dataframe loaded:       cell_id  x_centroid  y_centroid  transcript_counts  \
0  aaaacidg-1  123.689438  802.634216                 49   
1  aaaajnee-1  129.189560  813.227417                 31   
2  aaaalogb-1  122.865044  774.362549                  6   
3  aaabacli-1  136.588470  772.567688                  2   
4  aaabbgdg-1  142.446518  828.253662                  3   

   control_probe_counts  control_codeword_counts  unassigned_codeword_counts  \
0                     0                        0                           0   
1                     0                        0                           0   
2                     0                        0                           0   
3                     0                        0                           0   
4                     0                        0                           0   

   deprecated_codewor

In [3]:
# Serialize the annotated AnnData object to the resolved output path.
joblib.dump(value=adata, filename=output_path)
print(f"AnnData object saved to {output_path}")

AnnData object saved to /Users/masaki/ovarian_can/data/interim/xenium_ovarian_cancer_preprocessed.joblib
