In [20]:
import numpy as np
import rpy2.robjects as ro
from scipy.sparse import csc_matrix

# Load the RData file; the R expressions below all read the `sc_count` object.
ro.r['load']('../Data/sc_count.RData')

# Matrix dimensions as plain Python ints
nrow, ncol = (int(d) for d in ro.r('dim(sc_count)'))

# The three compressed-column slots of the R sparse matrix
x = np.array(ro.r('sc_count@x'), dtype=float)       # stored values
p = np.array(ro.r('sc_count@p'), dtype=np.int32)    # column pointers
i = np.array(ro.r('sc_count@i'), dtype=np.int32)    # row indices (0-based)

# Assemble in CSC form (the layout the slots describe), then switch to CSR.
# X keeps R's orientation here: rows = rownames(sc_count), columns = colnames(sc_count).
X = csc_matrix((x, i, p), shape=(nrow, ncol))
X = X.tocsr()

# Row / column labels as plain Python strings
genes = list(map(str, ro.r('rownames(sc_count)')))
cells = list(map(str, ro.r('colnames(sc_count)')))
print(X.shape, len(genes), len(cells))


# ---------- Load metadata (2nd RData) ----------
import anndata as ad
import pandas as pd
from rpy2.robjects.conversion import localconverter
from rpy2.robjects import pandas2ri

# Inputs built earlier in this cell: X (rows = genes, columns = cells), genes, cells.

# Load metadata RData
ro.r['load']('../Data/sc_meta.RData')


def _find_meta_frame(ids):
    """Return the first R data.frame whose rownames cover every entry of `ids`.

    Scans the objects listed by R's `ls()`. The first data.frame whose
    rownames contain all of `ids` is converted to pandas and reordered to
    follow `ids`.

    Parameters
    ----------
    ids : list of str
        Labels (cell IDs or gene names) that must all appear as rownames.

    Returns
    -------
    pandas.DataFrame or None
        The matching frame with rows in the order of `ids`, or None when no
        data.frame in the R session covers all of `ids`.
    """
    wanted = set(ids)
    for nm in list(ro.r('ls()')):
        # Backtick-quote the name: a non-syntactic R name (e.g. `my-data`)
        # would otherwise be parsed as an expression and raise an R error.
        sym = f'`{nm}`'
        if 'data.frame' not in list(ro.r(f'class({sym})')):
            continue
        rn = {str(s) for s in ro.r(f'rownames({sym})')}
        if wanted.issubset(rn):
            with localconverter(ro.default_converter + pandas2ri.converter):
                return ro.conversion.rpy2py(ro.r[nm]).loc[list(ids)]
    return None


# Cell metadata: a data.frame whose rownames include every cell ID;
# fall back to an empty frame indexed by the cells.
cell_meta = _find_meta_frame(cells)
if cell_meta is None:
    cell_meta = pd.DataFrame(index=cells)

# (Optional) gene metadata: same search keyed on gene names.
gene_meta = _find_meta_frame(genes)
if gene_meta is None:
    gene_meta = pd.DataFrame(index=genes)

# AnnData expects observations (cells) in rows, so transpose X.
adata = ad.AnnData(X=X.T.tocsr(), obs=cell_meta, var=gene_meta)
adata.obs_names = cells
adata.var_names = genes
# Make var_names unique if needed
adata.var_names_make_unique()
adata.write('../Data/sc_data.h5ad')
print(adata)



(19736, 1926) 19736 1926
AnnData object with n_obs × n_vars = 1926 × 19736
    obs: 'cellID', 'cellType', 'sampleInfo'


In [21]:
# Bare last expression: the notebook displays the repr of `adata`.
adata

AnnData object with n_obs × n_vars = 1926 × 19736
    obs: 'cellID', 'cellType', 'sampleInfo'

In [33]:

# pip install anndata pandas scipy rpy2
import numpy as np, pandas as pd, anndata as ad
import rpy2.robjects as ro
from rpy2.robjects.conversion import localconverter
from rpy2.robjects import pandas2ri
from scipy.sparse import csc_matrix

# ---- Load counts ----
ro.r['load']('../Data/spatial_count.RData')      # contains 'spatial_count' (dgCMatrix)
i = np.array(ro.r('spatial_count@i'), dtype=np.int32)   # row indices (0-based)
p = np.array(ro.r('spatial_count@p'), dtype=np.int32)   # column pointers
x = np.array(ro.r('spatial_count@x'), dtype=float)      # stored values
nrow, ncol = map(int, list(ro.r('dim(spatial_count)')))
genes = [str(s) for s in list(ro.r('rownames(spatial_count)'))]
cells_counts = [str(s) for s in list(ro.r('colnames(spatial_count)'))]

# R's dgCMatrix is CSC → build CSC then transpose to cells×genes
X = csc_matrix((x, i, p), shape=(nrow, ncol)).T.tocsr()   # shape: n_cells × n_genes

# ---- Load locations ----
ro.r['load']('../Data/spatial_location.RData')   # contains 'spatial_location' (data.frame)
with localconverter(ro.default_converter + pandas2ri.converter):
    loc = ro.conversion.rpy2py(ro.r['spatial_location'])

# NOTE: this relies on the R rownames having become the pandas index. If they
# did not carry over, set the index from rownames(spatial_location) first.

# Keep only x,y; ensure numeric (non-numeric entries become NaN and are reported below)
loc = loc[['x','y']].apply(pd.to_numeric, errors='coerce')
# Normalise the index to str once, so the ID matching and the .loc lookup
# below operate on exactly the same labels.
loc.index = loc.index.astype(str)
loc.index.name = 'cell'

# ---- Align counts to locations by cell/spot IDs ----
cells_loc = set(loc.index)

# Intersect to be safe (drops any non-overlapping spots)
keep_cells = sorted(set(cells_counts).intersection(cells_loc))
if not keep_cells:
    raise RuntimeError("No overlapping spot IDs between counts and locations.")

# Make the drop visible instead of silent.
n_counts_only = len(set(cells_counts)) - len(keep_cells)
n_loc_only = len(cells_loc) - len(keep_cells)
if n_counts_only or n_loc_only:
    print(f"Dropped {n_counts_only} spot(s) without a location and "
          f"{n_loc_only} location(s) without counts.")

# Reorder everything to the same cell order
order_in_counts = {cell: pos for pos, cell in enumerate(cells_counts)}
idx = np.array([order_in_counts[c] for c in keep_cells], dtype=int)
X = X[idx, :]                                # subset
obs = loc.loc[keep_cells].copy()             # x,y per cell
var = pd.DataFrame(index=pd.Index(genes, name='gene'))

# Coordinates that failed numeric conversion are NaN; flag them so they are
# not silently written into obsm['spatial'].
n_nan = int(obs[['x','y']].isna().any(axis=1).sum())
if n_nan:
    print(f"Warning: {n_nan} spot(s) have missing or non-numeric x/y coordinates (stored as NaN).")

# ---- Build AnnData ----
adata = ad.AnnData(X=X, obs=obs, var=var)
adata.obs_names = keep_cells
adata.var_names = genes
adata.var_names_make_unique()

# Standard slot for spatial coords (n_cells × 2)
adata.obsm['spatial'] = adata.obs[['x','y']].to_numpy()

adata.write('spatial.h5ad')
print(adata)
print("Stored coordinates in adata.obsm['spatial'] (columns: x, y). Saved to spatial.h5ad.")


AnnData object with n_obs × n_vars = 428 × 25753
    obs: 'x', 'y'
    obsm: 'spatial'
Stored coordinates in adata.obsm['spatial'] (columns: x, y). Saved to spatial.h5ad.
