In [1]:
import json

import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import spatialdata as sd

In [None]:
# Open the six wells (the control plus five Tg time points, as named in the file paths)
Control, Tg15min, Tg30min, Tg1h, Tg2h, Tg4h = (
    sd.read_zarr(zarr_store)
    for zarr_store in (
        '/storage/lingyuan2/STATES_data/cellline0101_C3control_2d_dr.zarr',
        '/storage/lingyuan2/STATES_data/cellline0101_B4Tg15min_2d_dr.zarr',
        '/storage/lingyuan2/STATES_data/cellline0101_B5Tg30min_2d_dr.zarr',
        '/storage/lingyuan2/STATES_data/cellline0101_B6Tg1h_2d_dr.zarr',
        '/storage/lingyuan2/STATES_data/cellline0101_C4Tg2h_2d_dr.zarr',
        '/storage/lingyuan2/STATES_data/cellline0101_C5Tg4h_2d_dr.zarr',
    )
)


def _points_frame(well):
    """Decode the JSON text in ``well.tables["table"].uns["points"]`` into a DataFrame."""
    return pd.DataFrame(json.loads(well.tables["table"].uns["points"]))


# One points DataFrame per well
points_df_Control = _points_frame(Control)
points_df_Tg15min = _points_frame(Tg15min)
points_df_Tg30min = _points_frame(Tg30min)
points_df_Tg1h = _points_frame(Tg1h)
points_df_Tg2h = _points_frame(Tg2h)
points_df_Tg4h = _points_frame(Tg4h)

In [None]:
# Label every well's points with its condition name (the column is added in place)
for condition_label, well_points in (
    ('Control', points_df_Control),
    ('Tg15min', points_df_Tg15min),
    ('Tg30min', points_df_Tg30min),
    ('Tg1h', points_df_Tg1h),
    ('Tg2h', points_df_Tg2h),
    ('Tg4h', points_df_Tg4h),
):
    well_points['condition'] = condition_label

# Stack the six wells into one long table with a fresh 0..n-1 row index
frames_in_order = [
    points_df_Control,
    points_df_Tg15min,
    points_df_Tg30min,
    points_df_Tg1h,
    points_df_Tg2h,
    points_df_Tg4h,
]
all_points_df = pd.concat(frames_in_order, ignore_index=True)

all_points_df

In [None]:
# Show only the points that carry a DR value
dr_is_missing = all_points_df['DR'].isna()
all_points_df.loc[~dr_is_missing]

In [5]:
# Integer code for each condition, numbered in the order the wells were stacked
condition_map = {
    label: code
    for code, label in enumerate(
        ['Control', 'Tg15min', 'Tg30min', 'Tg1h', 'Tg2h', 'Tg4h']
    )
}

# cell_idx is the string "<cell - 1>-<condition code>", i.e. a suffix is appended, not added.
# NOTE(review): later cells align on this key against adata.obs.index - confirm the formats match.
zero_based_cell = (all_points_df['cell'] - 1).astype(str)
condition_code = all_points_df['condition'].map(condition_map).astype(str)
all_points_df['cell_idx'] = zero_based_cell + '-' + condition_code


In [6]:
# Load the AnnData object from the pseudotime .h5ad file.
# scanpy (sc) is imported in the import cell at the top of the notebook.
adata = sc.read_h5ad('/storage/lingyuan2/STATES_data/pseudotime.h5ad')


In [None]:
# Display the AnnData object's summary as the cell output
adata

In [8]:
# Drop the points whose 'cell' value is 0.
# NOTE(review): cell == 0 presumably marks points not assigned to any cell - confirm.
# .copy() makes the result an independent DataFrame: a later cell adds a column to this
# table, and writing into a boolean-mask slice of all_points_df makes pandas emit
# SettingWithCopyWarning.
filtered_points_df = all_points_df.loc[all_points_df['cell'] != 0].copy()

# Keep the filtered points table on the AnnData object
adata.uns['points_df'] = filtered_points_df


In [None]:
# Gene name = the part of feature_name that precedes the first underscore
points_df = adata.uns['points_df']
feature_parts = points_df['feature_name'].str.split('_')
points_df['gene'] = feature_parts.str[0]

# Put the table back under the same key and show it
adata.uns['points_df'] = points_df
adata.uns['points_df']

In [13]:
###DR_All

# Points table stored on the AnnData object
points_df = adata.uns['points_df']

# Mean DR per (cell_idx, gene). GroupBy.mean skips NaN by default, so it yields the same
# numbers as applying a per-group lambda, without a Python-level call for every group.
dr_means = points_df.groupby(['cell_idx', 'gene'])['DR'].mean().reset_index()

# Reshape to a dense table with cell_idx as rows and genes as columns
dr_matrix = pd.pivot_table(dr_means, values='DR', index='cell_idx', columns='gene')

# Align rows and columns with adata; cells or genes without a DR value become NaN
dr_matrix = dr_matrix.reindex(index=adata.obs.index, columns=adata.var.index)



In [14]:
# Attach the aligned DR table to adata as the 'DR_All' layer
dr_all_values = dr_matrix.values
adata.layers['DR_All'] = dr_all_values

In [15]:
###DR_ntRNA

# Keep only the points whose feature name ends in '_ntRNA' (copied so it can be edited freely)
points_df = adata.uns['points_df']
ntRNA_df = points_df[points_df['feature_name'].str.endswith('_ntRNA')].copy()

# Strip the trailing '_ntRNA' only. removesuffix touches just the end of the string, whereas
# str.replace would also delete the text if it appeared elsewhere in a feature name.
ntRNA_df['gene'] = ntRNA_df['feature_name'].str.removesuffix('_ntRNA')

# Mean DR per (cell_idx, gene). GroupBy.mean skips NaN by default, so it yields the same
# numbers as applying a per-group lambda, without a Python-level call for every group.
dr_means = ntRNA_df.groupby(['cell_idx', 'gene'])['DR'].mean().reset_index()

# Reshape to a dense table with cell_idx as rows and genes as columns
dr_matrix = pd.pivot_table(dr_means, values='DR', index='cell_idx', columns='gene')

# Align rows and columns with adata; cells or genes without a DR value become NaN
dr_matrix = dr_matrix.reindex(index=adata.obs.index, columns=adata.var.index)

# Store as new layer
adata.layers['DR_ntRNA'] = dr_matrix.values

In [16]:
###DR_rbRNA

# Keep only the points whose feature name ends in '_rbRNA' (copied so it can be edited freely)
points_df = adata.uns['points_df']
rbRNA_df = points_df[points_df['feature_name'].str.endswith('_rbRNA')].copy()

# Strip the trailing '_rbRNA' only. removesuffix touches just the end of the string, whereas
# str.replace would also delete the text if it appeared elsewhere in a feature name.
rbRNA_df['gene'] = rbRNA_df['feature_name'].str.removesuffix('_rbRNA')

# Mean DR per (cell_idx, gene). GroupBy.mean skips NaN by default, so it yields the same
# numbers as applying a per-group lambda, without a Python-level call for every group.
dr_means = rbRNA_df.groupby(['cell_idx', 'gene'])['DR'].mean().reset_index()

# Reshape to a dense table with cell_idx as rows and genes as columns
dr_matrix = pd.pivot_table(dr_means, values='DR', index='cell_idx', columns='gene')

# Align rows and columns with adata; cells or genes without a DR value become NaN
dr_matrix = dr_matrix.reindex(index=adata.obs.index, columns=adata.var.index)

# Store as new layer
adata.layers['DR_rbRNA'] = dr_matrix.values

In [17]:
# Persist the AnnData object to disk as an .h5ad file
adata.write_h5ad(filename='/storage/lingyuan2/STATES_data/withDR.h5ad')


In [18]:
# Read the saved .h5ad file back into a separate AnnData object.
# anndata (ad) is imported in the import cell at the top of the notebook.
adataDR = ad.read_h5ad('/storage/lingyuan2/STATES_data/withDR.h5ad')


In [None]:
# Display the summary of the AnnData object that was read back from disk
adataDR