In [1]:
## Import packages ##
# File I/O
import os
from datetime import datetime

# Data structures and basic operations
import numpy as np
import pandas as pd
import anndata as ad
import scipy.sparse as sp

# Runtime warnings
import warnings
warnings.filterwarnings('ignore', category=UserWarning, module='anndata')
warnings.filterwarnings('ignore', category=FutureWarning, module='anndata')

In [2]:
## Directory information ##

# Resolve the base directory (the parent of the working directory at the time
# this cell is first executed) exactly once. Calling os.chdir('..')
# unconditionally is not idempotent: every re-run of the cell would climb one
# more level and silently break the relative paths defined below.
cwd = os.getcwd()
if 'base_directory' not in globals():
    base_directory = os.path.abspath(os.path.join(cwd, os.pardir))

# Change cwd to the base directory (safe to re-run: the target never changes)
os.chdir(base_directory)

# Input (path is relative to the base directory)
input_file='test_anndatas/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz'

# Output (path is relative to the base directory)
output_directory='notebooks/GpC_conversion_SMF'

In [3]:
## Load the AnnData object stored at `input_file` ##
# `input_file` is a relative path, so it is resolved against the current
# working directory at the time this cell runs.
adata = ad.read_h5ad(filename=input_file)

In [4]:
# Add new observation metadata based on old metadata
# Define the mapping from old values (entries of obs['Sample']) to new values
# (entries of the new obs['Sample_names'] column)
value_mapping = {
    'barcode0001_sorted': 'B6-BALBc-',
    'barcode0002_sorted': 'BALBc+',
    'barcode0003_sorted': 'B6+',
    'barcode0004_sorted': 'B6+BALBc+',
}

# Create a new observation column based on the mapping
adata.obs['Sample_names'] = adata.obs['Sample'].map(value_mapping)

# Series.map leaves NaN for every value that has no key in the mapping.
# Surface that here instead of letting unlabeled reads flow silently into the
# downstream filtering and saving steps. A warning (not an error) keeps the
# cell's previous behavior intact.
unmapped_mask = adata.obs['Sample_names'].isna()
if unmapped_mask.any():
    unmapped_samples = sorted(adata.obs.loc[unmapped_mask, 'Sample'].astype(str).unique())
    warnings.warn(f"No Sample_names mapping for 'Sample' values: {unmapped_samples}")

adata.obs

Unnamed: 0,Sample,Strand,Dataset,Reference,Sample_names
956b4605-f585-4bb6-ab41-b4a3113c2b56,barcode0001_sorted,top,5mC,6B6_5mC_top,B6-BALBc-
6f572b1f-2d00-44fa-9105-39ef453f2deb,barcode0001_sorted,top,5mC,6B6_5mC_top,B6-BALBc-
df3f8d4e-fc43-4cbb-a79d-717724e49ba6,barcode0001_sorted,top,5mC,6B6_5mC_top,B6-BALBc-
cd084f52-cefd-47b7-b350-35794caae7a5,barcode0001_sorted,top,5mC,6B6_5mC_top,B6-BALBc-
306b1151-a1eb-4d04-8bbb-b598e5a45e7c,barcode0001_sorted,top,5mC,6B6_5mC_top,B6-BALBc-
...,...,...,...,...,...
41d110ee-d057-4702-8752-6f82ebbf1d72,barcode0004_sorted,top,5mC,6BALB_cJ_5mC_top,B6+BALBc+
9379f078-275f-429a-bb2f-62fa2c468905,barcode0004_sorted,top,5mC,6BALB_cJ_5mC_top,B6+BALBc+
011b5fc6-64e2-44a4-84d3-681920633c50,barcode0004_sorted,top,5mC,6BALB_cJ_5mC_top,B6+BALBc+
a51ebe0c-f2d6-4250-bc66-220e93297803,barcode0004_sorted,top,5mC,6BALB_cJ_5mC_top,B6+BALBc+


In [7]:
# Split the AnnData object into two subsets according to an observation column
# Observation column whose values decide which subset a row belongs to
obs_column = 'Sample'

# Values of that column selected for each subset
first_subset_values = ['barcode0001_sorted', 'barcode0002_sorted']
second_subset_values = ['barcode0003_sorted', 'barcode0004_sorted']

# Build one boolean row mask per subset and index the AnnData object with it
in_first_subset = adata.obs[obs_column].isin(first_subset_values)
in_second_subset = adata.obs[obs_column].isin(second_subset_values)
filtered_adata = adata[in_first_subset]
filtered_adata_II = adata[in_second_subset]

# Show the observation tables of both subsets
print(filtered_adata.obs, filtered_adata_II.obs)

                                                  Sample Strand Dataset  \
956b4605-f585-4bb6-ab41-b4a3113c2b56  barcode0001_sorted    top     5mC   
6f572b1f-2d00-44fa-9105-39ef453f2deb  barcode0001_sorted    top     5mC   
df3f8d4e-fc43-4cbb-a79d-717724e49ba6  barcode0001_sorted    top     5mC   
cd084f52-cefd-47b7-b350-35794caae7a5  barcode0001_sorted    top     5mC   
306b1151-a1eb-4d04-8bbb-b598e5a45e7c  barcode0001_sorted    top     5mC   
...                                                  ...    ...     ...   
0ef7ea67-e75f-406e-b0f0-ed4663933603  barcode0002_sorted    top     5mC   
22357cd4-5f30-4fc9-a708-b240bd1b3d31  barcode0002_sorted    top     5mC   
e37fe3e7-d173-4c82-971e-2d46cc5df3fd  barcode0002_sorted    top     5mC   
284aa105-4171-4436-88c7-2d38b854b663  barcode0002_sorted    top     5mC   
e4ebef53-62e6-4bf1-a3fd-d5e6581d452f  barcode0002_sorted    top     5mC   

                                             Reference Sample_names  
956b4605-f585-4bb6-ab41-b4a31

In [16]:
# Concatenate anndata objects along the observations (rows)
# ad.concat defaults to merge=None and uns_merge=None, which discards the
# annotations aligned to the other axis (.var columns, .varm) as well as .uns.
# Requesting 'same' keeps every such element that is identical across the
# inputs, so metadata shared by both subsets survives the concatenation.
concatenated_adata = ad.concat(
    [filtered_adata, filtered_adata_II],
    join='outer',
    axis=0,
    merge='same',
    uns_merge='same',
)

In [None]:
# Save anndata objects
# Write into the output directory declared in the directory-information cell
# rather than into whatever the current working directory happens to be.
# Create the directory first so the write does not fail on a fresh checkout.
os.makedirs(output_directory, exist_ok=True)
output_file = os.path.join(output_directory, 'concatenated_adata.h5ad.gz')
concatenated_adata.write_h5ad(output_file, compression='gzip')