In [1]:
import phagepy as pp
import numpy as np
import scanpy as sc

In [27]:
# Build the test AnnData object from a small count matrix plus sample metadata.
# The paths are relative, so the CSVs are looked up in the working directory.
counts='test_counts.csv' # a 50 peptide x 5 obs count matrix (from mouseome)
meta='test_meta.csv' # contains 3/5 of the samples in the count matrix
adata=pp.create_anndata(counts, meta)

In [18]:
# Display the AnnData summary to check its dimensions and obs columns.
adata

AnnData object with n_obs × n_vars = 3 × 50
    obs: 'Group', 'Round', 'Barcode', 'Placeholder', 'PregGroup', 'Embryonic_day', 'Cross', 'Adjuvant', 'OT2', 'cOVA', 'Genotype'

In [8]:
# Arbitrary observations chosen to act as the control set for this test.
ctrl_ids=['AG9_R3_Demux2_S248_R1_001','B6_10C_R3_Demux2_S282_R1_001']

# need to define control first
adata.uns['control_keys']=ctrl_ids # normally define_ctrl_set() fxn would do this, but i picked arbitrary obs to be ctrl for test

# Keep the result under its own name instead of overwriting `adata`: the
# filtered object no longer contains the control observations, and later cells
# still index `adata` by `ctrl_ids`. This also makes the cell safe to re-run.
adata_no_ctrl=pp.filter_out_ctrl_set(adata)
adata_no_ctrl

View of AnnData object with n_obs × n_vars = 1 × 50
    obs: 'Group', 'Round', 'Barcode', 'Placeholder', 'PregGroup', 'Embryonic_day', 'Cross', 'Adjuvant', 'OT2', 'cOVA', 'Genotype'
    uns: 'control_keys'

In [14]:
# Same two control ids as in the control-filtering cell above; re-declared here
# so the cell that indexes `adata` by `ctrl_ids` has them in scope.
ctrl_ids=['AG9_R3_Demux2_S248_R1_001','B6_10C_R3_Demux2_S282_R1_001']


In [16]:
# Select the control observations by name, transpose their X matrix so there is
# one row per variable, and keep it as a var-aligned annotation.
# Requires every id in `ctrl_ids` to still be present in `adata`.
adata.varm['control_counts']=adata[ctrl_ids,:].X.T

In [19]:
# Show the per-observation metadata table (one row per sample).
adata.obs

Unnamed: 0,Group,Round,Barcode,Placeholder,PregGroup,Embryonic_day,Cross,Adjuvant,OT2,cOVA,Genotype
AG9_R3_Demux2_S248_R1_001,MockIP,3,AG9,,,,,,,,AG
B6_10B_R3_Demux2_S273_R1_001,Pregnancy,3,B6_10,B,B6_mOVA_OT2,6.5,B6_mOVA,No,Yes,No,B6
B6_10C_R3_Demux2_S282_R1_001,Pregnancy,3,B6_10,C,B6_mOVA_OT2,18.5,B6_mOVA,No,Yes,No,B6


In [23]:

def define_ctrl_set(ad, obs_key, obs_value, key_ids='control_ids'):
    """Record which observations of ``ad`` form the control set.

    Parameters
    ----------
    ad
        Object exposing an ``obs`` table and a dict-like ``uns``
        (an AnnData object in this notebook).
    obs_key
        Column of ``ad.obs`` to test.
    obs_value
        Value of ``obs_key`` that marks an observation as a control.
    key_ids
        Name of the ``ad.uns`` entry that receives the control ids.

    Returns
    -------
    The same ``ad`` object, modified in place: ``ad.uns[key_ids]`` holds the
    ``ad.obs`` index labels of the rows where ``obs_key`` equals ``obs_value``.
    """
    # Boolean mask over observations: True where the row is a control.
    is_control = ad.obs[obs_key] == obs_value

    # Store the matching obs names as an unstructured annotation.
    ad.uns[key_ids] = ad.obs.index[is_control]
    return ad

def filter_out_ctrl_set(ad, key_ids='control_ids', key_X='X_control'):
    """Set the control observations aside and return ``ad`` without them.

    The observations named in ``ad.uns[key_ids]`` are selected, their ``X``
    matrix is transposed and stored on ``ad`` as ``ad.varm[key_X]``, and the
    remaining observations are returned.

    Parameters
    ----------
    ad
        AnnData-like object; it is modified in place (``varm`` gains
        ``key_X``).
    key_ids
        Name of the ``ad.uns`` entry holding the control observation ids,
        as written by ``define_ctrl_set``.
    key_X
        Name of the ``ad.varm`` entry that receives the control values.

    Returns
    -------
    ``ad`` subset to the observations whose index label is not a control id.

    Raises
    ------
    ValueError
        If ``ad.uns`` has no ``key_ids`` entry, i.e. no control set has been
        defined yet.
    """
    # A missing uns entry raises KeyError (never NameError), so that is the
    # exception that has to be translated into the user-facing error.
    try:
        control_ids = ad.uns[key_ids]
    except KeyError as err:
        raise ValueError('Need to define control set first') from err

    # Transpose so the stored matrix has one row per variable.
    ad.varm[key_X] = ad[control_ids, :].X.T

    # Keep only the observations that are not part of the control set.
    not_control = ~ad.obs.index.isin(control_ids)
    return ad[not_control]

In [28]:
# Mark the observations whose 'Group' is 'MockIP' as the control set; their
# obs names are stored in adata.uns['MockIP_ids'].
adata=define_ctrl_set(adata, 'Group', 'MockIP',key_ids='MockIP_ids')
adata

AnnData object with n_obs × n_vars = 3 × 50
    obs: 'Group', 'Round', 'Barcode', 'Placeholder', 'PregGroup', 'Embryonic_day', 'Cross', 'Adjuvant', 'OT2', 'cOVA', 'Genotype'
    uns: 'MockIP_ids'

In [29]:
# Store the MockIP control values in varm['X_MockIP'] and drop those
# observations from adata.
# NOTE(review): this rebinds `adata` to the filtered result, so the cell is not
# idempotent -- on a second run the MockIP ids are no longer in `adata`.
adata=filter_out_ctrl_set(adata,key_ids='MockIP_ids', key_X='X_MockIP')
adata

View of AnnData object with n_obs × n_vars = 2 × 50
    obs: 'Group', 'Round', 'Barcode', 'Placeholder', 'PregGroup', 'Embryonic_day', 'Cross', 'Adjuvant', 'OT2', 'cOVA', 'Genotype'
    uns: 'MockIP_ids'
    varm: 'X_MockIP'

In [25]:
# Sanity-check the stored control matrix: one row per variable, one column per
# control observation. The key must match the `key_X` passed to
# filter_out_ctrl_set above ('X_MockIP'); 'X_control' is only the default.
adata.varm['X_MockIP'].shape

(50, 1)