## Import scanpy as sc

In [None]:
import scanpy as sc

## Settings

In [None]:
# Configure scanpy's logging level and default figure appearance.
# Verbosity levels: 0 = errors, 1 = warnings, 2 = info, 3 = hints.
sc.settings.verbosity = 3
sc.settings.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
)

## Reading in data
- see snippet blocks for indexer usage

For all arguments see docs: https://scanpy.readthedocs.io/en/stable/api/reading.html

In [None]:
# Each call below is an independent alternative - pick the reader that matches
# your input format. Every result is assigned to `adata` so the snippet can be
# copied as-is.

# Generic
adata = sc.read('/path/to/data.h5ad')

# 10x directory containing matrix.mtx, barcodes.tsv and features.tsv
adata = sc.read_10x_mtx('/path/to/data', var_names='gene_symbols')

# mtx
adata = sc.read_mtx('/path/to/data.mtx', dtype='float32')

# h5ad (AnnData's HDF5-based format)
adata = sc.read_h5ad('/path/to/data.h5ad')

# HDF (generic .h5 file; `key` is the name of the dataset inside the file)
adata = sc.read_hdf('/path/to/data.h5', key='name_of_dataset')

# 10x H5
adata = sc.read_10x_h5('/path/to/data.h5')

# csv (for tsv pass delimiter='\t')
adata = sc.read_csv('/path/to/data.csv')

# txt
adata = sc.read_text('/path/to/data.txt')

# loom
adata = sc.read_loom('/path/to/data.loom')

# visium - expects the Space Ranger output directory, not a single file
adata = sc.read_visium('/path/to/visium_data')

# excel
adata = sc.read_excel('/path/to/data.xlsx')

# umi-tools
adata = sc.read_umi_tools('/path/to/gzipped_condensed_count_matrix_from_umi_tools')

### Reading in data backed

For more on working with backed data, see X

In [None]:
# Read-only backed mode ('r'): .X is not loaded in.
adata = sc.read(
    '/path/to/data.h5ad',
    backed='r',
)

# Read/write backed mode ('r+'): modifies the backed attributes of the
# AnnData object - dangerous to do!!!
adata = sc.read(
    '/path/to/data.h5ad',
    backed='r+',
)
# ... do stuff ...
adata.file.close()  # close the backing file once finished

### Modifications when reading the data in

In [None]:
# Read the file and transpose it in one go, so that obs and var swap places.
adata = sc.read('/path/to/data.h5ad').transpose()

## Copy the scanpy object

In [None]:
# Make an independent copy of the object to keep as a backup.
adata_backup = adata.copy()

# NOTE: slicing (e.g. adata[:]) only gives a *view* of the same object rather
# than a copy, so it must not be used as a backup.

## Combine scanpy objects

In [None]:
# NOTE(review): newer anndata releases deprecate AnnData.concatenate in favour
# of anndata.concat - confirm against the installed version before relying on it.

# way 1 - from a list, keeping the original obs names untouched
adata_list = [adata1, adata2]
adata = sc.AnnData.concatenate(
    *adata_list,
    join='inner',
    batch_categories=None,
    index_unique=None,
)

# way 2 - called on the first object, with named batches
adatas = [adata_1, adata_2, adata_3]
adata = adatas[0].concatenate(
    *adatas[1:],  # unpack: the remaining objects are separate positional arguments
    join='inner',
    batch_key='integrated_data_ids',
    batch_categories=['data1', 'data2', 'data3'],
    index_unique='-',
)

# way 3 - from a dict of {sample name: AnnData}
adata_dict = {
    'sample1': adata_sample1,
    'sample2': adata_sample2,
    'sample3': adata_sample3,
}

adata = sc.AnnData.concatenate(
    *adata_dict.values(),  # objects go in positionally (not as **keywords)
    join='inner',  # Specify the type of join
    batch_key='sample_id',  # Specify the batch key
    batch_categories=list(adata_dict),  # dict keys, in the same order as the values
    index_unique='-',  # Specify index uniqueness
)

## Set raw to main .X

In [None]:
# Rebind `adata` to the AnnData object built from its .raw attribute.
# NOTE(review): the processed object is no longer reachable through `adata`
# afterwards - keep a backup first if it is still needed.
adata = adata.raw.to_adata()

## Subsetting

In [None]:
# Subset genes (columns) using the values of an adata.var column.
keep_genes = adata.var['intersect-0'] == True
adata = adata[:, keep_genes]

# Subset cells (rows) whose adata.obs value is in a predefined list.
defined_list = ['EC', 'Mac']
in_defined_list = adata.obs["column_of_interest"].isin(defined_list)
subset = adata[in_defined_list, :]
# or with the list written inline
in_inline_list = adata.obs["column_of_interest"].isin(['DC', 'Mac', 'Ery'])
subset = adata[in_inline_list, :]

# Keep everything *except* what is in the list (~ inverts the mask).
not_in_defined_list = ~adata.obs["column_of_interest"].isin(defined_list)
subset = adata[not_in_defined_list, :]

## Check that two scanpy datasets or their metadata are exactly the same

In [None]:
# Compare cell order: take the adata cells that also occur in adata2 (in
# adata's order) and check they line up with adata2's index.
shared_cells = adata.obs.index[adata.obs.index.isin(adata2.obs.index)]
list(shared_cells) == list(adata2.obs.index)

# Reorder adata (both .X and .obs) to follow adata2's cell order.
adata = adata[adata2.obs.index]

## Column manipulation in obs/var

In [None]:
# Snippets for working with the adata.obs DataFrame (the same calls work on
# adata.var). `row`, `column`, `index_num`, `column_num` and `value` are
# placeholders to fill in.

# create new column which mimics index
adata.obs["new_column"] = adata.obs.index

# drop index into dataframe as a column and replace index as normal numerical values
adata.obs = adata.obs.reset_index(level=0, inplace=False, drop=False)

# set index as column - will name the index the same as column name!
adata.obs.index = adata.obs["column"]

# rename a column
adata.obs.rename(columns={"old_name": "new_name"}, inplace=True)

# delete a column
del adata.obs["column"]

# to add a new column with a certain value
adata.obs['new_column'] = 'value'
# or as a copy of an existing column
adata.obs['new_column'] = adata.obs['old_column']

# how to check for NaN values in a column (one boolean per row)
adata.obs["column_of_interest"].isna()

# Check for NaN under a single DataFrame column
adata.obs['your column name'].isnull().values.any()

# Count the NaN under a single DataFrame column
adata.obs['your column name'].isnull().sum()

# Check for NaN under an entire DataFrame
adata.obs.isnull().values.any()

# Count the NaN under an entire DataFrame
adata.obs.isnull().sum().sum()

# to access cell value at a specific position
adata.obs.at[row, column]
adata.obs.loc[row].column
adata.obs.loc[row].at[column]
adata.obs.iat[index_num, column_num]
# e.g.
adata.obs.at[2, 'B']
adata.obs.loc[1].B
adata.obs.loc[1].at['B']
adata.obs.iat[1, 2]

# to set cell value
adata.obs.at[row, column] = value
adata.obs.iat[row, column] = value
# e.g.
adata.obs.at[1, 'C'] = "hi"
adata.obs.iat[1, 2] = 100

# to get list of unique values in column and numerical value attached e.g. cell types and cell numbers
adata.obs.groupby(['anno_column']).apply(len)

# value_counts (dropna=False also counts missing values)
adata.obs['anno_column'].value_counts(dropna=False)

# reorder categories
adata.obs["anno"] = adata.obs["anno"].cat.reorder_categories([
    'celltype_3',
    'celltype_2',
    'celltype_1',
    'celltype_4',
])

## Value replacement in obs/var

In [None]:
# Snippets for replacing / deriving values in adata.obs (the same calls work
# on adata.var). All column names and values are placeholders.

# to replace a string across a single column - example here is to have a column without additional string
# (regex=False: the pattern is a literal string, not a regular expression)
adata.obs['new_index'] = adata.obs.index.str.replace('-1-SIGAG5-SIGAG5', '-SIGAD11', regex=False)

# splitting string off a delimiter e.g. create a new column to split AAACCTGAGAGTGACC-1-SKN8104894 and just keep SKN8104894
# (the columns live on adata.obs - the AnnData object itself cannot be indexed by column name)
adata.obs["new_column"] = adata.obs["column_or_index"].str.split("-", n=2, expand=True)[2]
# [0] keeps AAACCTGAGAGTGACC, [1] keeps 1 and [2] keeps SKN8104894

# same idea, splitting the obs index and keeping the second piece
adata.obs['haniffa_sample_ID'] = adata.obs.index.str.split("-", expand=False).str[1]

# to copy over column from one dataframe to another with matching indexes
adata2.obs['column_copy'] = adata.obs['column']

# to copy over column data from one object to another when the indexes don't match fully:
# only the cells present in both are filled in.
# assumes vdj is a pandas Series indexed by cell barcode - TODO confirm
shared_cells = adata.obs.index[adata.obs.index.isin(vdj.index)]
adata.obs.loc[shared_cells, 'vdj_met'] = vdj.loc[shared_cells]

# to amend values in one column based on values in another column - note may need to set column to be string first (astype(str))
adata.obs.loc[(adata.obs['column_to_ref_by'] == "value_in_ref_column"), 'column_to_affect'] = "new_value"

# same, for several reference values at once (isin replaces looping over .loc)
Clusters_to_reassign = ['old_value_1', 'old_value_2', 'old_value_3', 'old_value_4']  # Want to call all of these clusters the same name
adata.obs.loc[adata.obs['column_to_ref_by'].isin(Clusters_to_reassign), 'column_to_affect'] = "new_value"

# Rename clusters using replace command
ker_ad.obs['fig2a_annotation'] = ker_ad.obs['leiden_bk_r0_5']
cluster_names = {
    '0': 'Placode/matrix',
    '1': 'Basal',
    '2': 'Periderm',
    '3': 'Periderm',
    '4': 'Basal',
    '5': 'Hair shaft/sheath',
    '6': 'Immature basal',
    '7': 'Basal IFE stem cell POSTN+',
}
# one replace per cluster, applied in the order listed above
for cluster_id, cluster_name in cluster_names.items():
    ker_ad.obs.replace({'fig2a_annotation': cluster_id}, cluster_name, inplace=True)

## Search for gene

In [None]:
# Fill in the patterns to look for. str.match treats them as regular
# expressions and matches from the start of each string.
name_pattern = ""
ensembl_pattern = ""

# Search adata.var by gene name
name_hits = adata.var_names.str.match(name_pattern)
adata.var[name_hits]

# Search adata.var by Ensembl ID
ensembl_hits = adata.var["gene_ids"].str.match(ensembl_pattern)
adata.var[ensembl_hits]

# Search adata.raw.var by gene name
raw_name_hits = adata.raw.var_names.str.match(name_pattern)
adata.raw.var[raw_name_hits]

# Search adata.raw.var by Ensembl ID
raw_ensembl_hits = adata.raw.var["gene_ids"].str.match(ensembl_pattern)
adata.raw.var[raw_ensembl_hits]

## Weird errors that come up when saving

In [None]:
# Workaround for write failures on category columns: cast every obs and var
# column to plain strings before saving.
for obs_column in adata.obs.columns:
    obs_as_text = adata.obs[obs_column].astype(str)
    adata.obs[obs_column] = obs_as_text
for var_column in adata.var.columns:
    var_as_text = adata.var[var_column].astype(str)
    adata.var[var_column] = var_as_text

In [None]:
# Workaround when writing fails because '_index' is a reserved name for
# dataframe columns: rename that column of the raw var table to 'Features'.
raw_attrs = adata.__dict__['_raw'].__dict__
raw_attrs['_var'] = raw_attrs['_var'].rename(columns={'_index': 'Features'})