# What is this doing
Takes the QC file I created from the raw Single Cell data and proccesses it in to clusters and subclusters.

## Setup

### Imports

In [1]:
import numpy as np
import scanpy as sc
import os
import pandas as pd
import bbknn

### Single Cell settings

In [2]:
sc.settings.verbosity = 4
sc.settings.set_figure_params(80) 
sc.settings.file_format_figures = 'png'
sc.settings.savefigs = False
use_first_n_samples = 0
full_sparse = False

  sc.settings.set_figure_params(80)


### Defining a function for processing the data & determing the parameters

In [3]:
def processing_batch(adata, neighbors=15, key='batch', reso=0.4, random_state=0):  # Higher resolution yields more leiden clusters 
    sc.pp.scale(adata, max_value=10)
    sc.tl.pca(adata, svd_solver='arpack')
    sc.logging.print_memory_usage()
    sc.pp.neighbors(adata, n_neighbors=neighbors)
    sc.pl.pca_variance_ratio(adata, log=True)
    bbknn.bbknn(adata, batch_key=key, neighbors_within_batch=2, metric='euclidean', n_pcs=30, trim=None)
    sc.tl.umap(adata)
    sc.tl.leiden(adata, resolution=reso)
    sc.pl.umap(adata, color='leiden')

### Reading the excisting h5ad file I created previously in the QC pipeline 

In [None]:
#adata after QC
# adata = sc.read(r'/Code/data/single_cell/animals/frog/frog_2/tropicalis/''frog_2_after_qc.h5ad') 

sc.read_h5ad(r'/Code/data/single_cell/animals/frog/frog_2/laevis/''frog_2_after_qc.h5ad')

# If the anndata object is processed and contains the raw data in adata.raw run -  adata = adata.raw.to_adata()

## Preprocessing the data 

In [None]:
adata

In [None]:
adata.obs

In [None]:
adata.var

In [None]:
# Cleaning, normalizing, reducing dimensionality, clustering cells and creating visualizations.

sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(adata)
adata.raw = adata
# adata = adata[:, adata.var.highly_variable] # For filtering non Highly variable genes. We usually keep all the genes. 

In [None]:
sc.pp.regress_out(adata, ['total_counts','pct_counts_MT'])

# Got it from: https://github.com/theislab/single-cell-tutorial/issues/35
sc.pp.scale(adata, max_value=10)
sc.pp.pca(adata, n_comps=50, use_highly_variable=True, svd_solver='arpack')
sc.pp.neighbors(adata)
sc.tl.umap(adata) 

In [None]:
processing_batch(adata, key='treatment')

In [None]:
sc.tl.rank_genes_groups(adata, groupby='treatment', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=50, sharey=True)
pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(20)

# sc.pl.umap(adata[adata.obs['treatment'] == 'control'], color = ['PLAC8', 'CD83', 'CD74', 'CD79B', 'CD38', 'IL10', 'CD7'], size=50)

In [None]:
adata.obs['treatment'].value_counts()

### UMAP's

Markers test box

#### T cells

In [None]:
# T cells. 'CCR7' is a marker for T cells and also B cells. Markers from Roy's Extended_Data_Figures.
# Markers for T cells https://www.beckman.com/resources/cell-types/blood-cells/leukocytes/lymphocytes/t-cells
sc.pl.umap(adata, color=['CD3G','CD3E_1','CD3E_2','CD37L_1','CD37L_2','CD3Z_1',
                         'CD3Z_2','CD37_1','CD37_2','CD3EAP','STMN3_1',
                         'STMN3_2','LEF1_1','LEF1_2','LEF1_3','RORA_1','RORA_2',
                         'GPR17_1','CCR7_1','CCR7_3','CXCR3_2','TCF20_1',
                         'TCF20_2','TCF19_1','TCF19_2','IL7RA','HOP2_4','CD4_1',
                         'CD4_2','GZF1_1','TNFRSF9_1','TNFRSF9_2','TNFSF8',
                         'IL4R_1','IL4R_3','IL27B','IL2RB_1','IL2RB_2','IL2RG',
                         'IL21R_1','IL27_1','IL21R_2','TNF10_4','TNF11_1','TNF11_2',
                         'TBX21_1','IL10_2','IL10RA_1','GBP7_1','GBP7_2',
                         'SEPT6_1','SEPT6_2'], size=50)

sc.pl.umap(adata, color='leiden', size=30)

In [None]:
sc.pl.umap(adata, color=['TNF6B_1','TNF6B_2'], size=30)

#### B cells

In [None]:
# B cells markers from Roy's data and from https://panglaodb.se/markers.html?cell_type=%27Platelets%27#google_vignette

sc.pl.umap(adata, color=['CD79B_1','CD79B_2','CD38','IRF8_1','IRF8_2','IRF8_3',
                         'CCR7_1','PXK','BANK1','BACH2_1','BACH2_2','CD79A_1',
                         'CD79A_2','PLAC9_2','IGHM','CD81_1','CD81_2','CD81_4',
                         'CD82_2','CD84_1','CD84_3','CD22_4','CD22_5','CD22_6',
                         'CD22_7'], size=50)

celltype = {'0':'0','1':'T_cells','2':'2','3':'3','4':'4','5':'5','6':'6','7':'T_cells'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

#### RBC

In [None]:
# RBC markers from Roy's data and from https://panglaodb.se/markers

sc.pl.umap(adata, color=['HBAP1','HBA3_3','HBA3_6','HBA3_7','HBA1','HBA4','HBA2','HBB2_9',
                         'SLC30A1','ANK1_1','ANK1_2','HBB1','GPX1_1','GPX1_2'], size=50)

celltype = {'0':'B_cells','1':'1','2':'2','3':'3','4':'4','5':'5','6':'6','7':'T_cells'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=30)

#### Platelets

In [None]:
# Platelets markers from Roy's data and from https://panglaodb.se/markers.html?cell_type=%27Platelets%27#google_vignette

sc.pl.umap(adata, color=['PLEK','GNAS_1','GNAS_2','OAZ1_1','OAZ1_1','CD38',
                         'CD3EAP','ITGB3BP','IIGP5_1','IIGP5_2','IIGP5_3'], size=30)

celltype = {'0':'0','1':'1','2':'2','3':'3','4':'RBC','5':'5','6':'6','7':'T_cells'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

#### Monocytes

In [None]:
# Monocytes markers from Roy's data and from https://panglaodb.se/markers

sc.pl.umap(adata, color=['S100A11','TET2_1','TET2_1','TLR4','CCR2','TXNL1_1',
                         'TXNL1_2','TXND9','TXN4B','TXND5_1','TXND5_2','TXN4A',
                         'TXNIP_1','TXNIP_2','MAFB_1','MAFB_2','PPA5_1',
                         'GBP1_2','IRF3_1','IRF3_2','IRF3_3','PLS1_1','PLS1_3',
                         'PLS1_4','PLS1_6','PLS1_10','RGS12_1','RGS12_2','RGS14_1',
                         'RGS14_2','RGS1_1','RGS18_2','RGS1_2','RGS10_1','RGS19_1',
                         'RGS19_2','IFIT5_13','IFIT5_14','IFIT5_15','RSAD2_1',
                         'RSAD2_2','ZNFX1_1','ZNFX1_2','IFIH1','PARP1','CMPK2_1',
                         'CMPK2_2'], size=50)

celltype = {'0':'0','1':'1','2':'2','3':'3','4':'RBC','5':'5','6':'6','7':'T_cells'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

#### All clusters

In [None]:
celltype = {'0':'B_cells','1':'T_cells','2':'Monocytes','3':'3','4':'RBC','5':'Monocytes','6':'Monocytes','7':'T_cells'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=50)

In [None]:
celltype = {'0':'0','1':'1','2':'2','3':'3','4':'RBC','5':'5','6':'6','7':'7'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=30)

In [None]:
adata.obs['cell_type'].value_counts()

In [None]:
sc.pl.umap(adata, color=['CD1A'], size=50)
sc.pl.umap(adata, color='cell_type', size=50)

### Main data after manipulations for presenation

In [None]:
df_isg_genes = pd.read_csv('/Code/data/single_cell/animals/frog/frog_2/laevis/ISG_genes_frog_2_african.csv', index_col=0)
df_isg_genes = df_isg_genes.sort_index()

df_isg_genes

In [None]:
isg_genes_list = df_isg_genes.index.tolist()
isg_genes_list

#### UMAP's of the ISG genes

In [None]:
# celltype = {'0':'Platelets','1':'T_cells','2':'B_cells','3':'Monocytes','4':'T_cells','5':'RBC','6':'Monocytes'}
# adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color=isg_genes_list, size=50)

#### Dotplot of the ISG genes in expressed in turtle

In [None]:
sc.pl.dotplot(adata, isg_genes_list, groupby='treatment', standard_scale='var')

## Gene search tools
This function returns a list of genes that are present in adata (markers for example) from an input of a general list.

In [None]:
def find_gene(gene):
    GENES = list(adata.raw.var.index[adata.raw.var.index.str.startswith(gene.upper())])
    return GENES

find_gene('PSB8')

In [None]:
sc.pl.umap(adata[adata.obs['treatment'] == 'control'], color='TLR4', size=50)
sc.pl.dotplot(adata, 'TLR4', groupby='treatment', standard_scale='var')

sc.pl.umap(adata[adata.obs['treatment'] == 'lps'], color='TLR4', size=50)
sc.pl.dotplot(adata, 'TLR4', groupby='treatment', standard_scale='var')