# ConvexGating tutorial

### Load necessary packages for tutorial
Make sure that the required packages are properly installed. For the installation of **ConvexGating** see https://github.com/buettnerlab/convex_gating#installation.

In [1]:
import convexgating as cg
import scanpy as sc
import anndata as ann
import pandas as pd
from sklearn.datasets import make_blobs

  from .autonotebook import tqdm as notebook_tqdm


# Generate toy data set
We generate a toy data set with 5000 observations and 12 features originating from 3 cluster centers. Input file format for **ConvexGating** is **anndata**, see https://anndata.readthedocs.io/.

In [2]:
# Toy data: 5000 observations with 12 features drawn from 3 blob centers.
# random_state is fixed so that every "Restart & Run All" generates the same
# data; without it the clusters (and therefore the gates found below) change
# on each run. Outputs captured before seeding will show different numbers.
n_features = 12
X, y = make_blobs(5000, n_features=n_features, centers=3, cluster_std=3, random_state=0)

# Column names 'feature_1' ... 'feature_<n_features>'.
var_names = [f'feature_{j}' for j in range(1, n_features + 1)]

# Wrap the matrix in an AnnData object and label its variables (columns).
adata = ann.AnnData(X=X)
adata.var.index = var_names

# Clustering
First, apply clustering to obtain labels in an unsupervised fashion. Our method of choice is the **louvain algorithm**. Install the louvain package with pip: `pip install louvain`.

In [3]:
# Compute the neighborhood graph of the observations; must run before the clustering call.
sc.pp.neighbors(adata)
# Cluster the observations; the resulting labels are referenced below via cluster_string = 'louvain'.
sc.tl.louvain(adata)

# Apply ConvexGating
The standard gating function *cg.gating_strategy* graphically outputs a gating strategy and its corresponding performance measures. If the details of a gating strategy (e.g. exact gate locations) need to be extracted, apply *cg.FIND_GATING_STRATEGY* as follows.  

### Preprocess 

In [23]:
# Prepare the clustered AnnData for gating; 'louvain' names the cluster labels produced by sc.tl.louvain.
cell_data = cg.preprocess_adata_gating(adata, cluster_string = 'louvain')


### Find gating strategy for louvain clusters '0' and '2'

In [None]:
# Search for gates for louvain clusters '0' and '2', offering every feature
# of adata as a candidate channel. The three returned objects are inspected
# one by one in the sections that follow.
keys, gating_core, gating_overview = cg.FIND_GATING_STRATEGY(
    cell_data=cell_data,
    channels=list(adata.var.index),
    cluster_numbers=['0', '2'],
    cluster_string='louvain',
)

## Interpret output 

### *keys* 
dictionary that contains *louvain* cluster identifiers

In [25]:
# Show which louvain cluster label each integer key stands for.
keys

{0: '0', 1: '2'}

###  gating_core
dictionary whose integer keys are decoded by *keys*
- gating_core[0] corresponds to *louvain* cluster from keys[0] (here '0')
- gating_core[1] corresponds to *louvain* cluster from keys[1] (here '2')


#### gating_core[key][0] 
dictionary that shows chosen *features*, *cell labels* and *gate prediction* per cell per hierarchy
- gating_core[key][0]['1'] -> DataFrame for *hierarchy 1* 
- gating_core[key][0]['2'] -> DataFrame for *hierarchy 2* 
- ...


In [35]:
# First 10 rows of the hierarchy-1 DataFrame for the cluster at keys[0].
gating_core[0][0]['1'].head(10)

Unnamed: 0,feature_11,feature_6,label,cell_ID,final_pred
0,9.340398,-3.016162,1.0,0.0,1
1,-11.060426,3.213424,0.0,1.0,0
2,10.528987,-5.786933,1.0,2.0,1
3,-4.845074,-1.040972,0.0,3.0,0
4,7.082471,-3.349266,1.0,4.0,1
5,9.051398,0.942447,1.0,5.0,1
6,-2.858818,3.643722,0.0,6.0,0
7,3.254964,-5.724593,1.0,7.0,1
8,13.658639,2.514653,1.0,8.0,1
9,3.949655,-10.326386,1.0,9.0,1


#### gating_core[key][1] 
dictionary that gives detailed information on the gate location (*gate_points* and *gate_edges*) per hierarchy
- gating_core[key][1]['1'] -> dictionary for *hierarchy 1* 
- gating_core[key][1]['2'] -> dictionary for *hierarchy 2* 
- ...

In [33]:
# Gate-location details (marker combination, gate points, gate edges) for hierarchy 1 of the cluster at keys[0].
gating_core[0][1]['1']

{'marker_combo': ['feature_11', 'feature_6'],
 'gate_points': array([[0.1381860803701187, 1.1898505004770499],
        [7.751842264865276, 6.444396764365232],
        [18.844844309996333, -7.751516050070587],
        [16.346729553580204, -9.524244540699133],
        [-0.010376331958184437, -9.377403544413758],
        [array([18.87984903]), array([-5.26104043])],
        [array([18.87984903]), array([6.34449842])]], dtype=object),
 'gate_edges': [[array([0.13818608, 1.1898505 ]),
   array([7.75184226, 6.44439676])],
  [array([0.13818608, 1.1898505 ]), array([-0.01037633, -9.37740354])],
  [array([7.751842264865276, 6.444396764365232], dtype=object),
   array([array([18.87984903]), array([6.34449842])], dtype=object)],
  [array([18.84484431, -7.75151605]), array([16.34672955, -9.52424454])],
  [array([18.844844309996333, -7.751516050070587], dtype=object),
   array([array([18.87984903]), array([-5.26104043])], dtype=object)],
  [array([16.34672955, -9.52424454]), array([-0.01037633, -9.

### gating_overview 

####  gating_overview[key]
DataFrame that contains an overview of all features and a one-hot gate membership encoding per individual cell (*final_gate_hierarchy*) 

In [39]:
# First 10 rows of the per-cell overview DataFrame for the cluster at keys[0].
gating_overview[0].head(10)

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,...,new_gate_2,new_gate_3,new_gate_4,new_gate_5,final_gate_0,final_gate_1,final_gate_2,final_gate_3,final_gate_4,final_gate_5
0,5.15653,-5.969478,-1.145506,-10.545896,-9.651578,-0.02671,3.455772,-5.852684,-3.497406,3.023977,...,1,1,1,1,1.0,1,1,1,1,1
1,-3.250296,-1.703907,2.298084,-5.12557,-4.134001,4.951892,8.507755,0.877886,6.474249,-4.742587,...,0,0,0,0,1.0,0,0,0,0,0
2,-0.594582,-0.41412,-3.007879,-10.642581,0.004086,-2.241073,0.920329,-10.539999,1.923242,6.445203,...,1,1,1,1,1.0,1,1,1,1,1
3,2.488288,6.932313,4.8928,-1.183787,-9.742088,1.551836,2.149421,1.174849,-5.593778,-1.615545,...,0,0,1,1,1.0,0,0,0,0,0
4,0.747323,-3.227009,0.21077,-11.272518,-0.190929,-0.292922,5.453629,-8.394526,-2.25804,1.930757,...,1,1,1,1,1.0,1,1,1,1,1
5,-5.474391,-2.466074,-6.025668,-15.829385,2.070189,3.136957,5.769475,-7.221881,1.829339,6.770826,...,1,1,1,1,1.0,1,1,1,1,1
6,-4.798623,-5.992838,-8.70817,-8.455205,-8.416313,5.295781,-3.517081,-0.163131,4.55549,-6.610892,...,0,0,0,0,1.0,0,0,0,0,0
7,-0.45983,-1.015333,-5.640967,-11.500357,-4.76195,-2.191252,4.430179,-9.95964,-1.588714,0.116612,...,1,1,1,1,1.0,1,1,1,1,1
8,-0.25123,2.88727,-1.480181,-6.702728,-0.545946,4.393444,4.928835,-2.690489,-1.645544,-1.880507,...,1,1,1,1,1.0,1,1,1,1,1
9,0.965518,-6.190322,-3.129289,-8.014034,-4.441035,-5.868944,2.047745,-5.365337,-1.926749,-1.034255,...,1,1,1,1,1.0,1,1,1,1,1
