# Example usage of GarNet

In [1]:
import numpy as np
import pandas as pd

# if you pip installed the package: 
# import GarNet

# if you cloned the repo:
import sys
sys.path.append("../GarNet")
import garnet

# Having issues importing despite the module existing in your virtual environment?
# See https://github.com/jupyter/notebook/issues/397

# Generate GarNet file

Skip this section if the GarNet file has already been generated. See the Appendix for how to obtain the UCSC reference file.

In [3]:
# Inputs: gene reference (BED) and motif locations (BED).
reference_filepath = "../garnet_data/reference/ucsc_reference.hg19.bed"
motif_filepath = "../garnet_data/cisBP_hg19_SLIM/cisBP.hg19.LOD.10kb.chr1_SLIM.bed"
# Output: the motif-gene association table is written to this TSV.
garnet_filepath = "../garnet_data/cisBP_hg19_SLIM/garnetDB_cisBP.hg19.LOD.10kb.chr1_SLIM.tsv"

# The call writes the GarNet file to `garnet_filepath`; the returned object is
# previewed below (presumably a DataFrame of the same associations).
garnet_df = garnet.construct_garnet_file(
    reference_filepath,
    motif_filepath,
    garnet_filepath,
)
garnet_df.head()

01:31:40 - GarNet: INFO -   - TSS file seems to already exists at ../garnet_data/reference/ucsc_reference.hg19.tss.bed
01:31:40 - GarNet: INFO -   - Searching for motifs near genes. This may take a while...
01:31:49 - GarNet: INFO -   - 181422 motif-gene associations found and written to ../garnet_data/cisBP_hg19_SLIM/garnetDB_cisBP.hg19.LOD.10kb.chr1_SLIM.tsv


Unnamed: 0,motifChrom,motifStart,motifEnd,motifName,motifScore,motifStrand,geneName,tssStart,tssEnd,motif_gene_distance
0,chr1,17000,17008,HOXA4,8.846505,+,DDX11L1,11873,11874,5135
1,chr1,18055,18067,LMO2,8.768321,+,DDX11L1,11873,11874,6194
2,chr1,18339,18351,LMO2,8.901481,+,DDX11L1,11873,11874,6478
3,chr1,19086,19096,SREBF1,11.043803,-,DDX11L1,11873,11874,7213
4,chr1,19858,19866,HOXA4,8.539458,+,DDX11L1,11873,11874,7993


# Map peaks to genes using a549 data

This is accomplished by intersecting the peaks in the open chromatin BED file with the motif-gene associations in the GarNet file.

In [None]:
# `garnet_filepath` is set again here so this section can be run on its own
# when the GarNet file already exists and the generation section was skipped.
peaks_filepath = "a549_example_peaks.bed"
garnet_filepath = "../garnet_data/cisBP_hg19_SLIM/garnetDB_cisBP.hg19.LOD.10kb.chr1_SLIM.tsv"

# `df` is consumed by the regression cell further down.
df = garnet.map_peaks(
    peaks_filepath,
    garnet_filepath,
)
df.head()

# Regress gene expression against TF Motif binding score

Perform the regression; plots are automatically generated in `regression_plots/`.

In [None]:
expression_filepath = "a549_example_expression.txt"
# Presumably the directory under which `regression_plots/` is created --
# confirm against garnet.TF_regression.
output_dir = "."

# `df` is the peak-to-gene mapping produced by garnet.map_peaks in the previous section.
results = garnet.TF_regression(df, expression_filepath, output_dir)
results.head(10)

# Appendix

## Download and format reference file

### 1. Download UCSC known genes file

Visit the [UCSC Table Browser](https://genome.ucsc.edu/cgi-bin/hgTables?command=start) and select the following options:

 - **assembly**: Feb. 2009 (GRCh37/hg19)
 - **group**: Genes and Gene Predictions
 - **track**: UCSC Genes
 - **table**: knownGene
 - **region**: genome
 - **output format**: selected fields from primary and related tables

Press "get output", clear all fields, then check the following fields:

 - chrom
 - strand
 - txStart
 - txEnd
 - geneSymbol
 
Press "get output" again, and save the table locally. A copy (retrieved 9/11/2017) is saved in `../garnet_data/reference/ucsc_known_genes.hg19.tsv`.

### 2. Reformat to BED file

In [None]:
# Load the Table Browser export, skipping any line that starts with "#",
# then drop exact duplicate rows and add a placeholder BED `score` column.
ucsc_reference = (
    pd.read_csv(
        "../garnet_data/reference/ucsc_known_genes.hg19.tsv",
        sep='\t',
        names=["chrom", "strand", "chromStart", "chromEnd", "name"],
        comment="#",
    )
    .drop_duplicates()
    .assign(score=".")
)
ucsc_reference.head()

In [None]:
# Reorder the columns into the order they will be written to the BED file.
ucsc_reference_bed = ucsc_reference.loc[
    :,
    [
        "chrom",
        "chromStart",
        "chromEnd",
        "name",
        "score",
        "strand",
    ],
]
ucsc_reference_bed.head()

In [None]:
# Write tab-separated rows with no header line and no index column.
ucsc_reference_bed.to_csv(
    "../garnet_data/reference/ucsc_reference.hg19.bed",
    sep='\t',
    header=False,
    index=False,
)