# Example usage of GarNet

In [1]:
import sys
sys.path.append("../GarNet")

import pandas as pd
import numpy as np

import garnet

# Having issues importing despite the module existing in the virtual environment?
# See https://github.com/jupyter/notebook/issues/397

# Generate GarNet file

Skip this section if the GarNet file has already been generated. See the Appendix for how to obtain the UCSC reference file.

In [2]:
# Inputs: the UCSC reference BED file (see the Appendix for how it is produced)
# and the chr1 cisBP motif BED file.
reference_filepath = "../garnet_data/reference/ucsc_reference.hg19.bed"
motif_filepath = "../garnet_data/cisBP_hg19_SLIM/cisBP.hg19.LOD.10kb.chr1_SLIM.bed"
# Output location of the GarNet file; this path is reused by the map_peaks cell below.
garnet_filepath = "../garnet_data/cisBP_hg19_SLIM/garnetDB_cisBP.hg19.LOD.10kb.chr1_SLIM.tsv"

# Build the GarNet file. Per the log output, this writes a TSS file next to the
# reference file and writes the motif-gene associations to garnet_filepath.
garnet_df = garnet.construct_garnet_file(reference_filepath, motif_filepath, garnet_filepath)
garnet_df.head()

12:16:20 - GarNet: INFO -   - Wrote TSS file to ../garnet_data/reference/ucsc_reference.hg19.tss.bed
12:16:20 - GarNet: INFO -   - Searching for motifs near genes. This may take a while...
12:16:29 - GarNet: INFO -   - 181422 motif-gene associations found and written to ../garnet_data/cisBP_hg19_SLIM/garnetDB_cisBP.hg19.LOD.10kb.chr1_SLIM.tsv


Unnamed: 0,motifChrom,motifStart,motifEnd,motifName,motifScore,motifStrand,geneName,tssStart,tssEnd,motif_gene_distance
0,chr1,17000,17008,HOXA4,8.846505,+,DDX11L1,11873,11874,5135
1,chr1,18055,18067,LMO2,8.768321,+,DDX11L1,11873,11874,6194
2,chr1,18339,18351,LMO2,8.901481,+,DDX11L1,11873,11874,6478
3,chr1,19086,19096,SREBF1,11.043803,-,DDX11L1,11873,11874,7213
4,chr1,19858,19866,HOXA4,8.539458,+,DDX11L1,11873,11874,7993


# Map peaks to genes using a549 data

This is accomplished by intersecting the peaks in the open chromatin BED file with the motif-gene associations in the GarNet file.

In [3]:
# Open chromatin peaks BED file for the a549 example (chr1 only, per the file name).
peaks_filepath = "a549_example_peaks.chr1.bed"

# Map the peaks against the GarNet file at garnet_filepath (set in the
# "Generate GarNet file" section). `df` is the input to the regression cell below.
df = garnet.map_peaks(peaks_filepath, garnet_filepath)
df.head()

Unnamed: 0,chrom,start,end,motifName,motifScore,motifStrand,geneName,geneStart,geneEnd,motif_gene_distance
0,chr1,565301,565308,HOXA4,7.539123,-,JA429830,566114,566115,813
1,chr1,565460,565467,HOXA4,7.539123,-,JA429830,566114,566115,654
2,chr1,568230,568241,GATA1,10.237757,-,JA429830,566114,566115,-2116
3,chr1,568231,568240,LMO2,8.409497,-,JA429830,566114,566115,-2117
4,chr1,570229,570235,HOXA4,7.118708,+,JA429830,566114,566115,-4121


Perform regression; plots are automatically generated in `regression_plots/`.

In [4]:
expression_filepath = "a549_example_expression.txt"

# Regress expression against the mapped peaks for each transcription factor,
# then rank the results by ascending p-value.
# NOTE(review): the third argument "." is presumably the output directory for
# the generated plots; confirm against garnet.TF_regression.
results = (
    garnet.TF_regression(df, expression_filepath, ".")
    .sort_values("P-Value")
)
results.head(10)

12:16:31 - GarNet: INFO - Performing linear regression for 7 transcription factor expression profiles...


Unnamed: 0,Transcription Factor,Slope,P-Value,Targets
1,CEBPB,1.741927,0.070899,"NBL1,NBL1,NBL1,NBL1,NBL1,NBL1,NBL1,NIPAL3,DHCR..."
4,HOXA4,0.28986,0.089367,"CTNNBIP1,CAMK2N1,ID3,ID3,EPB41,DHCR24,DHCR24,I..."
2,EGR3,0.238963,0.428018,"ACOT7,ACOT7,LEPRE1,HOOK1,HOOK1,HOOK1,HOOK1,CYR..."
0,ATF5,-0.431197,0.518044,"ACOT7,ACOT7,CDA,ID3,CSF1,IL6R,PBXIP1,PBXIP1,PB..."
6,SREBF1,-0.771848,0.65772,"SAMD11,SAMD11,SAMD11,SAMD11,SAMD11,EPB41,KIAA1..."
5,LMO2,-0.222877,0.713435,"SAMD11,SAMD11,ARHGEF16,CAMK2N1,CAMK2N1,NIPAL3,..."
3,GATA1,-0.089704,0.778543,"ARHGEF16,NIPAL3,KIAA1522,KIAA1522,PIK3R3,PIK3R..."


# Appendix

## Download and format reference file

### 1. Download UCSC known genes file

Visit the [UCSC Table Browser](https://genome.ucsc.edu/cgi-bin/hgTables?command=start) and select the following options:

 - **assembly**: Feb. 2009 (GRCh37/hg19)
 - **group**: Genes and Gene Predictions
 - **track**: UCSC Genes
 - **table**: knownGene
 - **region**: genome
 - **output format**: selected fields from primary and related tables

Press "get output", clear all fields, then check the following fields:

 - chrom
 - strand
 - txStart
 - txEnd
 - geneSymbol
 
Press "get output" again, and save the table locally. A copy (retrieved 9/11/2017) is saved in `../garnet_data/reference/ucsc_known_genes.hg19.tsv`.

### 2. Reformat to BED file

In [5]:
# Load the Table Browser export, drop exact duplicate rows, and add a
# placeholder "score" column so the table can later be laid out as BED.
# Column names follow the order of the fields selected in step 1; lines starting
# with "#" (presumably the export's header line) are skipped.
ucsc_reference = (
    pd.read_csv(
        "../garnet_data/reference/ucsc_known_genes.hg19.tsv",
        sep="\t",
        names=["chrom", "strand", "chromStart", "chromEnd", "name"],
        comment="#",
    )
    .drop_duplicates()
    .assign(score=".")
)
ucsc_reference.head()

Unnamed: 0,chrom,strand,chromStart,chromEnd,name,score
0,chr1,+,11873,14409,DDX11L1,.
3,chr1,-,14361,16765,WASH7P,.
4,chr1,-,16857,17751,WASH7P,.
5,chr1,-,15795,18061,WASH7P,.
6,chr1,-,14361,19759,WASH7P,.


In [6]:
# Reorder the columns into BED order: chrom, chromStart, chromEnd, name, score, strand.
ucsc_reference_bed = ucsc_reference.loc[
    :, ["chrom", "chromStart", "chromEnd", "name", "score", "strand"]
]
ucsc_reference_bed.head()

Unnamed: 0,chrom,chromStart,chromEnd,name,score,strand
0,chr1,11873,14409,DDX11L1,.,+
3,chr1,14361,16765,WASH7P,.,-
4,chr1,16857,17751,WASH7P,.,-
5,chr1,15795,18061,WASH7P,.,-
6,chr1,14361,19759,WASH7P,.,-


In [7]:
# Write the reordered table as a tab-separated file with no header row and no
# index column. This is the reference file read in the "Generate GarNet file" section.
ucsc_reference_bed.to_csv(
    "../garnet_data/reference/ucsc_reference.hg19.bed",
    sep="\t",
    header=False,
    index=False,
)