# Load data

Methods section "CHIPseq analysis" https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1000778


Significance was determined by calculating a binomial P value for each 250 bp window and controlled for multiple testing by calculating an empirical false discovery rate. The “Best Window” in each enriched region with an empirical false discovery rate of <0.01 were called as “bound regions” and had a median size of 250 bp.

The ChIP-seq datasets and peak files are available for download from NCBI's Gene Expression Omnibus (GEO, http://www.ncbi.nlm.nih.gov/geo), accession number GSE17954, section "Samples" GSM449525	Jurkat_ETS1_ChIPseq, supplementary file GSM449525_ETS1_EmpFDR20.0_19420.txt.gz.

Useq package documentation: https://useq.sourceforge.net/outputFileTypeDescriptions.html

In [1]:
ETS1_chipseq <- read.csv("GSM449525_ETS1_EmpFDR20.0_19420.txt"
                        ,sep = "\t"
                        ,header = TRUE)
print(str(ETS1_chipseq))

'data.frame':	19420 obs. of  17 variables:
 $ X.Hyperlinks                                                                  : chr  "=HYPERLINK(http://localhost:7085/UnibrowControl?version=H_sapiens_Mar_2006&seqid=chr5&start=145534552&end=145550385,1)" "=HYPERLINK(http://localhost:7085/UnibrowControl?version=H_sapiens_Mar_2006&seqid=chr6&start=42631650&end=42647839,2)" "=HYPERLINK(http://localhost:7085/UnibrowControl?version=H_sapiens_Mar_2006&seqid=chr8&start=75038846&end=75055317,3)" "=HYPERLINK(http://localhost:7085/UnibrowControl?version=H_sapiens_Mar_2006&seqid=chr2&start=170130626&end=170146627,4)" ...
 $ Chr                                                                           : chr  "chr5" "chr6" "chr8" "chr2" ...
 $ Start                                                                         : int  145542052 42639150 75046346 170138126 216984754 2377683 6642060 6830279 47531729 52407215 ...
 $ Stop                                                                          : 

# Process data

In [5]:
start_hg38_region <- 88975070

In [6]:
stop_hg38_region <- 89017059

In [7]:
start_hg19_region <- 90734827

In [8]:
stop_hg19_region <- 90776816

In [2]:
sum(ETS1_chipseq$Chr == "chr10")

In [11]:
idx_fas_region <- ETS1_chipseq$Chr == "chr10" & ETS1_chipseq$Start > start_hg19_region & ETS1_chipseq$Stop < stop_hg19_region

In [12]:
ETS1_chipseq[idx_fas_region,]

Unnamed: 0_level_0,X.Hyperlinks,Chr,Start,Stop,X.Windows,BW_Start,BW_Stop,BW_BinPVal,BW_QValueFDR,BW_EmpFDR,BW_SkewPVal,BW_Log2..sumT.1...sumC.1..,BW_SumT.,BW_SumT..1,BW_SumC.,BW_SumC..1,GenomeVersion.H_sapiens_Mar_2006..TotalTreatObs.6683411..TotalCtrlObs.13825035
Unnamed: 0_level_1,<chr>,<chr>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>
16197,"=HYPERLINK(http://localhost:7085/UnibrowControl?version=H_sapiens_Mar_2006&seqid=chr10&start=90732725&end=90747993,16197)",chr10,90740225,90740493,1,90740238,90740480,39.59036,38.8689,23.09299,0,2.58303,7,7,1,3,
17002,"=HYPERLINK(http://localhost:7085/UnibrowControl?version=H_sapiens_Mar_2006&seqid=chr10&start=90733652&end=90748930,17002)",chr10,90741152,90741430,2,90741165,90741412,35.77071,35.21036,23.01187,0,2.479814,9,4,2,2,


In [18]:
bed <- ETS1_chipseq

In [19]:
bed$chrom = 10

In [20]:
bed$chromStart <- ETS1_chipseq$Start

In [21]:
bed$chromEnd <- ETS1_chipseq$Stop

In [23]:
bed <- bed[idx_fas_region
           ,c("chrom"
             ,"chromStart"
             ,"chromEnd"
             )]

In [24]:
print(str(bed))

'data.frame':	2 obs. of  3 variables:
 $ chrom     : num  10 10
 $ chromStart: int  90740225 90741152
 $ chromEnd  : int  90740493 90741430
NULL


# Export

In [25]:
write.table(bed
            ,"Hollenhorst_ETS1_FAS_hg18.bed"
         ,sep = "\t"
           ,row.names = FALSE
           ,col.names = FALSE
           ,quote = FALSE)

The coordinates were then converted to hg38 via http://genome.ucsc.edu/cgi-bin/hgLiftOver and saved as file Hollenhorst_ETS1_FAS_hg38.bed.

For UCSC browser, the following three lines were added manually:

browser position chr10:88990798-89017059

track name=Hollenhorst_2009 description=" " visibility=2 color=205,127,50

#chrom chromStart chromEnd