-
Notifications
You must be signed in to change notification settings - Fork 1
/
.Rhistory
512 lines (512 loc) · 19 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
#'
#' @details The genomic heterozygosity (also known as the autosomal
#' heterozygosity) has been demonstrated to be the more accurate and robust
#' measure of heterozygosity (Schmidt et al. 2021). SNP-wise heterozygosity can
#' suffer from sampling biases (filtering, missing data, sample size, etc).
#' Note, estimates of genomic heterozygosity are orders of magnitude less than
#' SNP-wise heterozygosity (so do not be alarmed if you see very small values!).
#'
#' Heterozygsity for population pools is calculated using the method from
#' Ferretti et al. (2013). You can calculate genomic heterozygosity for population
#' pools (standardising by total covered sites), or SNP-wise heterozygosity too
#' (standardising by the number of polymorphic sites observed).
#'
#' You must specify both \code{type} and \code{method}. This will dicate the
#' required column needed for \code{snpData} and \code{extraData}.
#'
#' If \code{type=='genos'} and \code{method=='genomic'}:
#' \enumerate{
#' \item \code{snpData} requires columns specified in:
#' \code{chromCol}, \code{posCol}, \code{locusCol} and \code{sampCol}
#'
#' \item \code{extraData} requires columns specified in:
#' \code{chromCol}, \code{sampCol}, and \code{covCol}.
#' }
#'
#' If \code{type=='genos'} and \code{method=='snpwise'}
#' \enumerate{
#' \item \code{snpData} requires columns specified in:
#' \code{chromCol}, \code{posCol}, \code{locusCol} and \code{sampCol}
#'
#' \item \code{extraData} will NOT be used.
#' }
#'
#' If \code{type=='freqs'} and \code{method=='genomic'}:
#' \enumerate{
#' \item \code{snpData} requires columns specified in:
#' \code{chromCol}, \code{posCol}, \code{locusCol}, \code{popCol},
#' \code{roCol}, and \code{aoCol}.
#'
#' \item \code{extraData} requires columns specified in:
#' \code{chromCol}, \code{sampCol}, \code{covCol}, and \code{indsCol}.
#' }
#'
#' If \code{type=='freqs'} and \code{method=='snpwise'}:
#' \enumerate{
#' \item \code{snpData} requires columns specified in:
#' \code{chromCol}, \code{posCol}, \code{locusCol}, \code{popCol},
#' \code{roCol}, and \code{aoCol}.
#'
#' \item \code{extraData} requires columns specified in:
#' \code{chromCol}, \code{sampCol}, and \code{indsCol}.
#' }
#'
#' @return Returns a data table of heterozygosity estimates per sample or population.
#' Genomic heterozygosity is reported per chromosome, SNP-wise heterozygosity is
#' reported across all SNPs.
#'
#' @references
#' Ferretti et al. (2013) Molecular Ecology. DOI: 10.1111/mec.12522 \cr
#' Schmidt et al. (2021) Methods in Ecology and Evolution. DOI: 10.1111/2041-210X.13659
#'
#' @examples
#' library(genomalicious)
#'
#' data(data_Genos)
#' data(data_PoolFreqs)
#'
#' # Convert genos to characters
#' data_Genos[, GT:=genoscore_converter(GT)]
#'
#' # Make extra data for the samples and populatin pools
#' extraSampInfo <- CJ(
#' SAMPLE=unique(data_Genos$SAMPLE),
#' CHROM=unique(data_Genos$CHROM),
#' COV.SITES=150
#' )
#'
#' extraPoolInfo <- CJ(
#' POP=unique(data_PoolFreqs$POP),
#' CHROM=unique(data_PoolFreqs$CHROM),
#' COV.SITES=150,
#' INDS=30
#' )
#'
#' # Genomic heterozygosity of individuals, per chromosome/contig
#' het_calc(data_Genos, extraSampInfo, type='genos', method='genomic')
#'
#' # SNP-wise heterozygosity of indivdiuals, SNP-wise
#' het_calc(data_Genos, extraSampInfo, type='genos', method='snpwise')
#'
#' # Genomic heterozygosity of population pools, per chromosome/contig
#' het_calc(data_PoolFreqs, extraPoolInfo, type='freqs', method='genomic')
#'
#' # Genomic heterozygosity of population pools, SNP-wise
#' het_calc(data_PoolFreqs, extraPoolInfo, type='freqs', method='snpwise')
#' @export
het_calc <- function(
snpData, extraData, method, type, chromCol='CHROM', posCol='POS', locusCol='LOCUS',
sampCol='SAMPLE', genoCol='GT', popCol='POP', roCol='RO', aoCol='AO',
covCol='COV.SITES', indsCol='INDS'
){
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
#### CHECK AND ENVIRONMENT ####
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Check type
if(!type %in% c('genos','freqs')){
stop('Argument `type` must be one of "genos" or "freqs". See ?het_calc.')
}
# Check method
if(!method %in% c('genomic','snpwise')){
stop('Argument `method` must be one of "genomic" or "snpwise". See ?het_calc.')
}
# Check the columns in snpData and extraData
if(type=='genos'){
# Genomic heterozygosity
if(method=='genomic'){
# SNP data
check.col.snp <- c(chromCol, posCol, locusCol, sampCol, genoCol)
if(length(check.col.snp)!=5){
stop('Argument `type`=="genos" and `method`=="genomic". All columns `chromCol`, `posCol`, `locusCol` and `sampCol` must be in `snpData`. See ?het_calc.')
}
snpData <- copy(snpData) %>%
setnames(., check.col.snp, c('CHROM','POS','LOCUS','SAMPLE','GT'))
# Extra data
check.col.extra <- c(chromCol, sampCol, covCol)
if(length(check.col.extra)!=3){
stop('Argument `type`=="genos" and `method`=="genomic". All columns `chromCol`, `sampCol`, and `covCol` must be in `extraData`. See ?het_calc.')
}
extraData <- copy(extraData) %>%
setnames(., check.col.extra, c('CHROM','SAMPLE','COV.SITES'))
}
# SNP-wise heterozygosity
if(method=='snpwise'){
# SNP data
check.col.snp <- c(chromCol, posCol, locusCol, sampCol, genoCol)
if(length(check.col.snp)!=5){
stop('Argument `type`=="genos" and `method`=="snpwise". All columns `chromCol`, `posCol`, `locusCol` and `sampCol` must be in `snpData`. See ?het_calc.')
}
snpData <- copy(snpData) %>%
setnames(., check.col.snp, c('CHROM','POS','LOCUS','SAMPLE','GT'))
}
}
if (type=='freqs'){
# Genomic heterozygosity
if(method=='genomic'){
# SNP data
check.col.snp <- c(chromCol, posCol, locusCol, popCol, roCol, aoCol)
if(length(check.col.snp)!=6){
stop('Argument `type`=="freqs" and `method`=="genomic". All columns `chromCol`, `posCol`, `locusCol`, `popCol`, `roCol` and `aoCol` must be in `snpData`. See ?het_calc.')
}
snpData <- copy(snpData) %>%
setnames(., check.col.snp, c('CHROM','POS','LOCUS','POP','RO','AO'))
# Extra data
check.col.extra <- c(chromCol, popCol, covCol, indsCol)
if(length(check.col.extra)!=4){
stop('Argument `type`=="freqs" and `method`=="genomic". All columns `chromCol`, `sampCol`, `covCol`, and `indsCol` must be in `extraData`. See ?het_calc.')
}
extraData <- copy(extraData) %>%
setnames(., check.col.extra, c('CHROM', 'POP', 'COV.SITES', 'INDS'))
}
# SNP-wise heterozygosity
if(method=='snpwise'){
# SNP data
check.col.snp <- c(chromCol, posCol, locusCol, popCol, roCol, aoCol)
if(length(check.col.snp)!=6){
stop('Argument `type`=="freqs" and `method`=="snpwise". All columns `chromCol`, `posCol`, `locusCol`, `popCol`, `roCol` and `aoCol` must be in `snpData`. See ?het_calc.')
}
snpData <- copy(snpData) %>%
setnames(., check.col.snp, c('CHROM', 'POS', 'LOCUS', 'POP', 'RO', 'AO'))
# Extra data
check.col.extra <- c(chromCol, popCol, indsCol)
if(length(check.col.extra)!=3){
stop('Argument `type`=="freqs" and `method`=="snpwise". All columns `chromCol`, `sampCol`, and `indsCol` must be in `extraData`. See ?het_calc.')
}
extraData <- copy(extraData) %>%
setnames(., check.col.extra, c('CHROM', 'POP', 'INDS'))
}
}
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
#### GENOMIC HETEROZYGOSITY ####
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# For genomic heterozygosity, iterate over combinations of
# chromosome and populations in the extra data table.
if(method=='genomic'){
# Using individual genotypes
if(type=='genos'){
snpData[, ALLELE.1:=sub('/.*','',GT), by=c('SAMPLE','LOCUS')]
snpData[, ALLELE.2:=sub('.*/','',GT), by=c('SAMPLE','LOCUS')]
snpData[, HET:=(ALLELE.1!=ALLELE.2), by=c('SAMPLE','LOCUS')]
result <- snpData[, .(HET=sum(HET)), by=c('CHROM','SAMPLE')] %>%
merge.data.table(., extraData) %>%
.[, HET:=HET/COV.SITES] %>%
.[, c('CHROM','SAMPLE','HET')]
}
# Using population allele frequencies
if(type=='freqs'){
snpData <- snpData %>%
.[, RO:=as.integer(RO)] %>%
.[, AO:=as.integer(AO)] %>%
.[, DP:=AO+RO]
result <- lapply(1:nrow(extraData), function(i){
chrom <- extraData$CHROM[i]
pop <- extraData$POP[i]
cov.sites <- extraData$COV.SITES[i]
inds <- extraData$INDS[i]
allele.frac <- snpData[POP==pop] %>%
.[, NUMER:=2*(AO*(DP-AO)), by=LOCUS] %>%
.[, DENOM:=DP*(DP-1)] %>%
.[, sum(NUMER/DENOM)]
het <- (inds/(inds-1)) * (1/cov.sites) * allele.frac
data.table(CHROM=chrom, POP=pop, HET=het)
}) %>%
do.call('rbind',.)
}
}
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
#### SNP-WISE HETEROZYGOSITY ####
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if(method=='snpwise'){
# Using individual allele frequencies
if(type=='genos'){
snpData[, ALLELE.1:=sub('/.*','',GT), by=c('SAMPLE','LOCUS')]
snpData[, ALLELE.2:=sub('.*/','',GT), by=c('SAMPLE','LOCUS')]
snpData[, HET:=(ALLELE.1!=ALLELE.2), by=c('SAMPLE','LOCUS')]
result <- snpData[, .(HET=sum(HET)/length(HET)), by=c('SAMPLE')]
}
# Using population allele frequencies
if(type=='freqs'){
snpData <- snpData %>%
.[, RO:=as.integer(RO)] %>%
.[, AO:=as.integer(AO)] %>%
.[, DP:=AO+RO]
result <- lapply(1:nrow(extraData), function(i){
chrom <- extraData$CHROM[i]
pop <- extraData$POP[i]
inds <- extraData$INDS[i]
num.sites <- snpData[POP==pop] %>% nrow
allele.frac <- snpData[POP==pop] %>%
.[, NUMER:=2*(AO*(DP-AO)), by=LOCUS] %>%
.[, DENOM:=DP*(DP-1)] %>%
.[, sum(NUMER/DENOM)]
het <- (inds/(inds-1)) * (1/num.sites) * allele.frac
data.table(CHROM=chrom, POP=pop, HET=het)
}) %>%
do.call('rbind',.)
}
}
# Output
return(result)
}
# Developer libraries
libs <- c('devtools', 'roxygen2', 'testthat', 'knitr', 'data.table', 'tidyverse')
for(L in libs){require(L, character.only=TRUE)}
# Make just those documents that have changed
roxygenise()
library(genomalicious)
# Make just those documents that have changed
roxygenise()
library(genomalicious)
# Developer libraries
libs <- c('devtools', 'roxygen2', 'testthat', 'knitr', 'data.table', 'tidyverse')
for(L in libs){require(L, character.only=TRUE)}
# Make just those documents that have changed
roxygenise()
library(genomalicious)
# Make just those documents that have changed
roxygenise()
library(genomalicious)
library(genomalicious)
library(genomalicious)
data(data_Genos)
data(data_PoolFreqs)
data(data_PoolInfo)
data_Genos$GT %>% head
data_Genos[, GT:=genoscore_converter(GT)]
data_Genos$GT %>% head
# Set allele counts and individuals in pool-seq data
data_PoolFreqs %>% head
data_PoolInfo %>% head
data_PoolFreqs[, COUNTS:=paste(RO,AO,sep=',')]
data_PoolFreqs$INDS <- data_PoolInfo$INDS[
match(data_PoolFreqs$POOL, data_PoolInfo$POOL)
]
head(data_PoolFreqs)
# Genotypes and global F-statistics
fstat_calc(
dat=data_Genos,
type='genos', method='global', fstatVec=c('FST','FIS','FIT'),
popCol='POP', sampCol='SAMPLE',
locusCol='LOCUS', genoCol='GT',
permute=FALSE
)
fstat_calc(
dat=data_Genos,
type='genos', method='pairwise', fstatVec=c('FST','FIS','FIT'),
popCol='POP', sampCol='SAMPLE',
locusCol='LOCUS', genoCol='GT',
permute=FALSE
)
fstat_calc(
dat=data_PoolFreqs,
type='freqs', method='global', fstatVec=NULL,
popCol='POP', locusCol='LOCUS',
countCol='COUNTS', indsCol='INDS',
permute=FALSE
)
fstat_calc(
dat=data_PoolFreqs,
type='freqs', method='pairwise', fstatVec=NULL,
popCol='POP', locusCol='LOCUS',
countCol='COUNTS', indsCol='INDS',
permute=FALSE
)
# Developer libraries
libs <- c('devtools', 'roxygen2', 'testthat', 'knitr', 'data.table', 'tidyverse')
for(L in libs){require(L, character.only=TRUE)}
# Make all documents
roxygenise(clean=TRUE)
library(genomalicious)
library(genomalicious)
# Developer libraries
libs <- c('devtools', 'roxygen2', 'testthat', 'knitr', 'data.table', 'tidyverse')
for(L in libs){require(L, character.only=TRUE)}
# Make just those documents that have changed
roxygenise()
library(genomalicious)
# Developer libraries
libs <- c('devtools', 'roxygen2', 'testthat', 'knitr', 'data.table', 'tidyverse')
for(L in libs){require(L, character.only=TRUE)}
# Load currently installed genomalicious
library(genomalicious)
# Make just those documents that have changed
roxygenise()
library(genomalicious)
genomaliciousExtData <- paste0(find.package('genomalicious'), '/extdata')
list.files(path=genomaliciousExtData, pattern='indseq.vcf')
vcfPath <- paste0(genomaliciousExtData, '/data_indseq.vcf')
# You can read the file in as lines to see what it
# looks like:
readLines(vcfPath) %>% head
readLines(vcfPath) %>% tail
# Now read it in as a data table
readVcf1 <- vcf2DT(vcfFile=vcfPath)
readVcf1 %>% print()
#' VCF file to data table
#'
#' Reads a VCF file and converts to a long format data table. Note, that whilst
#' the \code{data.table} object class is very memory efficient, very large genomic
#' datasets might take longer to read in, and/or be difficult to hold in
#' memory. Take your operating system and the size of your input dataset into
#' consideration when using this function.
#'
#' @param vcfFile Character: The path to the input VCF file.
#'
#' @param dropCols Character: Vector of column names from the VCF that you
#' want to drop from the output data table. Use this for any column that occurs
#' before the 'FORMAT' column in the original VCF file. Default = \code{NULL}.
#'
#' @param keepComments Logical: Should the VCF comments be kept?
#' Default = \code{FALSE}. See Details for parameterisation.
#'
#' @param keepInfo Logical: Should the VCF info for each locus be kept?
#' Default = \code{FALSE}.
#'
#' @details Firstly, it should be noted that while data tables are a really
#' excellent way of handling genotype and sequence read information in R,
#' they are not necessarily the most efficient way to do so for very large
#' genomic datasets. Take your operating system and/or dataset in mind before
#' using this function. Most RADseq datasets should be manageable, but
#' whole-genome data can be challenging if you do not have a lot of available
#' memory. You can always try loading in subsets (e.g., by chromosome or contigs)
#' of your dataset to see how feasible it is to load with this function.
#'
#' @return A \code{data.table} object is returned with all the columns contained in
#' the original VCF file with some additions:
#' \itemize{
#' \item A column called \code{$LOCUS} is generated. This is the concatenation of the
#' \code{$CHROM} and \code{$POS} column to form a locus ID. "CHROM:POS".
#' \item A column called \code{$SAMPLE} is generated. This contains the sample IDs that
#' are the columns that follow the \code{$FORMAT} column in the original VCF.
#' \item The items in the original \code{$FORMAT} column of the VCF are given their own columns.
#' } \cr
#' Note, for VCF files produced by Stacks, the $CHROM is given the same value
#' as the $ID column. \cr\cr
#' When \code{keepInfo==TRUE} and/or \code{keepComments==TRUE}, these are returned
#' as attributes. E.g., if the returned object is \code{vcfDT}, then you can
#' access Info and Comments (respectively) with: \code{attr(vcfDT, 'vcf_info')}
#' and \code{attr(vcfDT, 'vcf_comments')}.
#'
#' @examples
#' # Create a link to raw external datasets in genomalicious
#' genomaliciousExtData <- paste0(find.package('genomalicious'), '/extdata')
#'
#' # This command here shows you the VCF file that comes with genomalicious
#' list.files(path=genomaliciousExtData, pattern='indseq.vcf')
#'
#' # Use this to create a path to that file
#' vcfPath <- paste0(genomaliciousExtData, '/data_indseq.vcf')
#'
#' # You can read the file in as lines to see what it
#' # looks like:
#' readLines(vcfPath) %>% head
#' readLines(vcfPath) %>% tail
#'
#' # Now read it in as a data table
#' readVcf1 <- vcf2DT(vcfFile=vcfPath)
#' readVcf1 %>% print()
#'
#' # Read in VCF, but drop some columns,
#' # and keep comments and info.
#' readVcf2 <- vcf2DT(vcfPath
#' , dropCols=c('QUAL')
#' , keepComments=TRUE
#' , keepInfo=TRUE)
#'
#' readVcf2 %>% print
#'
#' attr(readVcf2, 'vcf_comments')
#' attr(readVcf2, 'vcf_info')
#'
#' @export
vcf2DT <- function(vcfFile, dropCols=NULL, keepComments=FALSE, keepInfo=FALSE){
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# #### Libraries and assertions ####
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
require(data.table); require(tidyverse)
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# #### Code: VCF to data table ####
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# Read file from header
cat('(1/4) Reading in VCF as a data table', sep='\n')
vcfDT <- fread(vcfFile, skip='#CHROM', sep='\t', header=TRUE)
# Adjust header
colnames(vcfDT) <- gsub(pattern='#', replace='', x=colnames(vcfDT))
# Which columns are the sample? The ones after the FORMAT column.
sampCols <- colnames(vcfDT)[(which(colnames(vcfDT)=='FORMAT')+1):ncol(vcfDT)]
# Generate a $LOCUS column, place at the start of the data table
cat('(2/4) Generating locus IDs', sep='\n')
vcfDT[, LOCUS:=paste0(CHROM, ':', POS)]
# Get the locus info as a vector and drop from data table
if(keepInfo==TRUE){
vcfInfo <- vcfDT$INFO
names(vcfInfo) <- vcfDT$LOCUS
}
vcfDT <- vcfDT[, !'INFO']
# Drop unwanted columns here to save memory
if(is.null(dropCols)==FALSE){
vcfDT <- vcfDT[, !dropCols, with=FALSE]
}
# Now convert the data from wide to long
cat('(3/4) Converting from wide to long format', sep='\n')
vcfDT <- data.table::melt(
data=vcfDT,
id.vars=colnames(vcfDT)[!colnames(vcfDT)%in%sampCols],
measure.vars=sampCols,
variable.name='SAMPLE',
value.name='DATA') %>%
as.data.table()
# Make sure SAMPLE is a character
vcfDT[, SAMPLE:=as.character(SAMPLE)]
# Separate out the FORMAT data components into their own columns
cat('(4/4) Parsing data for each sample', sep='\n')
# ... Get the format names
formatNames <- unlist(strsplit(vcfDT$FORMAT[1], split=':'))
# ... Drop the format column to save space now
vcfDT <- vcfDT[, !'FORMAT']
# ... If the $DATA column is '.', add in NA
vcfDT[DATA=='.', DATA:=NA]
# ... Separate $DATA by $FORMAT names
vcfDT <- vcfDT[, tstrsplit(DATA, ':', names=formatNames)] %>%
cbind(vcfDT[, !'DATA'], .) %>%
as.data.table()
# .... Replace '.' values in the data columns with NA
for(f in formatNames){
vcfDT[[f]][vcfDT[[f]]=='.'] <- NA
}
# Make sure DP and RO integers
if('DP' %in% colnames(vcfDT)){ vcfDT[, DP:=as.integer(DP)] }
if('RO' %in% colnames(vcfDT)){ vcfDT[, RO:=as.integer(RO)] }
# Attach header as an attribute, if specified.
if(keepComments==TRUE){
attr(vcfDT, 'vcf_comments') <- readLines(vcfFile, n=headPos-1)
}
# Attach info as an attribute if, if specified.
if(keepInfo==TRUE){
attr(vcfDT, 'vcf_info') <- vcfInfo
}
# Finish
cat('All done! <3', '\n')
# Return the data.table, drop any columns if specified.
return(vcfDT)
}
genomaliciousExtData <- paste0(find.package('genomalicious'), '/extdata')
list.files(path=genomaliciousExtData, pattern='indseq.vcf')
vcfPath <- paste0(genomaliciousExtData, '/data_indseq.vcf')
readLines(vcfPath) %>% head
readLines(vcfPath) %>% tail
readVcf1 <- vcf2DT(vcfFile=vcfPath)
readVcf1 %>% print()
# Developer libraries
libs <- c('devtools', 'roxygen2', 'testthat', 'knitr', 'data.table', 'tidyverse')
for(L in libs){require(L, character.only=TRUE)}
# Make just those documents that have changed
roxygenise()
library(genomalicious)
# Make just those documents that have changed
roxygenise()
# Developer libraries
libs <- c('devtools', 'roxygen2', 'testthat', 'knitr', 'data.table', 'tidyverse')
for(L in libs){require(L, character.only=TRUE)}
# Make just those documents that have changed
roxygenise()