From 607910847cb57b4739a5b90ca48b4b043ac4c4cd Mon Sep 17 00:00:00 2001
From: Bastiaan de Graaf
Date: Mon, 27 Apr 2020 13:07:44 +0200
Subject: [PATCH 1/3] added a notebook cleaning and parsing the covid pan genome as obtained from genbank

---
 src/genbankParser/serratus_genbankParser.Rmd | 390 +++++++++++++++++++
 1 file changed, 390 insertions(+)
 create mode 100644 src/genbankParser/serratus_genbankParser.Rmd

diff --git a/src/genbankParser/serratus_genbankParser.Rmd b/src/genbankParser/serratus_genbankParser.Rmd
new file mode 100644
index 0000000..4f8ac16
--- /dev/null
+++ b/src/genbankParser/serratus_genbankParser.Rmd
@@ -0,0 +1,390 @@
+---
+title: "Cleanup of the CoVid pan-genome"
+output: html_notebook
+---
+### Setup
+
+we need the devel version of r and bioconductor because: https://github.com/gmbecker/genbankr/issues/3
+
+```{r}
+if (!requireNamespace("BiocManager", quietly=TRUE))
+    install.packages("BiocManager")
+BiocManager::install(version="devel")
+BiocManager::install("genbankr", version = "devel")
+install.packages("taxize")
+install.packages("data.table")
+install.packages("rlist")
+
+```
+
+```{r}
+library(rlist)
+library(devtools)
+library(genbankr)
+library(GenomicRanges)
+library(Biostrings)
+library(tidyverse)
+library(data.table)
+library(taxize)
+```
+
+```{r}
+fileName <- "C:/Users/Gebruiker/Downloads/cov0.gb"
+fileNameDuplicates <- "C:/Users/Gebruiker/Downloads/cov0.duplicates"
+```
+
+### Functions
+
+```{r, include = FALSE}
+# Build one host-name rule. `name` is returned by CleanHostName() when the
+# cleaned host string matches any regular expression in `patterns`
+# (case-insensitive) or is exactly equal to one of `exact`, unless it is
+# exactly equal to one of `except`.
+HostRule <- function(name, patterns = character(0), exact = character(0),
+                     except = character(0)) {
+  list(name = name, patterns = patterns, exact = exact, except = except)
+}
+
+# Ordered rule table: the FIRST matching rule wins, so specific rules
+# (e.g. "large pig roundworm", "pigeon") must stay above broader ones ("pig").
+# NOTE(review): "wigeon" (a duck) is mapped to Columbidae, as in the original
+# rule set; presumably it was treated as a misspelling of "pigeon" - confirm.
+hostNameRules <- list(
+  HostRule("gallus gallus domesticus",
+           c("chick", "breeder", "chikc", "poultry", "layer", "laying",
+             "gallus", "broiler")),
+  HostRule("cow", c("calf", "cow", "bos taurus")),
+  HostRule("antilope", exact = "antelope"),
+  HostRule("dog", c("canine", "canis lupus", "dog")),
+  HostRule("Ascaris suum", "large pig roundworm"),
+  HostRule("Columbidae", c("pigeon", "Columba livia", "wigeon")),
+  HostRule("Bubalus bubalis", "buffalo"),
+  HostRule("Sus scrofa domesticus",
+           c("pig", "Sus scrofa domesticus L", "porcine"), exact = "sow"),
+  HostRule("Sus scrofa", "boar"),
+  HostRule("Chaerephon plicatus", "Chaerephon plicata"),
+  # "bat" also matches "lobatus", hence the explicit exception
+  HostRule("bats", c("bat", "Ozimops"), exact = "Vespadelus baverstocki",
+           except = "Rhinolophus lobatus"),
+  HostRule("apodemus", "apodemus spp"),
+  HostRule("Camelus dromedarius", "camel"),
+  HostRule("Tursiops", "bottlenose dolphin"),
+  HostRule("crocidura", "crocidura sp"),
+  HostRule("Cynopterus brachyotis", "Cynopterus brachyotis large intestine"),
+  HostRule("donkey", "domestic donkey"),
+  HostRule("Egretta", "Egretta picata"),
+  HostRule("Epomophorus", "Epomophorus sp"),
+  HostRule("Eptesicus", "Eptesicus sp"),
+  HostRule("equus caballus", "equus caballus"),
+  HostRule("falco", "falco sp"),
+  HostRule("cat", "feline"),
+  HostRule("canidae", "fox"),
+  HostRule("Glossophaginae", "Glossophaginae sp"),
+  # fixed: the original returned "Numididae " with a trailing space
+  HostRule("Numididae", "Guinea fowl"),
+  HostRule("gull", "hering gull"),
+  HostRule("Hipposideros caffer", "Hipposideros cf"),
+  HostRule("Hipposideros", "Hipposideros sp"),
+  HostRule("Macroglossus", "Macroglossus sp"),
+  HostRule("Miniopterus", c("Miniopterus cf", "Miniopterus sp")),
+  HostRule("Mops cf. nanulus DMR-2017", "mops cf"),
+  HostRule("Mormopterus sp. BBvV-2008", "Mormopterus sp"),
+  HostRule("Neoromicia", "Neoromicia cf"),
+  HostRule("night herons", "night-heron"),
+  HostRule("homo sapiens", "homo sapiensc"),
+  HostRule("Nycteris", "Nycteris sp"),
+  HostRule("Paguma larvata", "palm civet", exact = "civet"),
+  HostRule("pavo", "peafowl"),
+  HostRule("Phasianinae", "pheasant"),
+  HostRule("Pipistrellus",
+           c("Pipistrellus cf", "Pipistrellus inexspectatus",
+             "Pipistrellus sp")),
+  HostRule("coturnix", "Quail"),
+  HostRule("Recurvirostra novaehollandiae", "Red-necked Avocet"),
+  HostRule("unclassified Rhinolophus", "Rhinolophus sp"),
+  HostRule("unclassified Scotoecus", "Scotoecus sp"),
+  HostRule("Scotophilus kuhlii",
+           c("Scotomannes kuhlii", "Scotphilus kuhli large intestine")),
+  # fixed: the original returned "unclassified Scotoecus" (a bat genus) here,
+  # copied from the neighbouring rule
+  HostRule("Charadriiformes", "shorebird"),
+  HostRule("Sigmodon", "Sigmodon sp"),
+  HostRule("Melospiza", "sparrow"),
+  HostRule("Cygnus", "swan"),
+  HostRule("unclassified Tadarida", "Tadarida sp"),
+  HostRule("anas", "teal"),
+  HostRule("Aves", "wild bird"),
+  HostRule("rattus", "rat"),
+  HostRule("mus musculus", "mouse"),
+  HostRule("unclassified Chaerephon", "Chaerephon sp"),
+  HostRule("Pycnonotus sinensis", "chinese bulbul"),
+  HostRule("unclassified Chiroptera", "Chiroptera sp"),
+  HostRule("Copsychus saularis", "magpie-robin"),
+  HostRule("Liomys", "Liomys sp"),
+  HostRule("Amazona viridigenalis", "Amazona virdigenalis"),
+  HostRule("unclassified Neoromicia", "Neoromicia sp"),
+  # fixed: the original returned "Amazona viridigenalis" (a parrot) for this
+  # misspelling of "Columba livia"; now consistent with the pigeon rule above
+  HostRule("Columbidae", "Columbia livia"),
+  HostRule("Mustela vison", "mink"),
+  HostRule("Anser caerulescens", "snow goose")
+)
+
+# Map a free-text GenBank /host value to a name that can be looked up in a
+# taxonomy database.
+#
+# x: character; only the first element is used.
+# Returns: a single string. The host text is cut at the first ";", ",", "."
+#   and "(" and trimmed; the name of the first matching entry of
+#   `hostNameRules` is returned, or the trimmed text itself when no rule
+#   matches. NA_character_ is returned for missing or empty input (the
+#   original chain stopped with an error on such input).
+CleanHostName <- function(x){
+  if (length(x) == 0L || is.na(x[1])) {
+    return(NA_character_)
+  }
+  host <- as.character(x[1])
+  for (delimiter in c(";", ",", "\\.", "\\(")) {
+    host <- strsplit(host, delimiter)[[1]][1]
+    if (is.na(host)) {
+      return(NA_character_)
+    }
+  }
+  # cutting at "(" leaves trailing blanks ("Sus scrofa (pig)" -> "Sus scrofa ")
+  # which would defeat the exact comparisons and the later taxonomy lookup
+  host <- trimws(host)
+  if (!nzchar(host)) {
+    return(NA_character_)
+  }
+
+  for (rule in hostNameRules) {
+    matched <- host %in% rule$exact ||
+      any(vapply(rule$patterns,
+                 function(pattern) grepl(pattern, host, ignore.case = TRUE),
+                 logical(1)))
+    if (matched && !(host %in% rule$except)) {
+      return(rule$name)
+    }
+  }
+  host
+}
+
+# Normalise a fragment of a GenBank definition line to a sequence type.
+# Keeps the text before the first ";", removes dots, and returns the result
+# only when it is one of the known types below; otherwise returns NA.
+GetSampleTypeFromDefinition <- function(x){
+  noSemiColon <- strsplit(x, ";")[[1]][1]
+  noDot <- gsub("\\.", "", noSemiColon)
+  knownTypes <- c("partial cds", "complete cds", "partial genome",
+                  "complete genome", "partial sequence", "complete sequence")
+  if (noDot %in% knownTypes) {
+    return(noDot)
+  }
+  NA
+}
+
+# Accessions that InBlackList() reports as blacklisted.
+blackList<- list("KC786228", "AX191447", "AX191449", "FB764528", "HV449436", "CS382036")
+
+# TRUE when accession `x` (compared in upper case) occurs in `blacklist`.
+InBlackList <- function(x, blacklist){
+  toupper(x) %in% blacklist
+}
+```
+
+
+### Parsing
+
+Split into strings for individual records
+
+```{r}
+txt <- readChar(fileName, file.info(fileName)$size)
+
+txt.split <- txt %>%
+  str_split("\n//\n\n") %>%
+  unlist
+
+txt.split <- txt.split[txt.split != ""]
+```
+
+Get a list of GenBankRecord objects for each record. Long runtime!
+
+```{r}
+recs2 <- txt.split %>%
+  map(possibly(~genbankr::readGenBank(NULL, text = .), otherwise = NA))
+recs2[[1]]
+
+```
+
+### Selecting
+
+```{r}
+# Build one named list of metadata per successfully parsed record.
+# Records that failed to parse (the NA placeholder produced by possibly()
+# above) yield NULL and are removed in the Cleaning step.
+res <- lapply(recs2, FUN = function(entry){
+  # only continue if an entry was retrieved
+  if (identical(entry, NA)) {
+    return(NULL)
+  }
+
+  sources <- mcols(sources(entry))
+  # db_xref can hold several cross-references (or none); use the first
+  # "taxon:" entry instead of blindly taking element 1, NA when absent
+  xrefs <- unlist(sources$db_xref)
+  taxon <- sub("^taxon:", "", grep("^taxon:", xrefs, value = TRUE)[1])
+  orfs <- mcols(genes(entry))[["gene_id"]]
+  accession <- strsplit(vers(entry), " ")[[1]][1]
+
+  list(accession = accession,
+       virus = if (!is.null(sources$strain)) sources$strain[1] else NA,
+       virusTaxonId = taxon,
+       seq_type = GetSampleTypeFromDefinition(
+         strsplit(definition(entry), ", ")[[1]][2]),
+       orf = paste(orfs, collapse = ","),
+       blacklisted = InBlackList(accession, blackList),
+       # number of "N" letters in the record's sequence, selected by letter
+       # rather than by position 15 of the alphabetFrequency() result
+       n_deleted = sum(letterFrequency(getSeq(entry), letters = "N")),
+       host = if (!is.null(sources$host)) CleanHostName(sources$host) else NA)
+})
+```
+
+### Cleaning
+
+Remove instances where no gb entry was returned
+warning: its possible that user input is needed for fetching the taxIds.
+```{r}
+CleanedEntries<- list.clean(res, function(x) length(x) == 0L, recursive = TRUE)
+dt <- rbindlist(CleanedEntries)
+uniqueHosts <- unique(dt[!is.na(host),host])
+
+taxIdMap <-unlist(lapply(uniqueHosts, function(x){taxId = get_uid(x)
+names(taxId) <- c(x)
+taxId}))
+for (idx in seq_len(nrow(dt))) {
+  hostName <- dt[idx,]$host
+  if (!is.na(hostName)) {
+    dt[idx, hostTaxonId := taxIdMap[hostName]]
+  }
+}
+```
+
+remove earlier identified duplicates: https://github.com/ababaian/serratus/blob/master/notebook/200420_cov2_pangenome.ipynb
+```{r}
+duplicateTable <- fread(fileNameDuplicates)
+names(duplicateTable) <- c("n_duplicates", "accessions")
+
+duplicateTable[,groupRepresentative:=strsplit(accessions,", ")[[1]][1]]
+duplicates <- unlist(lapply(duplicateTable$accessions, function(x){
+  accessions <- strsplit(x[1], ", ")[[1]]
+  idxWithHostId <- 1
+  for (idx in 1:length(accessions)) {
+    if (!is.null(dt[accession== accessions[idx],])) {
+      print("not found")
+      print(accessions[idx])
+      next
+    }
+    if (!is.na(dt[accession == accessions[idx],hostTaxonId])) {
+      print("with host")
+      print(accessions[idx])
+      idxWithHostId <- idx
+      break
+    }
+  }
+  accessions[-idxWithHostId]
+}))
+
+noDup <- dt[accession %in% duplicates,]
+```
+
+
+### Writing
+
+```{r}
+write.table(row.names = FALSE, noDup, "test3.csv", sep = ",")
+getwd()
+```
\ No newline at end of file
From 737c23d846d226acc93fe8507dddaf53791eb41f Mon Sep 17 00:00:00 2001
From: Bdegraaf1234 <35769574+Bdegraaf1234@users.noreply.github.com>
Date: Mon, 27 Apr 2020 13:22:22 +0200
Subject: [PATCH 2/3] Create README

---
 src/genbankParser/README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 src/genbankParser/README.md

diff --git a/src/genbankParser/README.md b/src/genbankParser/README.md
new file mode 100644
index 0000000..9db730e
--- /dev/null
+++ b/src/genbankParser/README.md
@@ -0,0 +1,15 @@
+# serratus_genbankParser.Rmd
+
+## Usage
+
+not formatted as script yet
+
+### Dependencies
+ Modules: R 4.0, BiocManager(devel), genbankr(devel), taxize, data.table, rlist, devtools, GenomicRanges, Biostrings, tidyverse
+ Files: cov0.gb, cov0.duplicates and cov0.id99.uc.
+
+## Description
+
+The genbankParser is designed to be run as a standalone script to generate a formatted and cleaned csv table of the covid pan-genome from genbank input. It deals with most hostTaxonId mapping errors, and attempts to infer these hostTaxonIds for duplicate and highly homologous entries by checking if all members of a cluster/duplicate group provide the same hostTaxonId and, if so, inferring it for those where none was provided (in a new column).
+
+Three meta-data files are required: the covid pan-genome from genbank [file](https://serratus-public.s3.amazonaws.com/seq/cov0/cov0.gb), the list of duplicates (generated here: [file](https://github.com/ababaian/serratus/blob/master/notebook/200420_cov2_pangenome.ipynb)) and a table containing homology information [file](https://serratus-public.s3.amazonaws.com/seq/cov2r/cov0.id99.uc). Filepaths have to be edited into the code as it stands.
From 26d0909e36b97caf7e7ab5c79d4e23f0575de87c Mon Sep 17 00:00:00 2001
From: Bdegraaf1234 <35769574+Bdegraaf1234@users.noreply.github.com>
Date: Mon, 27 Apr 2020 13:40:25 +0200
Subject: [PATCH 3/3] commit unsaved changes

---
 src/genbankParser/serratus_genbankParser.Rmd | 106 ++++++++++++++-----
 1 file changed, 77 insertions(+), 29 deletions(-)

diff --git a/src/genbankParser/serratus_genbankParser.Rmd b/src/genbankParser/serratus_genbankParser.Rmd
index 4f8ac16..99d275b 100644
--- a/src/genbankParser/serratus_genbankParser.Rmd
+++ b/src/genbankParser/serratus_genbankParser.Rmd
@@ -14,7 +14,6 @@ BiocManager::install("genbankr", version = "devel")
 install.packages("taxize")
 install.packages("data.table")
 install.packages("rlist")
-
 ```
 
 ```{r}
@@ -29,8 +28,10 @@ library(taxize)
 ```
 
 ```{r}
-fileName <- "C:/Users/Gebruiker/Downloads/cov0.gb"
-fileNameDuplicates <- "C:/Users/Gebruiker/Downloads/cov0.duplicates"
+fileName <- "cov0.gb"
+fileNameDuplicates <- "cov0.duplicates"
+fileNameHomologues <- "cov0.id99.uc"
+writePath <- "parsedGenbankFile.csv"
 ```
 
 ### Functions
@@ -335,7 +336,7 @@ res <- lapply(recs2, FUN = function(entry){
 
 ### Cleaning
 
-Remove instances where no gb entry was returned
+Remove instances where no gb entry was returned, and fetch and map hostTaxonIds
 warning: its possible that user input is needed for fetching the taxIds.
 ```{r}
 CleanedEntries<- list.clean(res, function(x) length(x) == 0L, recursive = TRUE)
@@ -353,38 +354,85 @@ for (idx in seq_len(nrow(dt))) {
 }
 ```
 
-remove earlier identified duplicates: https://github.com/ababaian/serratus/blob/master/notebook/200420_cov2_pangenome.ipynb
+mark earlier identified duplicates: https://github.com/ababaian/serratus/blob/master/notebook/200420_cov2_pangenome.ipynb
+try and infer host taxon id for all duplicates: if there is only a single taxonId for the whole cluster, we infer it
 ```{r}
 duplicateTable <- fread(fileNameDuplicates)
 names(duplicateTable) <- c("n_duplicates", "accessions")
 
-duplicateTable[,groupRepresentative:=strsplit(accessions,", ")[[1]][1]]
-duplicates <- unlist(lapply(duplicateTable$accessions, function(x){
-  accessions <- strsplit(x[1], ", ")[[1]]
-  idxWithHostId <- 1
-  for (idx in 1:length(accessions)) {
-    if (!is.null(dt[accession== accessions[idx],])) {
-      print("not found")
-      print(accessions[idx])
-      next
-    }
-    if (!is.na(dt[accession == accessions[idx],hostTaxonId])) {
-      print("with host")
-      print(accessions[idx])
-      idxWithHostId <- idx
-      break
-    }
-  }
-  accessions[-idxWithHostId]
-}))
-
-noDup <- dt[accession %in% duplicates,]
+dupMap <- c()   # duplicate accession -> accession of its group leader
+hostMap <- c()  # duplicate accession -> host taxon id inferred for its group (NA if none)
+
+for (idx in seq_len(nrow(duplicateTable))) {
+  accessions <- strsplit(duplicateTable[idx,accessions], ", ")[[1]]
+  # the first accession of a group is its leader; the rest are its duplicates
+  leaderToAssign <- accessions[1]
+  dupsToAssign <- accessions[-1]
+
+  dups <- dt[accession %in% accessions,]
+  localInferredHostTaxonId <- unique(dups[!is.na(hostTaxonId),hostTaxonId])
+  if (length(localInferredHostTaxonId) == 1) {  # exactly one distinct known host id in the group
+    duplicateTable[idx, InferredHostTaxonId := localInferredHostTaxonId]
+  }
+  else{
+    localInferredHostTaxonId <- NA  # none known, or conflicting ids: do not infer
+  }
+
+  duplicateTable[idx, leader:=leaderToAssign]
+  duplicateTable[idx, duplicates:=paste(dupsToAssign, collapse = ";")]
+  localValues <- rep(leaderToAssign, length(dupsToAssign))
+  localInferredHostTaxonIds <- rep(localInferredHostTaxonId, length(dupsToAssign))
+  names(localValues) <- dupsToAssign
+  names(localInferredHostTaxonIds) <- dupsToAssign
+  dupMap <- c(localValues, dupMap)
+  hostMap <- c(localInferredHostTaxonIds, hostMap)
+}
+
+dt[,isDuplicated:=dupMap[accession]]  # leader accession, NA when not a listed duplicate
+dt[,InferredHostTaxonId:=hostMap[accession]]
 ```
 
+mark earlier identified extreme homologues: https://serratus-public.s3.amazonaws.com/seq/cov2r/cov0.id99.uc
+try and infer host taxon id for all homologues. if there is only a single taxonId for the whole cluster, we infer.
+
+Warning: data table acts a little funky here.
+```{r}
+homologueTableRead <- fread(fileNameHomologues)
+homologueTable <- homologueTableRead[,c(1,2,4,9,10)]
+names(homologueTable) <- c("isHomologue", "ClustId", "Homology", "FastaHeader", "LeaderFastaHeader")
+homologueTable[, Accession := gsub(" .*", "", FastaHeader)]  # accession = header text before the first space
+homologueTable[, LeaderAccession := gsub(" .*", "", LeaderFastaHeader)]
+
+uniqueClustIds<-unique(homologueTable$ClustId)
+homologueMap<-c()      # homologue accession -> accession of its cluster leader
+homologueHostMap<-c()  # homologue accession -> host taxon id inferred for its cluster (NA if none)
+for (id in uniqueClustIds) {
+  cluster <- homologueTable[ClustId == id & isHomologue == "H",]  # only rows flagged "H" in column 1
+  homAccessions <- cluster$Accession
+
+  homologues <- dt[accession %in% homAccessions,]
+  localInferredHostTaxonId <- unique(homologues[!is.na(hostTaxonId),hostTaxonId])
+  if (length(localInferredHostTaxonId) == 1) {  # exactly one distinct known host id in the cluster
+    homologueTable[ClustId == id, InferredHostTaxonId := localInferredHostTaxonId]
+  }
+  else{
+    localInferredHostTaxonId <- NA  # none known, or conflicting ids: do not infer
+  }
+
+  localInferredHostTaxonIds <- rep(localInferredHostTaxonId, nrow(cluster))
+  localValues <- cluster$LeaderAccession
+  names(localValues) <- cluster$Accession
+  names(localInferredHostTaxonIds) <- cluster$Accession
+  homologueMap <- c(localValues, homologueMap)
+  homologueHostMap <- c(localInferredHostTaxonIds, homologueHostMap)
+}
+
+dt[,hasHomologue:=homologueMap[accession]]  # leader accession, NA when not in an "H" row
+dt[,HomologueInferredHostTaxonId:=homologueHostMap[accession]]
+```
 
 ### Writing
 
 ```{r}
-write.table(row.names = FALSE, noDup, "test3.csv", sep = ",")
-getwd()
-```
\ No newline at end of file
+write.table(row.names = FALSE, dt, writePath, sep = ",")
+```