In [0]:
## Load Libraries ##
library(tidyverse)
library(docstring)
source('./pLink_functions.R')

## Process and filter pLink spectra to remove contaminants ##

In [0]:
# Input pLink spectra exports, keyed by sample name.
flist <- c(
  MDA = "./MDA_MergedDB_spectra.csv",
  LM2 = "./LM2_MergedDB_spectra.csv"
)

# Position offset table (original note: corrects for removal of the signal
# peptide). It is handed to renumber_plink_multival() further down.
off_table <- read_tsv("./Julia_edited_SPmTPremoved_pos.txt")

In [0]:
# Build one processed spectra table per sample (list names come from flist).
# The step descriptions are carried over from the original notes; the helpers
# are presumably defined in the sourced pLink_functions.R.
data <- flist %>%
  map(read_csv) %>% # load the data
  # split the peptide column into 4 columns:
  # Sequence_A, Sequence_B, PepPos_A and PepPos_B
  map(function(tbl) split_into_2_seq(tbl, "Peptide")) %>%
  # contaminant filter: keep rows whose Proteins value contains "sp|", then
  # one or more non-"/" characters, then "-sp|"
  map(function(tbl) {
    filter(tbl, str_detect(Proteins, "sp\\|[^/]+-sp\\|"))
  }) %>%
  # split the title into Spectrum_File, Spectrum_ID, Precursor_ID and scans
  map(function(tbl) split_title(tbl, "Title")) %>%
  # split Proteins into ProtA and ProtB, taking only the 1st option when
  # there are multiple
  map(function(tbl) split_into_2_acc(tbl, "Proteins")) %>%
  # renumber the positions according to the offset table
  map(function(tbl) renumber_plink_multival(tbl, off_table))


In [0]:
# Write each filtered spectra table next to its input file, swapping the
# ".csv" extension for "_filtered.csv".
# walk2() is used instead of map2() because write_csv() is called only for its
# side effect: nothing is collected and the tables are not echoed back into
# the notebook output. The pattern is anchored with "$" so only the trailing
# extension is replaced.
walk2(data, str_replace(flist, "\\.csv$", "_filtered.csv"), write_csv)

## Process and filter pLink sites to remove contaminants ##

In [0]:
# Input pLink sites exports, keyed by sample name.
flist <- c(
  "MDA" = "./MDA_MergedDB_sites.csv",
  "LM2" = "./LM2_MergedDB_sites.csv"
)

# Position offset table (correct removal of signal peptide).
off_table <- read_tsv("./Julia_edited_SPmTPremoved_pos.txt")

# Pattern of the experiment number in the file name: digits, "_", one digit,
# immediately preceded by "_SCX" (lookbehind, so "_SCX" itself is not part of
# the match).
exp_pattern <- "(?<=_SCX)\\d+_\\d"

#### Load the nested site table, extract summary columns from the nested spectra data, then drop the nested column

In [0]:
# Read each pLink sites file into a nested data frame, one per sample.
data <- map(flist, read_pLink_sites)

# Per sample: split the "Title" column inside every nested `spectra_data`
# table, derive the per-site summary columns from it, then drop the nested
# column. Each extract_* helper is mapped over the nested tables and the
# result is flattened into a plain column with unlist().
data <- map(data, function(sites) {
  sites %>%
    # split the title inside the nested spectra_data to prepare for extraction
    mutate(spectra_data = map(spectra_data, split_title, "Title")) %>%
    mutate(
      # file names, extracted based on exp_pattern
      spec_files = spectra_data %>%
        map(extract_spec_files_from_nested_spec_data, exp_pattern) %>%
        unlist(use.names = FALSE),
      # IDs: spectrum, precursor, scans
      spectrum_IDs = spectra_data %>%
        map(extract_ID_from_nested_spec_data, "Spectrum_ID") %>%
        unlist(use.names = FALSE),
      precursor_IDs = spectra_data %>%
        map(extract_ID_from_nested_spec_data, "Precursor_ID") %>%
        unlist(use.names = FALSE),
      scans = spectra_data %>%
        map(extract_ID_from_nested_spec_data, "scans") %>%
        unlist(use.names = FALSE),
      # min score (as named by the original note), taken from "Score"
      min_score = spectra_data %>%
        map(extract_score_from_nested_spec_data, "Score") %>%
        unlist(use.names = FALSE)
    ) %>%
    # the nested tables are no longer needed once the summaries are extracted
    select(-spectra_data)
})

#### process the parent table

In [0]:
# Clean up the parent (site-level) tables, one per sample.
data <- data %>%
  # filter out contaminants: keep rows whose Protein value contains "sp|",
  # then one or more non-"/" characters, then "-sp|"
  map(function(tbl) filter(tbl, str_detect(Protein, "sp\\|[^/]+-sp\\|"))) %>%
  map(function(tbl) split_into_2_acc(tbl, "Protein")) %>%
  # renumber using the position offset table
  map(function(tbl) renumber_plink_multival(tbl, off_table))

In [0]:
# Write the XL tables next to their input files, swapping the ".csv"
# extension for "_filtered.csv".
# walk2() is used instead of map2() because write_csv() is called only for its
# side effect: nothing is collected and the tables are not echoed back into
# the notebook output. The pattern is anchored with "$" so only the trailing
# extension is replaced.
walk2(data, str_replace(flist, "\\.csv$", "_filtered.csv"), write_csv)

## Convert processed pLink tables to CrossID format ##

In [0]:
# Filtered spectra tables to convert, keyed by sample name.
flist <- c(
  "MDA" = "./MDA_MergedDB_spectra_filtered.csv",
  "LM2" = "./LM2_MergedDB_spectra_filtered.csv"
)

data <- flist %>%
  map(read_csv) %>%
  map(CSMs_2_crossID) # renames the columns to match the crossID column names

# Write each converted table next to its input, swapping ".csv" for
# "_crossID.csv". walk2() is used because write_csv() is called only for its
# side effect; the "$" anchor restricts the replacement to the trailing
# extension.
walk2(data, str_replace(flist, "\\.csv$", "_crossID.csv"), write_csv)


In [0]:
# Filtered sites tables to convert, keyed by sample name.
flist <- c(
  "MDA" = "./MDA_MergedDB_sites_filtered.csv",
  "LM2" = "./LM2_MergedDB_sites_filtered.csv"
)

# Sites_2_crossID is applied to each loaded table; judging by its name and the
# output file suffix it converts the table to the CrossID layout (confirm
# against pLink_functions.R).
data <- flist %>%
  map(read_csv) %>%
  map(Sites_2_crossID)

# Write each converted table next to its input, swapping ".csv" for
# "_crossID.csv". walk2() is used because write_csv() is called only for its
# side effect; the "$" anchor restricts the replacement to the trailing
# extension.
walk2(data, str_replace(flist, "\\.csv$", "_crossID.csv"), write_csv)
