-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move chem16S::calc_metrics() to calc.metrics()
- Loading branch information
Showing
9 changed files
with
348 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# canprot/calc.metrics.R | ||
# Calculate selected chemical metrics for proteins | ||
# 20191027 initial version as canprot/metrics.R | ||
# 20230704 adapted for chem16S/calc_metrics.R | ||
# 20240302 moved to canprot | ||
calc.metrics <- function(AAcomp, metrics = c("Zc", "nO2", "nH2O")) {
  # Calculate one column of values for each requested metric.
  #
  # AAcomp:  amino acid composition; passed unchanged to each metric function
  # metrics: character vector of metric names. Recognized names are
  #          "Zc", "nH2O", "nO2", "GRAVY", "pI", "MW",
  #          "length" / "plength" (matched in any letter case), and the
  #          elemental ratios written as "H/C", "H_C" or "HC"
  #          (likewise for N, O and S).
  # Returns a data frame with one column per element of `metrics`, in the
  # order given and named exactly as given.
  # Stops with an error naming the first metric that is not recognized.

  # Compute the values for a single metric name
  one_metric <- function(metric) {
    # Only the length aliases are matched case-insensitively
    key <- if (tolower(metric) %in% c("length", "plength")) "plength" else metric
    switch(key,
      Zc = Zc(AAcomp),
      nH2O = nH2O(AAcomp),
      nO2 = nO2(AAcomp),
      GRAVY = GRAVY(AAcomp),
      pI = pI(AAcomp),
      MW = MW(AAcomp),
      plength = plength(AAcomp),
      "H/C" = , H_C = , HC = HC(AAcomp),
      "N/C" = , N_C = , NC = NC(AAcomp),
      "O/C" = , O_C = , OC = OC(AAcomp),
      "S/C" = , S_C = , SC = SC(AAcomp),
      # call. = FALSE: the call here would be lapply's internal FUN(X[[i]], ...)
      stop(paste0("'", metric, "' is not an available metric"), call. = FALSE)
    )
  }

  values <- lapply(metrics, one_metric)

  # Bind the per-metric vectors column-wise and label them as requested
  values <- do.call(cbind, values)
  colnames(values) <- metrics
  as.data.frame(values)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
## Tests for Zc, nO2, and nH2O adapted from canprot/tests/test-metrics.R on 20230704 | ||
|
||
info <- "Results are as expected for Zc, nO2, and nH2O" | ||
|
||
## Calculate metrics for a few proteins the "long way" (using functions in CHNOSZ) | ||
#library(CHNOSZ) | ||
#basis(c("glutamine", "glutamic acid", "cysteine", "H2O", "O2")) | ||
#Zc.ref <- ZC(protein.formula(1:6)) | ||
#nO2.ref <- protein.basis(1:6)[, "O2"] / protein.length(1:6) | ||
## NOTE: subtract 1 so as to exclude terminal groups from calculation of nH2O | ||
#nH2O.ref <- (protein.basis(1:6)[, "H2O"] - 1) / protein.length(1:6) | ||
|
||
# Hard-coded reference values, so that the test does not need CHNOSZ at run time | ||
# (presumably the output of the commented-out code above -- TODO confirm) | ||
Zc.ref <- c(-0.11633875106929, -0.0272787757817698, -0.195689166193988, -0.0492957746478873, -0.170212765957447, 0.0163132137030995) | ||
nO2.ref <- c(-0.699539170506912, -0.522294022617124, -0.81466049382716, -0.574137931034483, -0.716346153846154, -0.471317829457364) | ||
nH2O.ref <- c(-1.17465437788018, -0.881098546042003, -0.941666666666667, -0.955172413793103, -0.730769230769231, -0.886821705426357) | ||
|
||
# Calculate metrics using calc.metrics() function in canprot | ||
# The fixture is a data frame of six proteins: five identifying columns followed by | ||
# one count column for each of the 20 amino acids (3-letter abbreviations) | ||
AAcomp <- | ||
structure(list(protein = c("O08452", "AMY", "AMYA", "BPT1", "CYC", | ||
"LYSC"), organism = c("PYRFU", "BACSU", "PYRFU", "BOVIN", "BOVIN", | ||
"CHICK"), ref = c("UniProt", "UniProt", "UniProt", "UniProt", | ||
"UniProt", "UniProt"), abbrv = c("O08452", "P00691", "P49067", | ||
"P00974", "P62894", "P00698"), chains = c(1L, 1L, 1L, 1L, 1L, | ||
1L), Ala = c(28, 49, 26, 6, 6, 12), Cys = c(5, 1, 2, 6, 2, 8), | ||
Asp = c(33, 44, 35, 2, 3, 7), Glu = c(23, 23, 66, 2, 9, 2 | ||
), Phe = c(20, 20, 37, 4, 4, 3), Gly = c(45, 51, 44, 6, 14, | ||
12), His = c(12, 16, 14, 0, 3, 1), Ile = c(25, 35, 41, 2, | ||
6, 6), Lys = c(19, 30, 48, 4, 18, 6), Leu = c(27, 36, 59, | ||
2, 6, 8), Met = c(4, 10, 12, 1, 2, 2), Asn = c(21, 54, 24, | ||
3, 5, 14), Pro = c(20, 23, 28, 4, 4, 2), Gln = c(7, 29, 15, | ||
1, 3, 3), Arg = c(14, 24, 35, 6, 2, 11), Ser = c(21, 55, | ||
33, 1, 1, 10), Thr = c(16, 45, 12, 3, 8, 7), Val = c(31, | ||
32, 59, 1, 3, 6), Trp = c(26, 14, 17, 0, 1, 6), Tyr = c(37, | ||
28, 41, 4, 4, 3)), row.names = c(NA, 6L), class = "data.frame") | ||
|
||
# No metrics argument: the default set is used (Zc, nO2, nH2O) | ||
metrics <- calc.metrics(AAcomp) | ||
|
||
# Perform the tests | ||
expect_equivalent(metrics$Zc, Zc.ref, info = info) | ||
expect_equivalent(metrics$nO2, nO2.ref, info = info) | ||
expect_equivalent(metrics$nH2O, nH2O.ref, info = info) | ||
|
||
## Tests for H/C, N/C, O/C, and S/C added on 20230707 | ||
|
||
# Fixture: four proteins with the same column layout as the data frame used above | ||
# (identifying columns followed by counts of the 20 amino acids) | ||
AAcomp <- | ||
structure(list(protein = c("LYSC", "RNAS1", "AMYA", "CSG"), organism = c("CHICK", | ||
"BOVIN", "PYRFU", "HALJP"), ref = c("UniProt", "UniProt", "UniProt", | ||
"UniProt"), abbrv = c("P00698", "P61823", "P49067", "Q9C4B4"), | ||
chains = c(1L, 1L, 1L, 1L), Ala = c(12, 12, 26, 61), Cys = c(8, | ||
8, 2, 0), Asp = c(7, 5, 35, 122), Glu = c(2, 5, 66, 86), | ||
Phe = c(3, 3, 37, 20), Gly = c(12, 3, 44, 78), His = c(1, | ||
4, 14, 4), Ile = c(6, 3, 41, 47), Lys = c(6, 10, 48, 4), | ||
Leu = c(8, 2, 59, 47), Met = c(2, 4, 12, 0), Asn = c(14, | ||
10, 24, 51), Pro = c(2, 4, 28, 29), Gln = c(3, 7, 15, 21), | ||
Arg = c(11, 4, 35, 19), Ser = c(10, 15, 33, 74), Thr = c(7, | ||
10, 12, 79), Val = c(6, 9, 59, 66), Trp = c(6, 0, 17, 2), | ||
Tyr = c(3, 6, 41, 18)), row.names = c(6L, 9L, 3L, 14L), class = "data.frame") | ||
|
||
|
||
# Commented-out CHNOSZ recipe for the elemental ratios of the residues | ||
# (terminal H-OH removed before dividing by the number of C) | ||
# library(CHNOSZ) | ||
# pf <- as.data.frame(protein.formula(AAcomp)) | ||
# pf$H <- pf$H - 2 # Remove terminal H-OH | ||
# pf$O <- pf$O - 1 # Remove terminal H-OH | ||
# HCref <- pf$H / pf$C | ||
# OCref <- pf$O / pf$C | ||
# NCref <- pf$N / pf$C | ||
# SCref <- pf$S / pf$C | ||
|
||
# Hard-coded reference values (presumably the output of the recipe above -- TODO confirm) | ||
HC.ref <- c(1.56117455138662, 1.57739130434783, 1.50964265456608, 1.53856636685745) | ||
OC.ref <- c(0.300163132137031, 0.333913043478261, 0.276517300056721, 0.405287544289997) | ||
NC.ref <- c(0.314845024469821, 0.297391304347826, 0.250992626205332, 0.264649768329245) | ||
SC.ref <- c(0.0163132137030995, 0.0208695652173913, 0.00397050482132728, 0) | ||
|
||
# Request the ratios by their short names; calc.metrics() names the output | ||
# columns after the requested metrics, so they can be accessed as $HC, $OC, ... | ||
metrics <- calc.metrics(AAcomp, c("HC", "OC", "NC", "SC")) | ||
expect_equivalent(metrics$HC, HC.ref) | ||
expect_equivalent(metrics$NC, NC.ref) | ||
expect_equivalent(metrics$OC, OC.ref) | ||
expect_equivalent(metrics$SC, SC.ref) | ||
|
||
# Test for length added 20240302 | ||
# "Length" exercises the case-insensitive matching of the length metric in calc.metrics(); | ||
# [, 1] extracts the single result column as a plain vector for comparison | ||
length.ref <- c(129, 124, 648, 828) | ||
length.calc <- calc.metrics(AAcomp, "Length")[, 1] | ||
expect_equal(length.calc, length.ref) |
Oops, something went wrong.