Skip to content

Commit

Permalink
Merge 32a1b5c into 1633241
Browse files Browse the repository at this point in the history
  • Loading branch information
ldhtnp committed Jun 12, 2024
2 parents 1633241 + 32a1b5c commit 625fb27
Show file tree
Hide file tree
Showing 11 changed files with 40,281 additions and 50 deletions.
32 changes: 17 additions & 15 deletions pvactools/lib/aggregate_all_epitopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from abc import ABCMeta, abstractmethod
import itertools
import csv
import ast
from pvactools.lib.run_utils import get_anchor_positions

from pvactools.lib.prediction_class import PredictionClass

Expand Down Expand Up @@ -281,6 +283,20 @@ def __init__(
probs[hla] = line
anchor_probabilities[length] = probs
self.anchor_probabilities = anchor_probabilities

mouse_anchor_positions = {}
for length in [8, 9, 10, 11]:
base_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
file_name = os.path.join(base_dir, 'tools', 'pvacview', 'data', "mouse_anchor_predictions_{}_mer.tsv".format(length))
values = {}
with open(file_name, 'r') as fh:
reader = csv.DictReader(fh, delimiter="\t")
for line in reader:
allele = line.pop('Allele')
values[allele] = {int(k): ast.literal_eval(v) for k, v in line.items()}
mouse_anchor_positions[length] = values
self.mouse_anchor_positions = mouse_anchor_positions

self.allele_specific_anchors = allele_specific_anchors
self.anchor_contribution_threshold = anchor_contribution_threshold
super().__init__()
Expand Down Expand Up @@ -362,7 +378,7 @@ def is_anchor_residue_pass(self, mutation):
binding_threshold = self.binding_threshold

anchor_residue_pass = True
anchors = self.get_anchor_positions(mutation['HLA Allele'], len(mutation['MT Epitope Seq']))
anchors = get_anchor_positions(mutation['HLA Allele'], len(mutation['MT Epitope Seq']), self.allele_specific_anchors, self.anchor_probabilities, self.anchor_contribution_threshold, self.mouse_anchor_positions)
# parse out mutation position from str
position = mutation["Mutation Position"]
if pd.isna(position):
Expand All @@ -382,20 +398,6 @@ def is_anchor_residue_pass(self, mutation):
anchor_residue_pass = False
return anchor_residue_pass

def get_anchor_positions(self, hla_allele, epitope_length):
if self.allele_specific_anchors and epitope_length in self.anchor_probabilities and hla_allele in self.anchor_probabilities[epitope_length]:
probs = self.anchor_probabilities[epitope_length][hla_allele]
positions = []
total_prob = 0
for (pos, prob) in sorted(probs.items(), key=lambda x: x[1], reverse=True):
total_prob += float(prob)
positions.append(int(pos))
if total_prob > self.anchor_contribution_threshold:
return positions

return [1, 2, epitope_length - 1 , epitope_length]


#assign mutations to a "Classification" based on their favorability
def get_tier(self, mutation, vaf_clonal):
if self.use_allele_specific_binding_thresholds and mutation['HLA Allele'] in self.allele_specific_binding_thresholds:
Expand Down
16 changes: 16 additions & 0 deletions pvactools/lib/run_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,19 @@ def float_range_checker(arg):
def supported_amino_acids():
return ["A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"]

def get_anchor_positions(hla_allele, epitope_length, allele_specific_anchors, anchor_probabilities, anchor_contribution_threshold, mouse_anchor_positions):
if allele_specific_anchors and epitope_length in anchor_probabilities and hla_allele in anchor_probabilities[epitope_length]:
probs = anchor_probabilities[epitope_length][hla_allele]
positions = []
total_prob = 0
for (pos, prob) in sorted(probs.items(), key=lambda x: x[1], reverse=True):
total_prob += float(prob)
positions.append(int(pos))
if total_prob > anchor_contribution_threshold:
return positions
elif allele_specific_anchors and epitope_length in mouse_anchor_positions and hla_allele in mouse_anchor_positions[epitope_length]:
values = mouse_anchor_positions[epitope_length][hla_allele]
positions = [pos for pos, val in values.items() if val]
return positions

return [1, 2, epitope_length - 1 , epitope_length]
30 changes: 15 additions & 15 deletions pvactools/lib/top_score_filter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import ast
import argparse
import re
import os
Expand Down Expand Up @@ -137,6 +138,19 @@ def __init__(
anchor_probabilities[length] = probs
self.anchor_probabilities = anchor_probabilities

mouse_anchor_positions = {}
for length in [8, 9, 10, 11]:
base_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
file_name = os.path.join(base_dir, 'tools', 'pvacview', 'data', "mouse_anchor_predictions_{}_mer.tsv".format(length))
values = {}
with open(file_name, 'r') as fh:
reader = csv.DictReader(fh, delimiter="\t")
for line in reader:
allele = line.pop('Allele')
values[allele] = {int(k): ast.literal_eval(v) for k, v in line.items()}
mouse_anchor_positions[length] = values
self.mouse_anchor_positions = mouse_anchor_positions

def execute(self):
with open(self.input_file) as input_fh, open(self.output_file, 'w') as output_fh:
reader = csv.DictReader(input_fh, delimiter = "\t")
Expand Down Expand Up @@ -215,28 +229,14 @@ def find_best_line(self, lines):
sorted_anchor_residue_pass_lines = sorted(anchor_residue_pass_lines, key=lambda d: (float(d["{} MT IC50 Score".format(self.mt_top_score_metric)]), d['TSL Sort'], -int(d['Transcript Length'])))
return sorted_anchor_residue_pass_lines[0]

def get_anchor_positions(self, hla_allele, epitope_length):
if self.allele_specific_anchors and epitope_length in self.anchor_probabilities and hla_allele in self.anchor_probabilities[epitope_length]:
probs = self.anchor_probabilities[epitope_length][hla_allele]
positions = []
total_prob = 0
for (pos, prob) in sorted(probs.items(), key=lambda x: x[1], reverse=True):
total_prob += float(prob)
positions.append(pos)
if total_prob > self.anchor_contribution_threshold:
return positions

return [1, 2, epitope_length - 1 , epitope_length]


def is_anchor_residue_pass(self, line):
if self.use_allele_specific_binding_thresholds:
binding_threshold = self.allele_specific_binding_thresholds[line['HLA Allele']]
else:
binding_threshold = self.binding_threshold

anchor_residue_pass = True
anchors = self.get_anchor_positions(line['HLA Allele'], len(line['MT Epitope Seq']))
anchors = get_anchor_positions(line['HLA Allele'], len(line['MT Epitope Seq']), self.allele_specific_anchors, self.anchor_probabilities, self.anchor_contribution_threshold, self.mouse_anchor_positions)
# parse out mutation position from str
position = line["Mutation Position"]
if '-' in position:
Expand Down
88 changes: 69 additions & 19 deletions pvactools/tools/pvacview/anchor_and_helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ anchor_data[[9]] <- read.table(curl("https://raw.githubusercontent.com/griffithl
anchor_data[[10]] <- read.table(curl("https://raw.githubusercontent.com/griffithlab/pVACtools/ae938113ddbbe6c6eeecebf94459d449facd2c2f/tools/pvacview/data/Normalized_anchor_predictions_10_mer.tsv"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)
anchor_data[[11]] <- read.table(curl("https://raw.githubusercontent.com/griffithlab/pVACtools/ae938113ddbbe6c6eeecebf94459d449facd2c2f/tools/pvacview/data/Normalized_anchor_predictions_11_mer.tsv"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)

## Load Mouse Anchor data
mouse_anchor_data <- list()
mouse_anchor_data[[8]] <- read.table(curl("https://raw.githubusercontent.com/ldhtnp/pVACtools/add_mouse_anchors/pvactools/tools/pvacview/data/mouse_anchor_predictions_8_mer.tsv"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)
mouse_anchor_data[[9]] <- read.table(curl("https://raw.githubusercontent.com/ldhtnp/pVACtools/add_mouse_anchors/pvactools/tools/pvacview/data/mouse_anchor_predictions_9_mer.tsv"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)
mouse_anchor_data[[10]] <- read.table(curl("https://raw.githubusercontent.com/ldhtnp/pVACtools/add_mouse_anchors/pvactools/tools/pvacview/data/mouse_anchor_predictions_10_mer.tsv"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)
mouse_anchor_data[[11]] <- read.table(curl("https://raw.githubusercontent.com/ldhtnp/pVACtools/add_mouse_anchors/pvactools/tools/pvacview/data/mouse_anchor_predictions_11_mer.tsv"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)

#get binding affinity colors cutoffs given HLA

scale_binding_affinity <- function(allele_specific_binding_thresholds, use_allele_specific_binding_thresholds, binding_threshold, hla, current_ba) {
Expand Down Expand Up @@ -75,10 +82,29 @@ peptide_coloring <- function(hla_allele, peptide_row) {
return(c("#999999"))
}
position <- as.numeric(peptide_row["x_pos"])
anchor_score <- as.numeric(anchor_data[[peptide_length]][anchor_data[[peptide_length]]["HLA"] == hla_allele][2:(peptide_length + 1)])
value_bins <- cut(anchor_score, breaks = seq(0, 1, len = 100),
include.lowest = TRUE)
colors <- colorRampPalette(c("lightblue", "blue"))(99)[value_bins]
if (any(hla_allele == anchor_data[[peptide_length]]["HLA"])) {
anchor_score <- as.numeric(anchor_data[[peptide_length]][anchor_data[[peptide_length]]["HLA"] == hla_allele][2:(peptide_length + 1)])
value_bins <- cut(anchor_score, breaks = seq(0, 1, len = 100),
include.lowest = TRUE)
colors <- colorRampPalette(c("lightblue", "blue"))(99)[value_bins]
} else if (any(hla_allele == mouse_anchor_data[[peptide_length]]["Allele"])) {
mouse_position_data <- (mouse_anchor_data[[peptide_length]][mouse_anchor_data[[peptide_length]]["Allele"] == hla_allele][2:(peptide_length + 1)])
colors <- list()
for (i in 1:length(mouse_position_data)) {
if (mouse_position_data[i] == "True") {
colors <- append(colors, "blue")
} else {
colors <- append(colors, "lightblue")
}
}
} else {
if (position %in% c(1, 2, peptide_length-1, peptide_length)) {
return("blue")
} else {
return("lightblue")
}
}

return(colors[[position]])
}
#calculate per-length anchor score for HLA allele
Expand All @@ -87,28 +113,40 @@ anchor_weights_for_alleles <- function(hla_alleles) {
for (hla_allele in hla_alleles) {
if (any(hla_allele == anchor_data[[8]]["HLA"])) {
eight_mer_scores <- append(anchor_data[[8]][anchor_data[[8]]["HLA"] == hla_allele][1:(8 + 1)], "8mer", 1)
} else if (any(hla_allele == mouse_anchor_data[[8]]["Allele"])) {
eight_mer_scores <- append(mouse_anchor_data[[8]][mouse_anchor_data[[8]]["Allele"] == hla_allele][1:(8 + 1)], "8mer", 1)
}
else {
eight_mer_scores <- c(hla_allele, "8mer")
}

if (any(hla_allele == anchor_data[[9]]["HLA"])) {
nine_mer_scores <- append(anchor_data[[9]][anchor_data[[9]]["HLA"] == hla_allele][1:(9 + 1)], "9mer", 1)
} else if (any(hla_allele == mouse_anchor_data[[9]]["Allele"])) {
nine_mer_scores <- append(mouse_anchor_data[[9]][mouse_anchor_data[[9]]["Allele"] == hla_allele][1:(9 + 1)], "9mer", 1)
}
else {
nine_mer_scores <- c(hla_allele, "9mer")
}

if (any(hla_allele == anchor_data[[10]]["HLA"])) {
ten_mer_scores <- append(anchor_data[[10]][anchor_data[[10]]["HLA"] == hla_allele][1:(10 + 1)], "10mer", 1)
} else if (any(hla_allele == mouse_anchor_data[[10]]["Allele"])) {
ten_mer_scores <- append(mouse_anchor_data[[10]][mouse_anchor_data[[10]]["Allele"] == hla_allele][1:(10 + 1)], "10mer", 1)
}
else {
ten_mer_scores <- c(hla_allele, "10mer")
}

if (any(hla_allele == anchor_data[[11]]["HLA"])) {
eleven_mer_scores <- append(anchor_data[[11]][anchor_data[[11]]["HLA"] == hla_allele][1:(11 + 1)], "11mer", 1)
} else if (any(hla_allele == mouse_anchor_data[[11]]["Allele"])) {
eleven_mer_scores <- append(mouse_anchor_data[[11]][mouse_anchor_data[[11]]["Allele"] == hla_allele][1:(11 + 1)], "11mer", 1)
}
else {
eleven_mer_scores <- c(hla_allele, "11mer")
}

scores <- list(eight_mer_scores, nine_mer_scores, ten_mer_scores, eleven_mer_scores)
scores <- lapply(scores, `length<-`, 13)
scores <- transpose(data.frame(scores))
Expand All @@ -120,23 +158,35 @@ anchor_weights_for_alleles <- function(hla_alleles) {
#calculate anchor list for specific peptide length and HLA allele combo given contribution cutoff
calculate_anchor <- function(hla_allele, peptide_length, anchor_contribution) {
result <- tryCatch({
anchor_raw_data <- as.numeric(anchor_data[[peptide_length]][anchor_data[[peptide_length]]["HLA"] == hla_allele][2:(peptide_length + 1)])
if (any(is.na(anchor_raw_data))) {
return("NA")
}
names(anchor_raw_data) <- as.character(1:length(anchor_raw_data))
anchor_raw_data <- anchor_raw_data[order(unlist(anchor_raw_data), decreasing = TRUE)]
count <- 0
anchor_list <- list()
for (i in 1:length(anchor_raw_data)) {
if (count >= anchor_contribution) {
return(anchor_list)
}else {
count <- count + anchor_raw_data[[i]]
anchor_list <- append(anchor_list, names(anchor_raw_data[i]))
if (any(hla_allele == anchor_data[[peptide_length]]["HLA"])) {
anchor_raw_data <- as.numeric(anchor_data[[peptide_length]][anchor_data[[peptide_length]]["HLA"] == hla_allele][2:(peptide_length + 1)])
if (any(is.na(anchor_raw_data))) {
return("NA")
}
names(anchor_raw_data) <- as.character(1:length(anchor_raw_data))
anchor_raw_data <- anchor_raw_data[order(unlist(anchor_raw_data), decreasing = TRUE)]
count <- 0
anchor_list <- list()
for (i in 1:length(anchor_raw_data)) {
if (count >= anchor_contribution) {
return(anchor_list)
} else {
count <- count + anchor_raw_data[[i]]
anchor_list <- append(anchor_list, names(anchor_raw_data[i]))
}
}
} else if (any(hla_allele == mouse_anchor_data[[peptide_length]]["Allele"])) {
mouse_position_data <- (mouse_anchor_data[[peptide_length]][mouse_anchor_data[[peptide_length]]["Allele"] == hla_allele][2:(peptide_length + 1)])
anchor_list <- list()
for (i in 1:length(mouse_position_data)) {
if (mouse_position_data[i] == "True") {
anchor_list <- append(anchor_list, as.character(i))
}
}
return(anchor_list)
} else {
return("NA")
}
return(anchor_list)
}, error = function(e) { return("NA") })
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Allele 1 2 3 4 5 6 7 8 9 10
H-2-Db False False False False True False False False False True
H-2-Kb False False False False True False False False False True
H-2-Dd False True True False False False False False False True
H-2-Kd False True False False False False False False False True
H-2-Ld False True False False False False False False False True
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Allele 1 2 3 4 5 6 7 8 9 10 11
H-2-Db False False False False True False False False False False True
H-2-Dd False True True False False False False False False False True
H-2-Kd False True False False False False False False False False True
H-2-Ld False True False False False False False False False False True
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Allele 1 2 3 4 5 6 7 8
H-2-Kb False False False False True False False True
H-2-Dd False True False False False False False True
H-2-Kd True False False False False False False True
H-2-Ld False True False False False False False True
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Allele 1 2 3 4 5 6 7 8 9
H-2-Db False False False False True False False False True
H-2-Kb False False False False True False False False True
H-2-Dd False True True False False False False False True
H-2-Kd False True False False False False False False True
H-2-Ld False True False False False False False False True
2 changes: 1 addition & 1 deletion pvactools/tools/pvacview/server.R
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,7 @@ server <- shinyServer(function(input, output, session) {
peptide_table <- do.call("rbind", lapply(peptide_names, table_formatting, peptide_data))
peptide_table_filtered <- Filter(function(x) length(unique(x)) != 1, peptide_table)
peptide_table_names <- names(peptide_table_filtered)
hla_list <- peptide_table_names[grepl("^HLA-*", peptide_table_names)]
hla_list <- df$metricsData$alleles
hla_data <- data.frame(hla = hla_list)
hla_sep <- max(nchar(peptide_table$`Peptide Sequence`))
hla_data$y_pos <- 1
Expand Down
Loading

0 comments on commit 625fb27

Please sign in to comment.