# Translation of HBV Mutations 

The code below translates four of the seven HBV mutation notations found in the scientific literature (categories 1, 3, 4 and 5 in the list below) into the amino acid (e.g. p.Pro156Ser) and nucleotide (e.g. c.314C>T) notations used in the clinical study. 
1. D16V 
2. c.47A>T (No translation required)
3. A47T 
4. rtD16V
5. Asp16Val
6. p.Asp16Val (No translation required)
7. rtAsp16Val (No mutations in this category were found in PubMed Central)

In [71]:
import re

## Dictionary

In [72]:
#Master dictionary: one letter AA (amino acid) code, upper or lower case -> 3 letter AA code
#NOTE(review): 'O' -> 'Hyp' and 'U' -> 'Glp' differ from the IUPAC one letter
#assignments (O = Pyl, U = Sec) - confirm these are the intended codes.
master_dictionary_mutations = {
    key: three_letter
    for one_letter, three_letter in (
        ('A', 'Ala'), ('R', 'Arg'), ('N', 'Asn'), ('D', 'Asp'),
        ('C', 'Cys'), ('E', 'Glu'), ('Q', 'Gln'), ('G', 'Gly'),
        ('H', 'His'), ('O', 'Hyp'), ('I', 'Ile'), ('L', 'Leu'),
        ('K', 'Lys'), ('M', 'Met'), ('F', 'Phe'), ('P', 'Pro'),
        ('U', 'Glp'), ('S', 'Ser'), ('T', 'Thr'), ('W', 'Trp'),
        ('Y', 'Tyr'), ('V', 'Val'), ('B', 'Asx'), ('Z', 'Glx'),
    )
    #Register every code under both its upper and lower case letter
    for key in (one_letter, one_letter.lower())
}

In [73]:
#Dictionary for base pair mapping: each base letter (upper or lower case) -> its upper case partner
base_pair_mapping1 = {
    key: partner
    for base, partner in (('A', 'T'), ('G', 'C'), ('C', 'G'), ('T', 'A'))
    for key in (base, base.lower())
}

In [74]:
#Dictionary mapping each lower case base letter to its upper case form
#(despite its name, the mapping direction is lower case -> upper case)
upper_to_lower_case = {base: base.upper() for base in 'agct'}

In [75]:
#Dictionary from an all lower case 3 letter AA code to the same code with its first letter capitalized
dictionary_3let_mutations = {
    code.lower(): code
    for code in (
        'Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Glu', 'Gln', 'Gly',
        'His', 'Hyp', 'Ile', 'Leu', 'Lys', 'Met', 'Phe', 'Pro',
        'Glp', 'Ser', 'Thr', 'Trp', 'Tyr', 'Val', 'Asx', 'Glx',
    )
}

## Translation of Category 1 (i.e. D16V) to Category 6 (i.e. p.Asp16Val)

In [76]:
#Script used to convert mutation type 1 to 6
#First, conversion from Single Letter AA to 3 Letter AA (without changing position #)
#Then adding "p."

def translation_category1(list_of_mutation_type1_from_cooccurrence_of_mutation_and_drug_from_literature):
    """Translate category 1 mutations (e.g. ``D16V``) to category 6 (e.g. ``p.Asp16Val``).

    Every character of an entry that is a key of ``master_dictionary_mutations``
    is replaced by its three letter code; all other characters (such as the
    position digits) are kept. Each whitespace-separated token is then stripped
    of non-alphanumeric characters and prefixed with ``"p."``.

    Args:
        list_of_mutation_type1_from_cooccurrence_of_mutation_and_drug_from_literature:
            Iterable of mutation strings. A single string is accepted and
            treated as one entry; ``None`` is treated as no input.

    Returns:
        A set of translated mutation strings. Entries that contain no
        alphanumeric character contribute nothing, so empty input gives an
        empty set rather than ``{'p.'}``.
    """
    mutations = list_of_mutation_type1_from_cooccurrence_of_mutation_and_drug_from_literature
    if mutations is None:
        return set()
    if isinstance(mutations, str):
        # Iterating a bare string would yield single characters, not mutations.
        mutations = [mutations]

    translated_mutations = set()
    for mutation in mutations:
        # Work on the entry itself, not on the repr of the whole list, so
        # quoting and escape sequences can never leak into the translation.
        expanded = "".join(
            master_dictionary_mutations.get(character, character)
            for character in str(mutation)
        )
        for token in expanded.split():
            cleaned = re.sub(r'[^a-zA-Z0-9]+', '', token)
            if cleaned:
                translated_mutations.add("p." + cleaned)
    return translated_mutations

Example use of translation_category1

In [77]:
#Two example category 1 mutations (single letter AA, position, single letter AA)
mutation_type1_list = ["D16V", "E70G"]

In [78]:
#Translate the example list; the notebook shows the returned set below
translation_category1(mutation_type1_list)

{'p.Asp16Val', 'p.Glu70Gly'}

## Translation of Category 3 (e.g. A47T) to Category 2 (e.g. c.47A>T)

In [79]:
#Script used to convert mutation type 3 to 2
#First, remove the first character from string, then add the letter that matched with the dictionary and ">" 
#Then convert the remaining single letter nucleotide (NT) to single letter NT 
#Convert lower case mutations to upper case 
#Then add "c." 

def translation_category3(list_of_mutation_type3_from_cooccurrence_of_mutation_and_drug_from_literature):
    """Translate category 3 mutations (e.g. ``A100T``) to category 2 (e.g. ``c.100A>T``).

    For every entry the first character is dropped. Each remaining character
    that is a key of ``base_pair_mapping1`` is then written as
    ``<mapped base> + ">" + <character>``, characters that are keys of
    ``upper_to_lower_case`` are replaced by their mapped (upper case) value,
    non-alphanumeric characters other than ``>`` are removed and ``"c."`` is
    prepended.

    NOTE(review): because the leading letter of the input is discarded, the
    base written before ``>`` comes from ``base_pair_mapping1`` applied to the
    trailing base, not from the input itself (``A100G`` gives ``c.100C>G``).
    This behaviour is kept unchanged; confirm it is intended.

    Args:
        list_of_mutation_type3_from_cooccurrence_of_mutation_and_drug_from_literature:
            Iterable of mutation strings. A single string is accepted and
            treated as one entry; ``None`` is treated as no input.

    Returns:
        A set of translated mutation strings. Entries that leave nothing after
        cleaning contribute nothing, so empty input gives an empty set rather
        than ``{'c.'}``.
    """
    mutations = list_of_mutation_type3_from_cooccurrence_of_mutation_and_drug_from_literature
    if mutations is None:
        return set()
    if isinstance(mutations, str):
        # Iterating a bare string would yield single characters, not mutations.
        mutations = [mutations]

    translated_mutations = set()
    for mutation in mutations:
        # Drop the leading letter of the literature notation.
        remainder = mutation[1:]

        # Put the mapped partner base and ">" in front of every base letter.
        paired = "".join(
            base_pair_mapping1[character] + ">" + character
            if character in base_pair_mapping1
            else character
            for character in remainder
        )
        # Bring any lower case base that is still present to upper case.
        upper_cased = "".join(
            upper_to_lower_case.get(character, character) for character in paired
        )

        for token in upper_cased.split():
            cleaned = re.sub(r'[^a-zA-Z0-9>]+', '', token)
            if cleaned:
                translated_mutations.add("c." + cleaned)
    return translated_mutations

Example use of translation_category3

In [80]:
#Three example category 3 mutations, the last one written in lower case
mutation_type3_list = ["A100T", "G20C", "g105c"]

In [81]:
#Translate the example list; the notebook shows the returned set below
translation_category3(mutation_type3_list)

{'c.100A>T', 'c.105G>C', 'c.20G>C'}

## Translation of Category 4 (i.e. rtD16V) to Category 6 (i.e. p.Asp16Val)

In [82]:
#Script used to convert mutation type 4 to 6
#First, remove first two characters of each mutation (i.e. rt)
#Then, conversion from Single Letter AA to 3 Letter AA (without changing position #)
#Lastly, add "p."
#Then filter conversions that do not satisfy p_AA3wt_Pos_AA3mut (this filtering step is not performed inside the function below)

def translation_category4(list_of_mutation_type4_from_cooccurrence_of_mutation_and_drug_from_literature):
    """Translate category 4 mutations (e.g. ``rtD16V``) to category 6 (e.g. ``p.Asp16Val``).

    The first two characters of every entry are dropped unconditionally (they
    are expected to be the ``rt`` prefix; this is not checked). Every remaining
    character that is a key of ``master_dictionary_mutations`` is replaced by
    its three letter code, non-alphanumeric characters are removed and ``"p."``
    is prepended.

    Args:
        list_of_mutation_type4_from_cooccurrence_of_mutation_and_drug_from_literature:
            Iterable of mutation strings. A single string is accepted and
            treated as one entry; ``None`` is treated as no input.

    Returns:
        A set of translated mutation strings. Entries that leave nothing after
        cleaning contribute nothing, so empty input gives an empty set rather
        than ``{'p.'}``.
    """
    mutations = list_of_mutation_type4_from_cooccurrence_of_mutation_and_drug_from_literature
    if mutations is None:
        return set()
    if isinstance(mutations, str):
        # Iterating a bare string would yield single characters, not mutations.
        mutations = [mutations]

    translated_mutations = set()
    for mutation in mutations:
        # Remove the two-character prefix before expanding the AA letters,
        # otherwise "r" and "t" would be expanded to Arg and Thr.
        without_prefix = mutation[2:]
        expanded = "".join(
            master_dictionary_mutations.get(character, character)
            for character in without_prefix
        )
        for token in expanded.split():
            cleaned = re.sub(r'[^a-zA-Z0-9]+', '', token)
            if cleaned:
                translated_mutations.add("p." + cleaned)
    return translated_mutations

Example use of translation_category4

In [83]:
#Two example category 4 mutations ("rt" prefix followed by single letter AA notation)
mutation_type4_list = ["rtF90M", "rtP11W"]

In [84]:
#Translate the example list; the notebook shows the returned set below
translation_category4(mutation_type4_list)

{'p.Phe90Met', 'p.Pro11Trp'}

## Translation of Category 5 (i.e. Asp16Val) to Category 6 (i.e. p.Asp16Val)

In [85]:
#Script used to convert mutation type 5 to 6
#First, conversion from lower case 3 letter AA to standard notation (without changing position #)
#Then adding "p."

def translation_category5(list_of_mutation_type5_from_cooccurrence_of_mutation_and_drug_from_literature):
    """Translate category 5 mutations (e.g. ``Asp16Val``) to category 6 (e.g. ``p.Asp16Val``).

    Each whitespace-separated token of an entry is stripped of
    non-alphanumeric characters. Every run of letters whose lower case form is
    a key of ``dictionary_3let_mutations`` is replaced by the capitalised code
    from that dictionary (``his27phe`` becomes ``His27Phe``); other letter
    runs are left untouched. Finally ``"p."`` is prepended.

    The lookup is done per letter run rather than per character: a single
    character can never match the three letter keys of the dictionary, so a
    character-wise lookup would leave the case of the input unchanged.

    Args:
        list_of_mutation_type5_from_cooccurrence_of_mutation_and_drug_from_literature:
            Iterable of mutation strings. A single string is accepted and
            treated as one entry; ``None`` is treated as no input.

    Returns:
        A set of translated mutation strings. Entries that leave nothing after
        cleaning contribute nothing, so empty input gives an empty set rather
        than ``{'p.'}``.
    """
    mutations = list_of_mutation_type5_from_cooccurrence_of_mutation_and_drug_from_literature
    if mutations is None:
        return set()
    if isinstance(mutations, str):
        # Iterating a bare string would yield single characters, not mutations.
        mutations = [mutations]

    def _standardise_code(match):
        # Swap a known three letter code for its capitalised spelling;
        # anything that is not a known code is returned unchanged.
        letters = match.group(0)
        return dictionary_3let_mutations.get(letters.lower(), letters)

    translated_mutations = set()
    for mutation in mutations:
        for token in str(mutation).split():
            cleaned = re.sub(r'[^a-zA-Z0-9]+', '', token)
            if not cleaned:
                continue
            standardised = re.sub(r'[A-Za-z]+', _standardise_code, cleaned)
            translated_mutations.add("p." + standardised)
    return translated_mutations

Example use of translation_category5

In [86]:
#Two example category 5 mutations (3 letter AA, position, 3 letter AA)
mutation_type5_list = ["His27Phe", "Tyr62Glx"]

In [87]:
#Translate the example list; the notebook shows the returned set below
translation_category5(mutation_type5_list)

{'p.His27Phe', 'p.Tyr62Glx'}