# Translation of HBV Mutations 

The code below translates four of the seven HBV mutation notation categories found in the scientific literature (categories 1, 3, 4 and 5 in the list below) into the amino acid (e.g. p.Pro156Ser) and nucleotide (e.g. c.314C>T) mutation formats used in the clinical study.
1. D16V 
2. c.47A>T (Desired format so no translation required)
3. A47T 
4. rtD16V
5. Asp16Val
6. p.Asp16Val (Desired format so no translation required)
7. rtAsp16Val (No mutations in this category were found in PubMed Central)

In [1]:
import re

## Dictionary

In [2]:
# Master lookup: one-letter amino-acid (AA) code -> three-letter AA code.
# Every one-letter code is registered in both upper and lower case.
# NOTE(review): 'O' -> 'Hyp' and 'U' -> 'Glp' differ from the IUPAC/HGVS
# assignments (O = Pyl, U = Sec) -- confirm these values are intended.
master_dictionary_mutations = {
    letter: three_letter
    for upper_case, three_letter in (
        ('A', 'Ala'), ('R', 'Arg'), ('N', 'Asn'), ('D', 'Asp'),
        ('C', 'Cys'), ('E', 'Glu'), ('Q', 'Gln'), ('G', 'Gly'),
        ('H', 'His'), ('O', 'Hyp'), ('I', 'Ile'), ('L', 'Leu'),
        ('K', 'Lys'), ('M', 'Met'), ('F', 'Phe'), ('P', 'Pro'),
        ('U', 'Glp'), ('S', 'Ser'), ('T', 'Thr'), ('W', 'Trp'),
        ('Y', 'Tyr'), ('V', 'Val'), ('B', 'Asx'), ('Z', 'Glx'),
    )
    for letter in (upper_case, upper_case.lower())
}

In [3]:
# Base-pair lookup: nucleotide letter -> its pairing nucleotide.
# Lower-case keys are accepted; the paired base is always upper case.
base_pair_mapping1 = {
    key: paired_base
    for base, paired_base in (('A', 'T'), ('G', 'C'), ('C', 'G'), ('T', 'A'))
    for key in (base, base.lower())
}

In [4]:
# Lower-case nucleotide letter -> upper-case nucleotide letter.
# (Despite its name, this dictionary maps lower case TO upper case.)
upper_to_lower_case = {base.lower(): base for base in ('A', 'G', 'C', 'T')}

In [5]:
# All-lower-case three-letter AA code -> the same code with a capital first
# letter (e.g. 'ala' -> 'Ala').
dictionary_3let_mutations = {
    code.lower(): code
    for code in (
        'Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Glu', 'Gln', 'Gly',
        'His', 'Hyp', 'Ile', 'Leu', 'Lys', 'Met', 'Phe', 'Pro',
        'Glp', 'Ser', 'Thr', 'Trp', 'Tyr', 'Val', 'Asx', 'Glx',
    )
}

## Helper functions for translating HBV mutations

In [6]:
# Definition for translating HBV mutations


def translate_HBV_mutations(translate_set, dictionary):
    """Replace every dictionary key found in the text form of the input.

    The input is converted with ``str()``, every occurrence of a key of
    ``dictionary`` is replaced by its value, and the resulting text is split
    on whitespace.

    Keys may be longer than one character (e.g. ``'asp' -> 'Asp'``). Longer
    keys are tried before shorter ones, so a multi-character key wins over a
    shorter key that is its prefix. Replaced text is never scanned again.

    Parameters
    ----------
    translate_set : object
        A single mutation string or a collection of them. Because the
        ``str()`` form is used, a list/set input keeps its brackets, quotes
        and commas in the returned tokens.
    dictionary : dict
        Maps text fragments (str) to their replacement (str).

    Returns
    -------
    list of str
        Whitespace-separated tokens of the translated text.
    """
    text = str(translate_set)
    # Longest keys first: regex alternation takes the first alternative that
    # matches. Empty and non-string keys can never occur in the text.
    keys = sorted((key for key in dictionary
                   if isinstance(key, str) and key),
                  key=len, reverse=True)
    if not keys:
        return text.split()
    key_pattern = re.compile('|'.join(re.escape(key) for key in keys))
    translated_text = key_pattern.sub(
        lambda match: dictionary[match.group(0)], text)
    return translated_text.split()


def translate_HBV_mutations_type3(translate_set, character, dictionary):
    """Expand each mapped character into ``<value><character><original>``.

    The input is converted with ``str()`` and scanned one character at a
    time. A character that is a key of ``dictionary`` is replaced by
    ``dictionary[char] + character + char``; any other character is copied
    unchanged. The resulting text is split on whitespace.

    Parameters
    ----------
    translate_set : object
        Value whose ``str()`` form is translated.
    character : str
        Separator placed between the looked-up value and the original
        character (e.g. ``">"``).
    dictionary : dict
        Maps single characters to their replacement text.

    Returns
    -------
    list of str
        Whitespace-separated tokens of the translated text.
    """
    expanded_text = ''.join(
        dictionary[symbol] + character + symbol
        if symbol in dictionary else symbol
        for symbol in str(translate_set)
    )
    return expanded_text.split()


def translate_HBV_mutations_set(translate_set, dictionary, letter):
    """Translate mutations, clean them up and add a prefix.

    The input is translated with ``translate_HBV_mutations``; every
    resulting token is stripped of all characters other than ASCII letters,
    digits and ``>``, and ``letter`` is prepended to each cleaned token.

    Parameters
    ----------
    translate_set : object
        A single mutation string or a collection of them. ``None`` and empty
        collections yield an empty result.
    dictionary : dict
        Translation table handed to ``translate_HBV_mutations``.
    letter : str
        Prefix for every mutation (e.g. ``"p."`` or ``"c."``).

    Returns
    -------
    set of str
        The prefixed, de-duplicated mutations.
    """
    # Empty or missing input has no mutations. Without this guard the text
    # form of the empty value ('[]', 'set()', 'None') would itself be
    # translated and come back as a bogus mutation such as 'p.'.
    if translate_set is None:
        return set()
    if hasattr(translate_set, '__len__') and len(translate_set) == 0:
        return set()
    cleaned_tokens = (
        re.sub('[^a-zA-Z0-9>]+', '', token)
        for token in translate_HBV_mutations(translate_set, dictionary)
    )
    # Tokens that are pure punctuation clean down to '' and are skipped so
    # that a bare prefix is never reported as a mutation.
    return {letter + token for token in cleaned_tokens if token}

## Translation of Category 1 (e.g. D16V) to Category 6 (e.g. p.Asp16Val)

Example use of `translate_HBV_mutations_set` for category 1

In [7]:
# Example category 1 input: one-letter AA, position, one-letter AA
mutation_type1_list = ['D16V', 'E70G']

In [8]:
# Translate with the one-letter -> three-letter table and prefix with "p."
translate_HBV_mutations_set(
    mutation_type1_list, master_dictionary_mutations, "p.")

{'p.Asp16Val', 'p.Glu70Gly'}

## Translation of Category 3 (e.g. A47T) to Category 2 (e.g. c.47A>T)

Example translation of category 3 mutations

In [9]:
# Example category 3 input: base letter, position, base letter
mutation_type3_list = ['A100T', 'G20C', 'g105c']

In [10]:
# Read the reference base, position and new base straight from each
# "<reference base><position><new base>" entry (e.g. A47T). The reference base
# must come from the notation itself: rebuilding it as the pairing partner of
# the new base is only right for A<->T and G<->C changes (G1764A would
# otherwise become c.1764T>A).
# mutation_type3_list is left untouched, so this cell can be re-run safely.
nucleotide_change_pattern = re.compile(r'([ACGTacgt])(\d+)([ACGTacgt])')
mutation_type3_translated = set()
for mutation in mutation_type3_list:
    parsed = nucleotide_change_pattern.fullmatch(str(mutation).strip())
    if parsed is None:
        # Fail loudly instead of emitting a malformed mutation
        raise ValueError(
            'Not a category 3 mutation (expected e.g. A47T): '
            + repr(mutation))
    reference_base, position, new_base = parsed.groups()
    # Upper-case both bases and prepend "c." -> c.<position><ref>><new>
    mutation_type3_translated.add(
        'c.' + position + reference_base.upper() + '>' + new_base.upper())
mutation_type3_translated

{'c.100A>T', 'c.105G>C', 'c.20G>C'}

## Translation of Category 4 (e.g. rtD16V) to Category 6 (e.g. p.Asp16Val)

Example use of `translate_HBV_mutations_set` for category 4

In [11]:
# Example category 4 input: "rt" prefix, one-letter AA, position, one-letter AA
mutation_type4_list = ['rtF90M', 'rtP11W']

In [12]:
# First, drop the leading "rt" from each entry. Only a literal "rt" prefix is
# removed (not blindly the first two characters), so re-running this cell
# does not strip further characters from already-processed entries.
mutation_type4_list = [mutation[2:] if mutation.startswith('rt') else mutation
                       for mutation in mutation_type4_list]
# Convert single letter AA to three letters AA
# Then add "p."
translate_HBV_mutations_set(mutation_type4_list,
                            master_dictionary_mutations, "p.")

{'p.Phe90Met', 'p.Pro11Trp'}

## Translation of Category 5 (e.g. Asp16Val) to Category 6 (e.g. p.Asp16Val)

Example use of `translate_HBV_mutations_set` for category 5

In [13]:
# Example category 5 input: three-letter AA, position, three-letter AA
mutation_type5_list = ['His27Phe', 'Tyr62Glx']

In [14]:
# Pass the three-letter AA table and prefix every entry with "p."
translate_HBV_mutations_set(
    mutation_type5_list, dictionary_3let_mutations, "p.")

{'p.His27Phe', 'p.Tyr62Glx'}