In [7]:
import pandas as pd
import random
from collections import Counter
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import taxoniq
from Bio.SeqUtils import seq3
from Bio import Entrez

In [12]:
# Per-virus mutation spectra: one row per (taxid, substitution) with the
# observed/expected counts and the normalised spectrum value (MutSpec).
viruses_specs = pd.read_csv('../data/ms12all_all_virus.csv', index_col=0)
# Virus annotation table: taxid, species name and genome type (column 'Type').
viruses_type = pd.read_csv('../data/taxid_virus_type.csv', index_col=0)
# Reference amino-acid frequencies, one column per one-letter amino-acid code.
viruses_aa_freq = pd.read_csv('../data/aminoacid_freq_all_virus.csv')

In [13]:
viruses_specs.head()

Unnamed: 0,Mut,ObsNum,ExpNum,MutSpec,MutSpec_q05,MutSpec_median,MutSpec_q95,taxid
0,A>C,52.2396,1469.25,0.024132,0.018531,0.023911,0.028715,10990
1,A>G,253.879,1469.25,0.11728,0.104365,0.118346,0.125804,10990
2,A>T,38.2773,1469.25,0.017682,0.01312,0.017858,0.022347,10990
3,C>A,28.9588,616.17,0.031899,0.021166,0.033087,0.041969,10990
4,C>G,3.51939,616.17,0.003877,0.0,0.003464,0.007581,10990


In [14]:
viruses_type.head()

Unnamed: 0,taxid,species,host,gene,comment,Type
0,38170,Avian orthoreovirus__38170,,,,ds
1,1157337,Piscine orthoreovirus__1157337,,,,ds
2,351073,Mammalian orthoreovirus__351073,,,,ds
3,40054,Epizootic hemorrhagic disease virus__40054,,,,ds
4,10990,Rice black streaked dwarf virus__10990,Avena sativa,P1 protein,,ds


In [47]:
viruses_type[viruses_type['Type'] == '-']['taxid'].unique()

array(['113201', '113202', '162145', '12814', '186538', '1980486',
       '11620', '31604', '118655', '1980917', '12331', '38525', '1933178',
       '57482', '2034996', '2560743'], dtype=object)

In [16]:
# Store taxids as strings so they compare equal to the string ids used in
# viruses_specs (which contain suffixed ids such as '11320_1').
viruses_type['taxid'] = viruses_type['taxid'].map(str)

In [17]:
viruses_aa_freq.head()

Unnamed: 0,taxid,refseq_id,M,L,K,V,N,Q,A,I,...,S,G,T,D,R,H,W,E,C,X
0,10990,GCF_000852945.1,0.024014,0.101132,0.066222,0.061991,0.070771,0.032265,0.039141,0.070983,...,0.090976,0.037554,0.056702,0.062097,0.04168,0.024331,0.007405,0.056702,0.015762,0.0
1,11082,GCF_000861085.1,0.032028,0.087427,0.056481,0.078338,0.037654,0.02467,0.0818,0.050422,...,0.060593,0.088293,0.074659,0.044579,0.057996,0.020558,0.026401,0.059944,0.020342,0.0
2,11320,GCF_000865725.1,0.039465,0.081437,0.061808,0.055126,0.053247,0.038839,0.057214,0.06494,...,0.075799,0.065358,0.062644,0.04385,0.067237,0.017749,0.016496,0.074963,0.018167,0.0
3,1133363,GCF_004789575.1,0.03165,0.088671,0.074353,0.04848,0.056267,0.036925,0.049485,0.090681,...,0.077367,0.041949,0.061291,0.05476,0.040944,0.025119,0.011052,0.055514,0.030394,0.0
4,1157337,GCF_002829625.1,0.029342,0.094342,0.042105,0.076842,0.044474,0.034868,0.072763,0.058947,...,0.080526,0.061711,0.073947,0.062368,0.053026,0.018553,0.015132,0.043421,0.013421,0.0


In [18]:
# One-letter -> three-letter amino-acid codes, used to align the reference
# frequency table with the three-letter names produced by `codon_aa`.
d = {
    'C': 'Cys', 'D': 'Asp', 'S': 'Ser', 'Q': 'Gln',
    'K': 'Lys', 'I': 'Ile', 'P': 'Pro', 'T': 'Thr',
    'F': 'Phe', 'N': 'Asn', 'G': 'Gly', 'H': 'His',
    'L': 'Leu', 'R': 'Arg', 'W': 'Trp', 'A': 'Ala',
    'V': 'Val', 'E': 'Glu', 'Y': 'Tyr', 'M': 'Met',
}

# Columns without a translation (e.g. 'taxid', 'refseq_id', 'X') keep their name.
viruses_aa_freq.columns = [d.get(name, name) for name in viruses_aa_freq.columns]

In [19]:
viruses_aa_freq.head()

Unnamed: 0,taxid,refseq_id,Met,Leu,Lys,Val,Asn,Gln,Ala,Ile,...,Ser,Gly,Thr,Asp,Arg,His,Trp,Glu,Cys,X
0,10990,GCF_000852945.1,0.024014,0.101132,0.066222,0.061991,0.070771,0.032265,0.039141,0.070983,...,0.090976,0.037554,0.056702,0.062097,0.04168,0.024331,0.007405,0.056702,0.015762,0.0
1,11082,GCF_000861085.1,0.032028,0.087427,0.056481,0.078338,0.037654,0.02467,0.0818,0.050422,...,0.060593,0.088293,0.074659,0.044579,0.057996,0.020558,0.026401,0.059944,0.020342,0.0
2,11320,GCF_000865725.1,0.039465,0.081437,0.061808,0.055126,0.053247,0.038839,0.057214,0.06494,...,0.075799,0.065358,0.062644,0.04385,0.067237,0.017749,0.016496,0.074963,0.018167,0.0
3,1133363,GCF_004789575.1,0.03165,0.088671,0.074353,0.04848,0.056267,0.036925,0.049485,0.090681,...,0.077367,0.041949,0.061291,0.05476,0.040944,0.025119,0.011052,0.055514,0.030394,0.0
4,1157337,GCF_002829625.1,0.029342,0.094342,0.042105,0.076842,0.044474,0.034868,0.072763,0.058947,...,0.080526,0.061711,0.073947,0.062368,0.053026,0.018553,0.015132,0.043421,0.013421,0.0


In [20]:
viruses_aa_freq['taxid'].unique()

array([  10990,   11082,   11320, 1133363, 1157337,  118655,   11983,
         12110,   12162,   12637,   12814,  138948,  138950,  138951,
        162145, 1678143,  186538, 1933178,  198112,   28295,   28344,
       3052310, 3052493, 3052763,  351073,   38170,   38525,   40054,
         54290,   57482,  693997,  694014,   77763], dtype=int64)

In [21]:
viruses_specs['taxid'].unique()

array(['10990', '11082', '11320_1', '11320_2', '1133363', '1157337',
       '118655', '11983', '12110', '12162', '12637', '138948', '138950',
       '138951', '162145', '1678143', '1933178', '198112', '28295',
       '28344', '3052493', '3052763', '351073', '38170', '40054', '54290',
       '57482', '693997', '694014', '77763'], dtype=object)

In [22]:
viruses_type['taxid'].unique()

array(['38170', '1157337', '351073', '40054', '10990', '77763', '113201',
       '113202', '162145', '12814', '186538', '1980486', '11620', '31604',
       '1933309', '1980917', '12331', '38525', '1933178', '57482',
       '2034996', '2560743', '11103', '11983', '12637', '138948', '28344',
       '11082', '694014', '12110', '138950', '1678143', '28295', '12162',
       '138951', '693997', '1985356', '198112'], dtype=object)

In [23]:
# The reference-frequency table stores taxids as integers; convert them to
# strings so they can be matched against the string taxids of viruses_specs.
viruses_aa_freq['taxid'] = viruses_aa_freq['taxid'].map(str)

In [24]:
# Collapse alternative taxid spellings onto a single canonical id so that the
# spectrum table and the annotation table can be joined on 'taxid'.
# Each entry is (table, alias found in that table, canonical taxid).
for frame, alias, canonical in (
    (viruses_specs, '118655(1933309)', '118655'),
    (viruses_type, '1933309', '118655'),
    (viruses_specs, '54290_1985356', '54290'),
    (viruses_type, '1985356', '54290'),
):
    frame.loc[frame['taxid'] == alias, 'taxid'] = canonical

In [25]:
# RNA codon -> three-letter amino acid; stop codons map to 'STOP'.
# Laid out one codon box (shared first two bases) per line.
codon_aa = {
    # second base U
    'UUU': 'Phe', 'UUC': 'Phe', 'UUA': 'Leu', 'UUG': 'Leu',
    'CUU': 'Leu', 'CUC': 'Leu', 'CUA': 'Leu', 'CUG': 'Leu',
    'AUU': 'Ile', 'AUC': 'Ile', 'AUA': 'Ile', 'AUG': 'Met',
    'GUU': 'Val', 'GUC': 'Val', 'GUA': 'Val', 'GUG': 'Val',
    # second base C
    'UCU': 'Ser', 'UCC': 'Ser', 'UCA': 'Ser', 'UCG': 'Ser',
    'CCU': 'Pro', 'CCC': 'Pro', 'CCA': 'Pro', 'CCG': 'Pro',
    'ACU': 'Thr', 'ACC': 'Thr', 'ACA': 'Thr', 'ACG': 'Thr',
    'GCU': 'Ala', 'GCC': 'Ala', 'GCA': 'Ala', 'GCG': 'Ala',
    # second base A
    'UAU': 'Tyr', 'UAC': 'Tyr', 'UAA': 'STOP', 'UAG': 'STOP',
    'CAU': 'His', 'CAC': 'His', 'CAA': 'Gln', 'CAG': 'Gln',
    'AAU': 'Asn', 'AAC': 'Asn', 'AAA': 'Lys', 'AAG': 'Lys',
    'GAU': 'Asp', 'GAC': 'Asp', 'GAA': 'Glu', 'GAG': 'Glu',
    # second base G
    'UGU': 'Cys', 'UGC': 'Cys', 'UGA': 'STOP', 'UGG': 'Trp',
    'CGU': 'Arg', 'CGC': 'Arg', 'CGA': 'Arg', 'CGG': 'Arg',
    'AGU': 'Ser', 'AGC': 'Ser', 'AGA': 'Arg', 'AGG': 'Arg',
    'GGU': 'Gly', 'GGC': 'Gly', 'GGA': 'Gly', 'GGG': 'Gly',
}

In [26]:
# Per-amino-acid time series: each key maps to a list that receives one count
# per simulated generation (filled by `update_aa_dict`).
aa_counter = {
    name: []
    for name in (
        'Pro', 'Ser', 'Ala', 'Thr', 'Leu',
        'Phe', 'Val', 'Ile', 'Met', 'Arg',
        'Cys', 'Trp', 'Gly', 'His', 'Gln',
        'Tyr', 'Asp', 'Glu', 'Asn', 'Lys',
    )
}

In [27]:
def create_genome(length=9999):
    """Return a random RNA sequence of `length` bases.

    Every position is drawn independently and uniformly from 'A', 'U', 'G'
    and 'C' using the module-level `random` generator.
    """
    bases = []
    for _ in range(length):
        bases.append(random.choice('AUGC'))
    return ''.join(bases)

In [28]:
#ref_data = pd.read_csv('../data/U_ideal_table.csv', index_col=0)

In [29]:
#ref_data = ref_data[ref_data['GenType'] == 'translated']

In [30]:
#ref_data = ref_data[['CodonNumber', 'RefCodon']].drop_duplicates()

In [31]:
#ref_data = ref_data[~(ref_data['RefCodon'].isin(['UGA', 'UAA', 'UAG']))]

In [32]:
#ref_codon_list = ref_data['RefCodon'].to_list()

In [33]:
#mutations = pd.read_csv('../data/All_mutation_information.csv.gz')
#mutations = mutations[mutations['(SBS) is coding'].isin([True, 'TRUE', 'TRUE, TRUE'])]

In [34]:
#all_df = mutations[mutations['base(s) in status 1'].isin(['A', 'T', 'G', 'C']) & mutations['base(s) in status 2'].isin(['A', 'T', 'G', 'C'])]

#all_df['aa_from'] = all_df['(SBS) AA change'].str.split(" ").str[1].str[0]
#all_df['aa_to'] = all_df['(SBS) AA change'].str.split(" ").str[1].str[0]
#all_df = all_df.reset_index(drop=True)
#all_df['counter'] = all_df.index
#all_df['from_to_nuc'] = all_df['base(s) in status 1'] + '>' + all_df['base(s) in status 2']
#data = all_df[['from_to_nuc', 'counter']].groupby(['from_to_nuc'], as_index=False).count()
#n_mut = int(sum(data['counter']))
#max_mut = int(max(data['counter']))
#data['counter'] = data['counter']/n_mut

In [35]:
#data['from_nuc'] = data['from_to_nuc'].str.split(">").str[0]
#data['to_nuc'] = data['from_to_nuc'].str.split(">").str[1]
#data['to_nuc'] = data['to_nuc'].str.replace('T','U')
#data['from_nuc'] = data['from_nuc'].str.replace('T','U')

In [36]:
#data

In [37]:
# Expose the spectrum under the column names the simulation helpers expect:
# 'from_to_nuc' is the substitution label (e.g. 'A>G') and 'counter' its weight.
viruses_specs['from_to_nuc'] = viruses_specs['Mut']
viruses_specs['counter'] = viruses_specs['MutSpec']

# Split 'X>Y' into source and target bases and switch to the RNA alphabet
# (T -> U) so the bases match the simulated genome and `codon_aa`.
for side_column, side_index in (('from_nuc', 0), ('to_nuc', 1)):
    base = viruses_specs['from_to_nuc'].str.split(">").str[side_index]
    viruses_specs[side_column] = base.str.replace('T', 'U')

In [38]:
# Keep only the columns read downstream: 'taxid' selects a virus, and
# 'from_nuc' / 'to_nuc' / 'counter' are consumed by `mutate_codon`.
viruses_specs = viruses_specs[['taxid', 'from_to_nuc', 'counter', 'from_nuc', 'to_nuc']]

In [39]:
viruses_specs.head()

Unnamed: 0,taxid,from_to_nuc,counter,from_nuc,to_nuc
0,10990,A>C,0.024132,A,C
1,10990,A>G,0.11728,A,G
2,10990,A>T,0.017682,A,U
3,10990,C>A,0.031899,C,A
4,10990,C>G,0.003877,C,G


In [40]:
def select_codon_number(codon_list):
    """Return a uniformly random index into `codon_list`."""
    candidate_indices = range(len(codon_list))
    return random.choice(candidate_indices)

In [41]:
def mutate_codon(codon_number, mutspec):
    """Apply one random point mutation to a codon of the global `codon_list`.

    A random position inside the codon is picked, then the replacement base is
    sampled from the rows of `mutspec` whose 'from_nuc' equals the current
    base: each 'to_nuc' is weighted by its 'counter', and the leftover weight
    (1 - sum of counters) is the chance of keeping the base unchanged.
    A mutation that would create a stop codon is discarded.

    Parameters
    ----------
    codon_number : int
        Index of the codon in the module-level `codon_list` to mutate.
    mutspec : pandas.DataFrame
        Table with 'from_nuc', 'to_nuc' and 'counter' columns.

    Returns
    -------
    None
        `codon_list[codon_number]` is updated in place.
    """
    original_codon = codon_list[codon_number]
    position = random.choice(range(len(original_codon)))
    current_base = original_codon[position]

    # Possible outcomes: every listed substitution plus "no change".
    substitutions = mutspec[mutspec['from_nuc'] == current_base]
    outcomes = substitutions['to_nuc'].to_list()
    outcomes.append(current_base)

    weights = substitutions['counter'].to_list()
    weights.append(1 - sum(weights))

    drawn_base = random.choices(outcomes, weights=weights)[0]
    candidate = original_codon[:position] + drawn_base + original_codon[position + 1:]

    # Never introduce a stop codon: fall back to the unmutated codon.
    if candidate in ['UGA', 'UAA', 'UAG']:
        candidate = original_codon

    codon_list[codon_number] = candidate

In [42]:
def update_aa_dict(aa_counter, new_aa_counter):
    """Append one observation per key of `aa_counter`.

    For every key already present in `aa_counter`, the matching value from
    `new_aa_counter` is appended to that key's list; keys missing from
    `new_aa_counter` get a 0. Keys that exist only in `new_aa_counter`
    are ignored. The update happens in place and nothing is returned.
    """
    for name, history in aa_counter.items():
        history.append(new_aa_counter.get(name, 0))

In [45]:
# For every virus: evolve a random genome under that virus' mutation spectrum,
# record the amino-acid composition at each generation, compare the final
# composition with the virus' reference composition ("gainers" ended above the
# reference, "loosers" below), save one PDF of the trajectories per virus and
# finally write a summary table of gainers/loosers for all viruses.
# NOTE(review): no random seed is set anywhere in the notebook, so trajectories
# (and possibly borderline gainer/looser calls) differ between runs.
viruses_g_l_dict = {
    'taxid' : [],
    'strand' : [],
    'gainers' : [],
    'loosers' : []
}

# Amino acids in plotting order; the 5x4 grid is filled column by column.
aa_names = (
    'Pro', 'Ser', 'Ala', 'Thr', 'Leu',
    'Phe', 'Val', 'Ile', 'Met', 'Arg',
    'Cys', 'Trp', 'Gly', 'His', 'Gln',
    'Tyr', 'Asp', 'Glu', 'Asn', 'Lys',
)

# Taxids whose genome type is hard-coded as negative-strand instead of being
# looked up in viruses_type.
negative_strand_taxids = ('3052493', '1133363', '3052763')

num_generations = 10000
mut_in_gen = 100

for tax in viruses_specs['taxid'].unique():
    virus_spec = viruses_specs[viruses_specs['taxid'] == tax]

    # Segment-level ids ('11320_1' or '113201') share the reference amino-acid
    # frequencies of their parent taxid.
    if tax in ('113201', '113202'):
        aa_taxid = '11320'
    else:
        aa_taxid = tax.split('_')[0]
    virus_aa_freq = viruses_aa_freq[viruses_aa_freq['taxid'] == aa_taxid]

    print(tax)
    if tax in negative_strand_taxids:
        strand = '-'
        # No annotation row is looked up for these taxids, so there is no
        # species name; label the figure with the taxid rather than reusing
        # the name left over from the previous virus.
        virus_name = f'taxid {tax}'
    else:
        virus_type = viruses_type[viruses_type['taxid'] == tax.replace('_','')]
        strand = virus_type['Type'].values[0]
        virus_name = virus_type['species'].values[0].split('__')[0]

    # --- simulation -------------------------------------------------------
    aa_counter = {aa: [] for aa in aa_names}
    genome = create_genome(length=9999)
    # `codon_list` must stay a module-level name: `mutate_codon` mutates it.
    codon_list = [genome[i:i+3] for i in range(0, len(genome), 3)]

    for gen in tqdm(range(num_generations)):
        # Snapshot the amino-acid counts before this generation's mutations.
        new_dict = dict(Counter([codon_aa.get(item, item) for item in codon_list]))
        update_aa_dict(aa_counter, new_dict)

        for mut in range(mut_in_gen):
            codon_num_to_mut = select_codon_number(codon_list)
            mutate_codon(codon_num_to_mut, mutspec=virus_spec)

    mutated_df = pd.DataFrame(aa_counter)
    mutated_df = mutated_df / len(codon_list)

    # --- classify amino acids against the reference composition -----------
    reference_row = virus_aa_freq.iloc[0]
    gainers = []
    loosers = []
    for aa in mutated_df.columns:
        last_freq = mutated_df[aa].iloc[-1]
        ref_freq = reference_row[aa]
        if last_freq > ref_freq:
            gainers.append(aa)
        elif last_freq < ref_freq:
            loosers.append(aa)

    # --- plotting ---------------------------------------------------------
    fig, axs = plt.subplots(5, 4, figsize=(20, 30))
    for aa_num, aa in enumerate(aa_names):
        row, column = aa_num % 5, aa_num // 5
        if aa in gainers:
            colr = 'red'
        elif aa in loosers:
            colr = 'blue'
        else:
            # Final frequency exactly equals the reference.
            colr = 'black'

        ax = axs[row][column]
        ax.plot(mutated_df[aa], color=colr, label=aa)
        ax.yaxis.set_major_locator(plt.MaxNLocator(2))
        ax.yaxis.set_tick_params(labelsize=15)
        # Dashed line: reference frequency of this amino acid in the virus.
        ax.axhline(y = reference_row[aa], color = 'black', linestyle = '--')
        ax.legend(loc="upper right", fontsize=20)

    for ax in axs[4]:
        ax.xaxis.set_tick_params(labelsize=15)
    plt.subplots_adjust(hspace=0)
    plt.subplots_adjust(wspace=0.15)

    plt.grid(False)
    fig.supylabel("Proportion of Amino Acid in the genome", x=0.08, size=20)
    fig.supxlabel("Number of Generations", y=0.09, size=20)
    fig.suptitle(
        f"Mutation of the amino acid composition of {virus_name} "
        f"according to the mutation spectrum. Genome type - {strand}",
        y=0.90, size=23,
    )

    # Open the PDF only for the save so the file is not held open during the
    # minutes-long simulation.
    with PdfPages(f'../figures/simulations/mutation_of_aa_{tax}.pdf') as pdf:
        pdf.savefig(fig, bbox_inches='tight')
    # Close this specific figure to keep memory flat across viruses.
    plt.close(fig)

    viruses_g_l_dict['taxid'].append(tax)
    viruses_g_l_dict['strand'].append(strand)
    viruses_g_l_dict['gainers'].append(gainers)
    viruses_g_l_dict['loosers'].append(loosers)

viruses_g_l_df = pd.DataFrame(viruses_g_l_dict)
viruses_g_l_df.to_csv('../data_obtain/viruses_g_l.csv')

10990


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:39<00:00, 62.56it/s]


11082


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:41<00:00, 61.93it/s]


11320_1


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:42<00:00, 61.71it/s]


11320_2


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:42<00:00, 61.48it/s]


1133363


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:44<00:00, 60.86it/s]


1157337


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:43<00:00, 60.99it/s]


118655


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.33it/s]


11983


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.43it/s]


12110


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.51it/s]


12162


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.49it/s]


12637


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.48it/s]


138948


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.50it/s]


138950


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:46<00:00, 60.13it/s]


138951


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:47<00:00, 59.79it/s]


162145


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:47<00:00, 59.74it/s]


1678143


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.31it/s]


1933178


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:46<00:00, 60.15it/s]


198112


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:46<00:00, 59.99it/s]


28295


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:41<00:00, 62.01it/s]


28344


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:43<00:00, 61.29it/s]


3052493


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:46<00:00, 60.19it/s]


3052763


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:46<00:00, 60.10it/s]


351073


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.40it/s]


38170


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:44<00:00, 60.81it/s]


40054


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:44<00:00, 60.66it/s]


54290


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:46<00:00, 59.92it/s]


57482


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:47<00:00, 59.70it/s]


693997


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:46<00:00, 60.06it/s]


694014


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:45<00:00, 60.42it/s]


77763


100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [02:48<00:00, 59.39it/s]


In [None]:
viruses_specs['taxid'].unique()

In [None]:
# Summary figure: hard-coded gainers in the first column (red), hard-coded
# loosers spread over the second and third columns (blue).
# NOTE(review): this cell plots whatever `mutated_df` currently holds; after the
# per-virus loop that is the last simulated virus, while the title says
# SARS-CoV-2 — confirm which simulation this figure is meant to show.
gainers = ['Ser', 'Leu', 'Phe', 'Ile', 'Cys', 'Tyr']
loosers = ['Pro', 'Ala', 'Thr', 'Val', 'Met', 'Arg', 'Trp', 'Gly', 'His', 'Gln', 'Asp', 'Glu', 'Asn', 'Lys']

# (amino acid, grid row, grid column, line colour); loosers are laid out first,
# seven per column, followed by the gainers in column 0.
panels = [(name, idx % 7, 1 + idx // 7, 'blue') for idx, name in enumerate(loosers)]
panels += [(name, idx, 0, 'red') for idx, name in enumerate(gainers)]

with PdfPages('../figures/mutation_of_aa.pdf') as pdf:
    fig, axs = plt.subplots(7, 3, figsize=(20, 30))

    for name, grid_row, grid_col, line_colour in panels:
        panel = axs[grid_row][grid_col]
        panel.plot(mutated_df[name], color=line_colour, label=name)
        panel.legend(loc="upper right", fontsize=20)
        panel.yaxis.set_major_locator(plt.MaxNLocator(2))
        panel.yaxis.set_tick_params(labelsize=15)

    # Only the bottom-most used panel of each column shows x tick labels.
    for grid_row, grid_col in ((5, 0), (6, 1), (6, 2)):
        axs[grid_row][grid_col].xaxis.set_tick_params(labelsize=15)
    plt.subplots_adjust(hspace=0)
    plt.subplots_adjust(wspace=0.15)
    # Column 0 holds six gainers only; hide its unused seventh panel.
    axs[6,0].set_axis_off()

    plt.grid(False)
    fig.supylabel("Proportion of Amino Acid in the genome", x=0.08, size=20)
    fig.supxlabel("Number of Generations", y=0.09, size=20)
    fig.suptitle("Mutation of the amino acid composition of SARS-CoV-2 according to the mutation spectrum", y=0.90, size=23)
    pdf.savefig(fig, bbox_inches='tight')