##### Confirm All Modification Positions Match Reference
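* Create a tab-separated positions file with the columns `contig	pos	strand	ref_base	variants` (no header line is written; rows whose reference base does not match the expected base are reported and left out)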

In [1]:
# Input `.bed` tables of modifications, one per rRNA (18S and 25S); both live
# in the same "misc" directory, so the prefix is spelled out once.
mod_misc_dir = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/mod_files/misc/"
tmp_18S_mods = mod_misc_dir + "18S_mods.bed"
tmp_25S_mods = mod_misc_dir + "25S_mods.bed"


In [2]:
from signalalign.utils.sequenceTools import read_fasta

# FASTA file holding the yeast rRNA reference sequences.
yeast_rrna_seq_path = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/reference/yeast_25S_18S.fa"

# Materialise every record yielded by read_fasta as a (name, comment, seq) tuple.
ref_seq = [
    (name, comment, seq)
    for name, comment, seq in read_fasta(yeast_rrna_seq_path)
]

# Pull out just the sequence strings, in file order.
sequences = [record[2] for record in ref_seq]

# NOTE(review): the first record is bound to 18S and the second to 25S purely
# by position; the file name reads "25S_18S" — confirm the record order.
ssu_18S = sequences[0]
lsu_25S = sequences[1]


In [3]:
output_mods = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/mod_files/yeast_18S_25S_mods.positions"

# Reference base (DNA alphabet) expected at a position carrying each modification.
exp_bases = {'2’-O-Methyladenosine' : "A",
            '2’-O-Methylcytidine' : "C",
            '2’-O-Methylguanosine' : "G",
            '2’-O-Methyluridine' : "T",
            'C5-Methylcytidine' : "C",
            'N1-Methyladenosine' : "A",
            'N1-methyl-N3-aminocarboxypropyl_pseudouridine' : "T",
            'N3-Methyluridine' : "T",
            'N4-Acetylcytidine' : "C",
            'N6-N6-Dimethyladenosine' : "A",
            'N7-Methylguanosine' : "G",
            'Pseudouridine' : "T", 
            "2’-O-Methyluridine_pseudouridine": "T",
            "2'-O-methylated_pseudouridine_2’-O-Methyluridine" : "T"}

# Single-letter code assigned to each modification.
# NOTE(review): not read anywhere in this cell, and its last key
# ("2'-O-methylated_pseudouridine") does not appear in the other two tables —
# confirm whether it is still needed.
mod_bases = {'2’-O-Methyladenosine' : "a",
        '2’-O-Methylcytidine' : "b",
        '2’-O-Methylguanosine' : "c",
        '2’-O-Methyluridine' : "d",
        'C5-Methylcytidine' : "e",
        'N1-Methyladenosine' : "f",
        'N1-methyl-N3-aminocarboxypropyl_pseudouridine' : "g",
        'N3-Methyluridine' : "h",
        'N4-Acetylcytidine' : "i",
        'N6-N6-Dimethyladenosine' : "j",
        'N7-Methylguanosine' : "k",
        'Pseudouridine' : "l",
         "2'-O-methylated_pseudouridine" : "m"}


# String written to the `variants` column: the canonical base followed by the
# modification letter(s).
# NOTE(review): exp_bases contains "2’-O-Methyluridine_pseudouridine" but this
# table does not, so an input row with that name raises KeyError — confirm
# whether an entry is missing here.
mod_variants = {'2’-O-Methyladenosine' : "Aa",
        '2’-O-Methylcytidine' : "Cb",
        '2’-O-Methylguanosine' : "Gc",
        '2’-O-Methyluridine' : "Td",
        'C5-Methylcytidine' : "Ce",
        'N1-Methyladenosine' : "Af",
        'N1-methyl-N3-aminocarboxypropyl_pseudouridine' : "Tg",
        'N3-Methyluridine' : "Th",
        'N4-Acetylcytidine' : "Ci",
        'N6-N6-Dimethyladenosine' : "Aj",
        'N7-Methylguanosine' : "Gk",
        'Pseudouridine' : "Tl", 
        "2'-O-methylated_pseudouridine_2’-O-Methyluridine": "Tmd"}


def write_mod_positions(bed_path, contig, ref_sequence, label, outfh, strand="+"):
    """Check one modification table against a reference and write matching rows.

    Each non-blank line of ``bed_path`` is split on whitespace; the first field
    is the modification name and the second an integer position. The position is
    decremented by one before indexing ``ref_sequence`` (presumably the input is
    1-based — confirm against the source table).

    Rows whose reference base equals ``exp_bases[name]`` are written to
    ``outfh`` as ``contig<TAB>pos<TAB>strand<TAB>ref_base<TAB>variants``.
    Rows that disagree are printed to stdout as ``"<label> ERROR ..."`` and
    not written.

    :param bed_path: path of the whitespace-delimited modification table
    :param contig: contig name written in the first output column
    :param ref_sequence: reference sequence string indexed by position
    :param label: prefix for mismatch messages (e.g. "18S")
    :param outfh: open, writable text handle receiving the position rows
    :param strand: strand written in the third output column
    :raises KeyError: if a modification name is missing from ``exp_bases`` or
        ``mod_variants``
    :raises IndexError: if a line has fewer than two fields or the position
        falls outside ``ref_sequence``
    """
    with open(bed_path, 'r') as fh:
        for line in fh:
            split_line = line.split()
            # Skip blank lines (e.g. a trailing newline at end of file), which
            # would otherwise crash on split_line[0].
            if not split_line:
                continue
            mod_name = split_line[0]
            pos = int(split_line[1]) - 1
            ref_base = ref_sequence[pos]
            exp_base = exp_bases[mod_name]
            variants = mod_variants[mod_name]
            if ref_base != exp_base:
                print("{} ERROR".format(label), mod_name, exp_base, pos, ref_base)
            else:
                print("{}\t{}\t{}\t{}\t{}".format(contig, pos, strand, ref_base, variants), file=outfh)


# Both rRNAs go into the same positions file, 18S rows first.
with open(output_mods, "w") as outfh:
    write_mod_positions(tmp_18S_mods, "RDN18-1", ssu_18S, "18S", outfh)
    write_mod_positions(tmp_25S_mods, "RDN25-1", lsu_25S, "25S", outfh)


## Create csv for secondary structure visualization 
The resulting CSV files encode modification information which can be visualized at [RiboVision](http://apollo.chemistry.gatech.edu/RiboVision/)


In [53]:
# Input CSVs (per-rRNA modification tables) and the RiboVision CSVs generated
# from them. Directory prefixes are written once and joined to the file names.
mods_csv_dir = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/mod_files/misc/"
ribovision_out_dir = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/ribovision/"

tmp_18S_mod_csv = mods_csv_dir + "18S_mods.csv"
tmp_25S_mod_csv = mods_csv_dir + "25S_mods.csv"
ribo_vision_18S = ribovision_out_dir + "ribovison_18S_mods.csv"
ribo_vision_25S = ribovision_out_dir + "ribovison_25S_mods.csv"

# One row per modification: (name, RNA-alphabet base, display colour).
# Both lookup tables below are derived from this list, so their keys and
# ordering always agree.
mod_display_table = [
    ('2’-O-Methyladenosine', "A", "#008000"),
    ('2’-O-Methylcytidine', "C", "#008000"),
    ('2’-O-Methylguanosine', "G", "#008000"),
    ('2’-O-Methyluridine', "U", "#008000"),
    ('C5-Methylcytidine', "C", "#FFA500"),
    ('N1-Methyladenosine', "A", "#00FFFF"),
    ('N1-methyl-N3-aminocarboxypropyl_pseudouridine', "U", "#00FF00"),
    ('N3-Methyluridine', "U", "#0000FF"),
    ('N4-Acetylcytidine', "C", "#000080"),
    ('N6-N6-Dimethyladenosine', "A", "#800080"),
    ('N7-Methylguanosine', "G", "#FF00FF"),
    ('Pseudouridine', "U", '#FF0000'),
    ("2’-O-Methyluridine_pseudouridine", "U", "#FFFF00"),
    ("2'-O-methylated_pseudouridine_2’-O-Methyluridine", "U", "#808080"),
]

# Modification name -> hex colour.
color_coding = {mod_name: hex_color for mod_name, _, hex_color in mod_display_table}

# Modification name -> unmodified base in the RNA alphabet (U rather than T).
rna_exp_bases = {mod_name: rna_base for mod_name, rna_base, _ in mod_display_table}


In [52]:
# 18S: convert the modification CSV into a RiboVision data file.
data_des = "18S modification base landscape"
header = "resNum,DataCol,ColorCol,DataDescription"
res_num = "18S:{}"


with open(ribo_vision_18S, 'w') as out_csv:
    out_csv.write(header + "\n")
    with open(tmp_18S_mod_csv, "r") as in_csv:
        # The first input line is a header row; consume it before the data.
        header = in_csv.readline()
        for row_index, row in enumerate(in_csv):
            fields = row.split(",")
            mod = fields[0]
            pos = int(fields[1])
            # Parsed but not written for 18S; kept so a malformed third
            # column still raises here.
            percent_mod = float(fields[2])
            columns = [res_num.format(str(pos)), rna_exp_bases[mod], str(color_coding[mod])]
            # Only the first data row carries the DataDescription column.
            if row_index == 0:
                columns.append(data_des)
            out_csv.write(",".join(columns) + "\n")

In [51]:
# 25S: convert the modification CSV into a RiboVision data file.
# Columns written: residue id, unmodified base, colour, font weight, and (on the
# first data row only) a free-text description.
data_des = "25S modification base landscape"
header = "resNum,DataCol,ColorCol,FontWeight,DataDescription"
res_num = "25S:{}"


with open(ribo_vision_25S, 'w') as fh:
    print(header, file=fh)
    with open(tmp_25S_mod_csv, "r") as fh2:
        # Discard the input header row without rebinding the output `header`.
        fh2.readline()
        first_line = True
        for line in fh2:
            split_line = line.split(",")
            mod = split_line[0]
            pos = int(split_line[1])
            percent_mod = float(split_line[2])
            # Font weight grows linearly with percent modified:
            # 0 % -> 400, 100 % -> 900 (truncated to an integer).
            font_weight = str(int(400+(500*(percent_mod/100))))
            # Use the RNA alphabet (U, not T) for DataCol, as the 18S export
            # does; the DNA-alphabet exp_bases table wrote "T" for
            # uridine-derived modifications.
            row = [res_num.format(str(pos)), rna_exp_bases[mod], str(color_coding[mod]), font_weight]
            # Only the first data row carries the DataDescription column.
            if first_line:
                row.append(data_des)
            print(",".join(row), file=fh)
            first_line = False

In [54]:
%matplotlib notebook

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

plt.figure(figsize=[11,11])
handles = []
for name, color in color_coding.items():
    handles.append(mpatches.Patch(color=color, label=name))
plt.legend(handles=handles, prop={'size': 20})

# plt.savefig("/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/ribovision/mod_color_legend.png")
plt.show()


<IPython.core.display.Javascript object>

## Create signalAlign (SA) baseline models


In [None]:
from signalalign.hiddenMarkovModel import HmmModel

# Starting model (ACGT alphabet) and the name template for every model derived
# from it; "{}" is filled with the alphabet the model covers.
original_rna_model = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/models/testModelR9p4_5mer_acgt_RNA_180mv.model"
tmp_rna_model = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/models/rna_r94_5mer_{}.model"

# new_variants[i] is paired with replacement_bases[i].
new_variants = "abcdefghijklm"
replacement_bases = "ACGTCATTCAGTT"

alphabet = "ACGT"
current_model_path = original_rna_model

# Grow the alphabet one character at a time: each pass loads the model written
# by the previous pass (or the original on the first pass) and writes a model
# for the extended alphabet.
for new_char, canonical_base in zip(new_variants, replacement_bases):
    alphabet = alphabet + new_char
    next_model_path = tmp_rna_model.format(alphabet)
    rna_model = HmmModel(current_model_path, rna=True)
    print(alphabet)
    # NOTE(review): presumably the new character is initialised from
    # canonical_base — confirm against HmmModel.write_new_model.
    rna_model.write_new_model(next_model_path, alphabet, canonical_base)
    current_model_path = next_model_path



In [None]:
from signalalign.hiddenMarkovModel import HmmModel

# Same starting model as the baseline build; outputs carry a "_noise" suffix.
original_rna_model = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/models/testModelR9p4_5mer_acgt_RNA_180mv.model"
tmp_rna_model = "/Users/andrewbailey/CLionProjects/personal/projects/ares_rRNA/models/rna_r94_5mer_{}_noise.model"

# new_variants[i] is paired with replacement_bases[i].
new_variants = "abcdefghijklm"
replacement_bases = "ACGTCATTCAGTT"

alphabet = "ACGT"
current_model_path = original_rna_model

# Grow the alphabet one character at a time, chaining each pass off the model
# written by the previous one, and pass noise=0.1 to every write.
for new_char, canonical_base in zip(new_variants, replacement_bases):
    alphabet = alphabet + new_char
    next_model_path = tmp_rna_model.format(alphabet)
    rna_model = HmmModel(current_model_path, rna=True)
    print(alphabet)
    # NOTE(review): the exact effect of `noise` is defined by
    # HmmModel.write_new_model — confirm there.
    rna_model.write_new_model(next_model_path, alphabet, canonical_base,  noise=0.1)
    current_model_path = next_model_path

