# In [1]:
import structure as st
from tkinter import filedialog
import customtkinter as ctk
import re
import os
import ctypes
from ctypes import wintypes

# In [2]:
def get_desktop_path():
    """Return the Desktop folder path reported by the Windows shell.

    Calls ``shell32.SHGetFolderPathW`` with the Desktop CSIDL and returns the
    text the call wrote into a ``MAX_PATH``-sized unicode buffer. The return
    code of the shell call is not inspected. Relies on ``ctypes.windll``, so
    it only works on Windows.
    """
    desktop_csidl = 0x0000  # CSIDL_DESKTOP: Desktop folder constant
    current_path_flag = 0  # SHGFP_TYPE_CURRENT
    path_buffer = ctypes.create_unicode_buffer(wintypes.MAX_PATH)
    shell32 = ctypes.windll.shell32
    shell32.SHGetFolderPathW(None, desktop_csidl, None, current_path_flag, path_buffer)
    return path_buffer.value
    
def open_file():
    """Ask for a FASTA/text file and load its sequence into the input box.

    The first entry returned by ``st.fasta_to_list`` is upper-cased, stripped
    of every character other than A, C, G, T and U, and written into
    ``seq_entry``. The initial GC-content label is refreshed and the text
    boxes are re-rendered through ``codon_seperation``. Nothing happens when
    the dialog is cancelled.
    """
    file_path = filedialog.askopenfilename(filetypes=[("FASTA files", "*.fasta"), ("Text files", "*.txt")])
    if not file_path:
        # Dialog cancelled: keep whatever is currently shown.
        return

    valid_characters = {'A', 'G', 'C', 'T', 'U'}
    cds = st.fasta_to_list(fasta_dir=file_path, seq_to_codon=False, sos_eos=False)[0].upper()
    # Test the characters themselves: comparing the whole set against the
    # single-letter entries (``set(cds) in [...]``) can never be true.
    if not set(cds).issubset(valid_characters):
        cds = ''.join(c for c in cds if c in valid_characters)

    seq_entry.delete('1.0', ctk.END)
    seq_entry.insert('1.0', cds)

    # Only the GC content of the loaded sequence is shown here; the converted
    # sequence is produced when the user submits or picks a conversion mode.
    _, gc_initial, _ = change_to_c(cds=cds, path=file_path)
    gc_label_initial.configure(text=f'GC content: {gc_initial}%')
    codon_seperation()
            
def submit_sequence():
    """Clean the typed sequence, then run the selected conversion.

    Reads ``seq_entry``, keeps only the nucleotides A, C, G, T and U
    (case-insensitively) and writes the upper-cased result back before
    calling ``alternative_ccc`` and ``codon_seperation``.
    """
    raw_text = seq_entry.get("1.0", ctk.END)
    valid_characters = {'A', 'C', 'G', 'T', 'U'}
    # Always upper-case the kept characters. Previously input that contained
    # any invalid character (even a space or a digit) kept its original case,
    # while fully valid input was upper-cased.
    cds = ''.join(c.upper() for c in raw_text if c.upper() in valid_characters)
    seq_entry.delete('1.0', ctk.END)
    seq_entry.insert('1.0', cds)
    alternative_ccc()
    codon_seperation()

def codon_seperation():
    """Re-render both text boxes according to the codon-separation checkbox.

    When ``check_var`` is 1 the letters of each box are passed through
    ``st.seq_to_cds`` with a single-space separator; otherwise the letters
    are shown as one unbroken string. Each box's GC-content label is
    refreshed from the text that was just written. The result box is made
    editable only for the duration of its update.
    """
    split_into_codons = check_var.get() == 1

    def _render(raw_text):
        # Drop everything that is not an ASCII letter (spaces, newlines, ...).
        letters = re.sub(r'[^A-Za-z]', '', raw_text)
        if split_into_codons:
            return st.seq_to_cds([letters], sep=' ', sos_eos=False, remove_stop=False)[0]
        return letters

    # Input box and its GC label.
    current_input = seq_entry.get("1.0", ctk.END)
    seq_entry.delete('1.0', ctk.END)
    shown_input = _render(current_input)
    seq_entry.insert('1.0', shown_input)
    gc_label_initial.configure(text=f'GC content: {gc_content(shown_input)}%')

    # Result box (normally read-only) and its GC label.
    result_entry.configure(state=ctk.NORMAL)
    current_result = result_entry.get("1.0", ctk.END)
    result_entry.delete('1.0', ctk.END)
    shown_result = _render(current_result)
    result_entry.insert('1.0', shown_result)
    result_entry.configure(state=ctk.DISABLED)
    gc_label_converted.configure(text=f'GC content: {gc_content(shown_result)}%')

'''The change_to_c function is kept separate from the others because, for "CCC to CCG" and "No three Cs in a row", the wobble position of the
input sequence must first be changed to C; only after that is the desired change applied.'''

def change_to_c(cds=None, path=None):
    """Set the third base of every eligible codon to C.

    Args:
        cds: Nucleotide sequence; any character other than A, C, G, T or U
            (either case) is discarded first. ``None`` is treated as an
            empty sequence.
        path: Accepted for call compatibility; not used.

    Returns:
        A tuple ``(converted, gc_initial, gc_converted)``: the upper-case
        converted sequence without separators, the GC percentage of the
        cleaned input, and the GC percentage of the converted sequence.
    """
    only_letters = re.sub(r'[^AGCTUagctu]', '', cds or '')
    gc_initial = gc_content(only_letters)
    codons = st.seq_to_cds([only_letters], sep=' ', sos_eos=False, remove_stop=False)[0].upper().split(' ')

    # Codons that are left untouched -- presumably because a C in the third
    # position would change the encoded amino acid or destroy a start/stop
    # codon (DNA and RNA spellings are both listed).
    exception_cds = frozenset((
        'ATG', 'TTA', 'TTG', 'AAA', 'AAG', 'GAA', 'GAG', 'CAA', 'CAG', 'AGA', 'AGG',
        'TGG', 'TAA', 'TAG', 'TGA', 'AUG', 'UUA', 'UUG', 'UGG', 'UAA', 'UAG', 'UGA',
    ))

    # Only complete codons have a wobble position. Empty or partial tokens
    # (empty input, trailing 1-2 bases) are passed through unchanged instead
    # of having a C forced onto them.
    converted = ''.join(
        codon[:-1] + 'C' if len(codon) == 3 and codon not in exception_cds else codon
        for codon in codons
    )
    gc_converted = gc_content(converted)
    return converted, gc_initial, gc_converted

def _break_c_runs(codons, protected):
    """Return a copy of *codons* with cross-codon runs of three Cs broken.

    A codon ending in C gets its last base changed to G when, together with
    the start of the next codon, it would form ``C|CC`` (codon ``XXC``) or
    ``CC|C`` (codon ``XCC``). Codons in *protected*, the last codon, and
    tokens that are not exactly three bases long are never modified.
    """
    fixed = list(codons)
    for i in range(len(fixed) - 1):
        codon = fixed[i]
        # Tokens shorter than three bases have no wobble position; skipping
        # them also avoids indexing past the end of a 1-base token.
        if len(codon) != 3 or codon[-1] != 'C' or codon in protected:
            continue
        following = fixed[i + 1]
        if codon[1] != 'C':
            # XXC followed by CC.: wobble C + next two bases form the run.
            creates_run = following[0:2] == 'CC'
        elif codon[0] != 'C':
            # XCC followed by C..: last two bases + next base form the run.
            creates_run = following[0:1] == 'C'
        else:
            # CCC itself is dealt with by the CCC -> CCG replacement.
            creates_run = False
        if creates_run:
            fixed[i] = codon[:-1] + 'G'
    return fixed


def alternative_ccc():
    """Convert the input sequence according to the selected radio option.

    ``radio_var`` selects the mode:
        0 -- wobble positions changed to C (``change_to_c`` only);
        1 -- as 0, then every CCC codon becomes CCG;
        any other value -- as 0, then cross-codon CCC runs are broken and
        every CCC codon becomes CCG.

    The input box is rewritten with letters only, both GC labels are
    updated, the result box receives the converted sequence, and finally
    ``codon_seperation`` re-renders both boxes.
    """
    raw_text = seq_entry.get("1.0", ctk.END)
    cds = re.sub(r'[^A-Za-z]', '', raw_text)
    seq_entry.delete('1.0', ctk.END)
    seq_entry.insert('1.0', cds)

    # Codons ending in C whose last base is never switched to G -- presumably
    # because the G variant encodes something else (DNA and RNA spellings).
    exception_cds = frozenset((
        'AGC', 'AAC', 'AUC', 'CAC', 'UGC', 'UAC', 'UUC', 'GAC', 'ATC', 'TGC', 'TAC', 'TTC',
    ))

    mode = radio_var.get()
    # Every mode starts from the wobble-to-C conversion.
    cds_seq, gc_initial, gc_converted = change_to_c(cds)
    if mode != 0:
        # Same seq_to_cds arguments as the other call sites, so the sequence
        # is split identically in every mode (mode 2 used to rely on the
        # helper's defaults for ``sep`` and ``remove_stop``).
        spaced = st.seq_to_cds([cds_seq], sep=' ', sos_eos=False, remove_stop=False)[0].upper()
        if mode != 1:
            spaced = ' '.join(_break_c_runs(spaced.split(), exception_cds))
        # The replacement runs on the space-separated form, so only whole
        # codons can match 'CCC'.
        cds_seq = spaced.replace('CCC', 'CCG').replace(' ', '')
        gc_converted = gc_content(cds_seq)

    gc_label_initial.configure(text=f'GC content: {gc_initial}%')
    gc_label_converted.configure(text=f'GC content: {gc_converted}%')

    # The result box is read-only except while it is being refreshed.
    result_entry.configure(state=ctk.NORMAL)
    result_entry.delete("1.0", ctk.END)
    result_entry.insert('1.0', cds_seq.upper())
    result_entry.configure(state=ctk.DISABLED)
    codon_seperation()

def gc_content(cds):
    """Return the percentage of G and C bases in *cds*, rounded to 0.1.

    Spaces are ignored when measuring the sequence length and both upper-
    and lower-case letters are counted. A sequence with no characters other
    than spaces yields 0.
    """
    nucleotides = cds.replace(' ', '')
    if not nucleotides:
        return 0
    gc_total = sum(cds.count(base) for base in 'CGcg')
    return round(gc_total / len(nucleotides) * 100, 1)
    
# ---- Appearance ----
ctk.set_appearance_mode('dark')
ctk.set_default_color_theme('blue')

# ---- Main window ----
root = ctk.CTk()
root.title("Wobble to C converter")
root.geometry('600x600')

# Grid: eight rows in three weight classes, three equally wide columns.
root.rowconfigure((1, 4), weight=4, uniform='a')
root.rowconfigure((2, 5), weight=6, uniform='a')
root.rowconfigure((0, 3, 6, 7), weight=3, uniform='a')
root.columnconfigure((0, 1, 2), weight=1, uniform='a')

# Padding shared by most widgets.
_cell_pad = {'padx': 5, 'pady': 5}

# ---- Row 0: file picker ----
file_button = ctk.CTkButton(
    root,
    text="Select FASTA or TEXT File",
    command=open_file,
)
file_button.grid(row=0, column=0, sticky='w', **_cell_pad)

# ---- Row 1: caption, codon-separation checkbox, conversion mode ----
seq_label = ctk.CTkLabel(
    root,
    text="Or enter CDS sequence below",
    text_color='gray',
)
seq_label.grid(row=1, column=0, sticky='sw', **_cell_pad)

# Checkbox toggling the codon-separated display of both text boxes.
check_var = ctk.IntVar()
check_box = ctk.CTkCheckBox(
    root,
    text='seperation based on codon',
    variable=check_var,
    command=codon_seperation,
)
check_box.grid(row=1, column=1, sticky='sw', **_cell_pad)

# The three radio buttons share one grid cell and one variable; they are
# placed at the top, middle and bottom of the cell through `sticky`.
radio_var = ctk.IntVar()

# Mode 0: the wobbling position to C
radio_var_C = ctk.CTkRadioButton(
    root,
    text='The wobbling position to C',
    variable=radio_var,
    command=alternative_ccc,
    value=0,
)
radio_var_C.grid(row=1, column=2, sticky='nw', **_cell_pad)

# Mode 1: change CCC codons to CCG
radio_var_CCC = ctk.CTkRadioButton(
    root,
    text='Change CCC codons to CCG',
    variable=radio_var,
    command=alternative_ccc,
    value=1,
)
radio_var_CCC.grid(row=1, column=2, sticky='w', **_cell_pad)

# Mode 2: no three Cs in a row
radio_var_frame = ctk.CTkRadioButton(
    root,
    text='No three Cs in a row',
    variable=radio_var,
    command=alternative_ccc,
    value=2,
)
radio_var_frame.grid(row=1, column=2, sticky='sw', **_cell_pad)

# ---- Row 2: input text box spanning all columns ----
seq_entry = ctk.CTkTextbox(root)
seq_entry.grid(row=2, column=0, columnspan=3, sticky='nsew', **_cell_pad)

# ---- Row 3: GC content of the input, submit button ----
gc_label_initial = ctk.CTkLabel(root, text='')
gc_label_initial.grid(row=3, column=1, pady=5, sticky='n')

submit_button = ctk.CTkButton(root, text="Submit", command=submit_sequence)
submit_button.grid(row=3, column=0, sticky='nw', **_cell_pad)

# ---- Row 4: caption above the result box ----
Converted_seq_label = ctk.CTkLabel(
    root,
    text='Converted seq',
    text_color='gray',
)
Converted_seq_label.grid(row=4, column=0, pady=5, sticky='sw')

# ---- Row 5: result text box spanning all columns ----
result_entry = ctk.CTkTextbox(root)
result_entry.grid(row=5, column=0, columnspan=3, sticky='nsew', **_cell_pad)

# ---- Row 6: GC content of the converted sequence, save-location label ----
gc_label_converted = ctk.CTkLabel(root, text='')
gc_label_converted.grid(row=6, column=1, pady=5, sticky='n')

# Created empty; shares the cell with the GC label (bottom-aligned).
save_label = ctk.CTkLabel(root, text='', text_color='gray')
save_label.grid(row=6, column=1, pady=10, sticky='s')

# ---- Row 7: developer contact ----
developer_label = ctk.CTkLabel(
    root,
    text='barzegar@postech.ac.kr',
    text_color='gray',
)
developer_label.grid(row=7, column=1, pady=10, sticky='s')

# Run the GUI
root.mainloop()
