In [1]:
# color a structure by differances from the referance sequence
# e.g. VACV K3 is gray where conserved with eIF2a and purple where sites differ

In [2]:
from Bio import SeqIO
import os

In [13]:
# Parse the FASTA file and store the headers and sequences in a dictionary
def fasta_to_dict(input_fasta):
    # make seq dict
    seq_dict = {}
    for record in SeqIO.parse(input_fasta, "fasta"):
        header = record.id  # Extract the header (up to the first space in the description)
        sequence = str(record.seq)  # Convert the sequence object to a string
        seq_dict[header] = sequence
    return seq_dict

def color_structure(input_alignment, input_dict, structure_name, ref_seq_name, output_dir, depiction='surface'):
    seq_dict = fasta_to_dict(input_alignment)
        
    structure_seq = seq_dict[structure_name]
    ref_seq = seq_dict[ref_seq_name]
    
    dif_list = []
    aa = 1
    for struct_aa,ref_aa in zip(structure_seq, ref_seq):
        if struct_aa == '-':
            continue
        if struct_aa != ref_aa:
            dif_list.append(str(aa))
        aa += 1

    struct_id = input_dict[structure_name]['id']
    struct_color = input_dict[structure_name]['color']
    ref_color = input_dict[ref_seq_name]['color']
    
    print(f"# ChimeraX Structure: {structure_name} ID:{input_dict[structure_name]['id']}")
    print(f"view 1; show #{struct_id}.1 models; hide #{struct_id} cartoon,atoms,surface;")
    print(f"show #{struct_id} {depiction}; color #{struct_id}/A #6dc091 {depiction};")
    print(f"color #{struct_id}/B {struct_color} {depiction};")
    print(f"color #{struct_id}/B:{','.join(dif_list)} {ref_color} {depiction};")
    
    img_file = f"{ref_seq_name}-differences_{structure_name}.png"
    output_img = os.path.join(output_dir, img_file)
    print(f"save {output_img} width 900 height 900 transparentBackground true supersample 20;")
    print()
    print(f"hide #{struct_id} models;")
    print()

In [14]:
# 
input_file = "k3-orthologs.aln.fa"
input_dict = {
    'eIF2α':{'color':'gray','id':'1'},
    'VACV':{'color':'#786BAC', 'id':'2'},
    'VARV':{'color':'#EC615E', 'id':'3'},
    'TPV':{'color':'#F4B681','id':'5'},
    'MYXV':{'color':'#6BB3FC','id':'4'},
    'RCV':{'color':'#A8D39B','id':'6'}
}

color_structure(input_file, input_dict, 'VACV', 'eIF2α', 'img/', depiction='surface')

# ChimeraX Structure: VACV ID:2
view 1; show #2.1 models; hide #2 cartoon,atoms,surface;
show #2 surface; color #2/A #6dc091 surface;
color #2/B #786BAC surface;
color #2/B:1,2,3,4,5,6,7,8,10,11,12,15,16,17,18,20,21,22,23,24,25,26,28,29,31,32,34,35,36,38,39,40,41,42,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,61,62,63,64,66,72,73,79,80,81,82,84,85,86,87,88 gray surface;
save img/eIF2α-differences_VACV.png width 900 height 900 transparentBackground true supersample 20;

hide #2 models;



In [41]:
# Show VACV
cartoon_color_dif("VACV")

# Differences for other K3 Orthologs - Cartoon
for k3 in k3_orthologs:
    vacv_seq = k3_dict['VACV']['seq']
    k3_seq = k3_dict[k3]['seq']

    aa = 0
    dif_list = []
    for vacv_aa,k3_aa in zip(vacv_seq, k3_seq):
        if k3_aa == '-':
            continue
        aa += 1
        if vacv_aa != k3_aa:
            dif_list.append(str(aa))

    # Show differences from VACV
    cartoon_color_dif(k3)

# ChimeraX K3 color: VACV
view 1; show #2.1 models; hide #2 surface; show #2 cartoon; color #2/A #6dc091 cartoon;
color #2/B #786BAC cartoon;
color #2/B:1,2,3,4,5,6,7,8,9,10,13,14,18,19,20,23,24,25,26,27,28,29,30,31,32,33,35,36,37,38,39,41,42,43,45,46,47,49,50,52,53,55,56,57,58,59,60,61,63,64,65,66,68,69,70,71,72,73,75,79,80,84,86,87,88,89,90,91,92,93,94,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233 #786BAC cartoon;
save /Volumes/data/sadhu_lab/projects/dms_pkr_k3-orthologs/data/chimerax/img/k

In [15]:
def surface_color_dif(k3):
    vacv_seq = k3_dict['VACV']['seq']
    vacv_color = k3_dict['VACV']['color']
    k3_color = k3_dict[k3]['color']
    k3_id = k3_dict[k3]['id']
    
    print(f"# ChimeraX K3 color: {k3}")
    print(f"view 3; show #{k3_id}.1 models; hide #{k3_id}/A surface; hide #{k3_id} cartoon; show #{k3_id}/B surface;")
    print(f"color #{k3_id}/B {vacv_color} surface;")
    print(f"color #{k3_id}/B:{','.join(dif_list)} {k3_color} surface;")
    
    img_file = f"surface-k3-differences_{k3}.png"
    img_path = os.path.join(img_dir, img_file)
    print(f"save {img_path} width 900 height 900 transparentBackground true supersample 10;")
    print(f"\nhide #{k3_id} models;\n")

In [16]:
# Show VACV
surface_color_dif("VACV")

# Differences for other K3 Orthologs - Cartoon
for k3 in k3_orthologs:
    vacv_seq = k3_dict['VACV']['seq']
    k3_seq = k3_dict[k3]['seq']

    aa = 0
    dif_list = []
    for vacv_aa,k3_aa in zip(vacv_seq, k3_seq):
        if k3_aa == '-':
            continue
        aa += 1
        if vacv_aa != k3_aa:
            dif_list.append(str(aa))

    # Show differences from VACV
    surface_color_dif(k3)

NameError: name 'k3_dict' is not defined

In [None]:
# try coloring by similarity to eIF2a

def surface_color_dif(k3):
    vacv_seq = k3_dict['VACV']['seq']
    vacv_color = k3_dict['VACV']['color']
    k3_color = k3_dict[k3]['color']
    k3_id = k3_dict[k3]['id']
    
    print(f"# ChimeraX K3 color: {k3}")
    print(f"view 3; show #{k3_id}.1 models; hide #{k3_id}/A surface; hide #{k3_id} cartoon; show #{k3_id}/B surface;")
    print(f"color #{k3_id}/B {vacv_color} surface;")
    print(f"color #{k3_id}/B:{','.join(dif_list)} {k3_color} surface;")
    
    img_file = f"surface-k3-differences_{k3}.png"
    img_path = os.path.join(img_dir, img_file)
    print(f"save {img_path} width 900 height 900 transparentBackground true supersample 10;")
    print()
    print(f"hide #{k3_id} models;")
    print()

# Show VACV
surface_color_dif("VACV")

# Differences for other K3 Orthologs - Cartoon
for k3 in k3_orthologs:
    vacv_seq = k3_dict['VACV']['seq']
    k3_seq = k3_dict[k3]['seq']

    aa = 0
    dif_list = []
    for vacv_aa,k3_aa in zip(vacv_seq, k3_seq):
        if k3_aa == '-':
            continue
        aa += 1
        if vacv_aa != k3_aa:
            dif_list.append(str(aa))

    # Show differences from VACV
    surface_color_dif(k3)