In [3]:
import numpy as np
import os
print(os.getcwd())
os.chdir('/usr/users/fatma.chafra01/ColabDesign')
print(os.getcwd())
#can't find colabdesign script if not in ColabDesign main directory
from colabdesign.af.alphafold.common import residue_constants

def find_rows_without_value(arr, value):
    # Check each row for the presence of the value
    rows_with_value = np.any(np.isclose(arr, value), axis=1)

    # Get the indices of rows that don't have the value
    rows_without_value = np.where(~rows_with_value)[0]

    return rows_without_value

def generate_bias_matrix(seq, fix_pos, to_be_mutated=None, outfile=None, verbose=0):
    '''
    seq string of the amino acid sequence that is to be used (after the flanking non-structure associated regions are removed)
    fix_pos list of regions (tuples) to be kept constant in the seq (not zero indexed!)
    to_be_mutated list of amino acid strings (single letter) to mutate the non-fix position residues (in order of the non-fix position residues in seq)
    outfile absolute path and filename for the file to be saved in (optional)
    '''
    binder_len = len(seq)
    # get a bias matrix of the dimensions (no of rows: binder length, no of cols: all 20 amino acids as a one hot)
    bias = np.zeros((binder_len,20))
    for item in fix_pos:
        start = item[0] -1
        end = item[1] -1
        if verbose==1: print(start, end)
        while start <= end:
            aa = seq[start]
            if verbose==1: print(start, aa)
            bias[start,residue_constants.restype_order[str(aa)]] = 1e8
            if verbose==1: print(f'bias added to:{start} as {aa}')
            start += 1
            # because the index changed once the pdb file was truncated
            # bias[start-19,residue_constants.restype_order[str(aa)]] = 1e8
            # order comes from residue_constants.restype_order[str(aa)]
    rows_to_change = find_rows_without_value(bias, 1.0e+08)
    if verbose==1: 
        print('bias matrix initial', bias)
        print('rows_to_change:', rows_to_change)
    
    if to_be_mutated != None:
        index_to_be_mutated = [residue_constants.restype_order[i] for i in to_be_mutated]
        if verbose==1: 
            print('mutation to:', to_be_mutated)
            print('index to be mutated:', index_to_be_mutated)
        if len(rows_to_change) != len(index_to_be_mutated):
            print('Number of positions to mutate and number of specified amino acids do not match!')
            return 
        for i in range(len(rows_to_change)):
            bias[rows_to_change[i],index_to_be_mutated[i]] = 1.0e+08


    if verbose==1: print('bias matrix end', bias)
    # Save the matrix to a CSV file
    if outfile != None: 
        np.savetxt(outfile, bias, delimiter=",", fmt="%.2f")
        print(f'File saved in {outfile}')
    return bias

/home/mpg01/MBPC/fatma.chafra01/ColabDesign/af/examples
/home/mpg01/MBPC/fatma.chafra01/ColabDesign


In [4]:
# can only enter a single type of aa to be mutated but this function works for sure, the other needs to be checked again!!
def generate_bias_matrix1(seq, fix_pos, to_be_mutated=None, outfile=None, verbose=0):
    '''
    seq string of the amino acid sequence that is to be used (after the flanking non-structure associated regions are removed)
    fix_pos list of regions (tuples) to be kept constant in the seq (not zero indexed!)
    to_be_mutated amino acid string (single letter) to mutate the non-fix position residues
    outfile absolute path and filename for the file to be saved in (optional)
    '''
    binder_len = len(seq)
    # get a bias matrix of the dimensions (no of rows: binder length, no of cols: all 20 amino acids as a one hot)
    bias = np.zeros((binder_len,20))
    for item in fix_pos:
        start = item[0] -1
        end = item[1] -1
        if verbose==1: print(start, end)
        while start <= end:
            aa = seq[start]
            if verbose==1: print(start, aa)
            bias[start,residue_constants.restype_order[str(aa)]] = 1e8
            if verbose==1: print(f'bias added to:{start} as {aa}')
            start += 1
            # because the index changed once the pdb file was truncated
            # bias[start-19,residue_constants.restype_order[str(aa)]] = 1e8
            # order comes from residue_constants.restype_order[str(aa)]
    rows_to_change = find_rows_without_value(bias, 1.0e+08)
    if verbose==1: 
        print('bias matrix initial', bias)
        print('rows_to_change:', rows_to_change)
    
    if to_be_mutated != None:
        index_to_be_mutated = residue_constants.restype_order[to_be_mutated]
        if verbose==1: 
            print('mutation to:', to_be_mutated)
            print('index to be mutated:', index_to_be_mutated)
        #if len(rows_to_change) != len(index_to_be_mutated):
            #print('Number of positions to mutate and number of specified amino acids do not match!')
            #return 
        for i in range(len(rows_to_change)):
            bias[rows_to_change[i],index_to_be_mutated] = 1.0e+08


    if verbose==1: print('bias matrix end', bias)
    # Save the matrix to a CSV file
    if outfile != None: 
        np.savetxt(outfile, bias, delimiter=",", fmt="%.2f")
        print(f'File saved in {outfile}')
    return bias

In [6]:
seq ='VQLVESGGGLVQPGGSLRLSCTTSTSLFSITTMGWYRQAPGKQRELVASIKRGGGTNYADSMKGRFTISRDNARNTVFLEMNNLTTEDTAVYYCNAAILAYTGEVTNYWGQGTQVTV'
# testing to change two positions 1st and 101st row
fix_pos = [(2,100),(102,117)]
to_be_mutated = ['A','R']

In [7]:
test1 = generate_bias_matrix(seq=seq, fix_pos=fix_pos)
print(test1[[0,100],:])
print(find_rows_without_value(test1, 1e8))

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[  0 100]


In [8]:
test2 = generate_bias_matrix(seq=seq, fix_pos=fix_pos, to_be_mutated=to_be_mutated)
print(test2[[0,100],:])
print(find_rows_without_value(test2, 1e8))

[[1.e+08 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00
  0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00]
 [0.e+00 1.e+08 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00
  0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00 0.e+00]]
[]


In [33]:
test3 = generate_bias_matrix1(seq=seq, fix_pos=[(1,1),(3,117)], to_be_mutated='A',outfile='/usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/test.csv', verbose=1)

0 0
0 V
bias added to:0 as V
2 116
2 L
bias added to:2 as L
3 V
bias added to:3 as V
4 E
bias added to:4 as E
5 S
bias added to:5 as S
6 G
bias added to:6 as G
7 G
bias added to:7 as G
8 G
bias added to:8 as G
9 L
bias added to:9 as L
10 V
bias added to:10 as V
11 Q
bias added to:11 as Q
12 P
bias added to:12 as P
13 G
bias added to:13 as G
14 G
bias added to:14 as G
15 S
bias added to:15 as S
16 L
bias added to:16 as L
17 R
bias added to:17 as R
18 L
bias added to:18 as L
19 S
bias added to:19 as S
20 C
bias added to:20 as C
21 T
bias added to:21 as T
22 T
bias added to:22 as T
23 S
bias added to:23 as S
24 T
bias added to:24 as T
25 S
bias added to:25 as S
26 L
bias added to:26 as L
27 F
bias added to:27 as F
28 S
bias added to:28 as S
29 I
bias added to:29 as I
30 T
bias added to:30 as T
31 T
bias added to:31 as T
32 M
bias added to:32 as M
33 G
bias added to:33 as G
34 W
bias added to:34 as W
35 Y
bias added to:35 as Y
36 R
bias added to:36 as R
37 Q
bias added to:37 as Q
38 A
bias

In [46]:
# generate different fix position lists
single_mutation_ranges = []
for i in range(1, 117):
    if i == 1:
        single_mutation_ranges.append([(2, 117)])
    #elif i == 116:
        #single_mutation_ranges.append([(1, 116)])
    else:
        single_mutation_ranges.append([(1, i-1), (i + 1, 117)])
# have to append the last range because otherwise doesn't include the last mutation
single_mutation_ranges.append([(1,116)])

print(single_mutation_ranges)

# convert this to a function
def generate_single_mutation_range(seq_length, verbose=0):
    single_mutation_ranges = []
    for i in range(1, seq_length):
        if i == 1:
            single_mutation_ranges.append([(2, seq_length)])
        #elif i == 116:
            #single_mutation_ranges.append([(1, 116)])
        else:
            single_mutation_ranges.append([(1, i-1), (i + 1, seq_length)])
    # have to append the last range because otherwise doesn't include the last mutation
    single_mutation_ranges.append([(1,seq_length -1)])

    if verbose ==1: print(single_mutation_ranges)
    return single_mutation_ranges



[[(2, 117)], [(1, 1), (3, 117)], [(1, 2), (4, 117)], [(1, 3), (5, 117)], [(1, 4), (6, 117)], [(1, 5), (7, 117)], [(1, 6), (8, 117)], [(1, 7), (9, 117)], [(1, 8), (10, 117)], [(1, 9), (11, 117)], [(1, 10), (12, 117)], [(1, 11), (13, 117)], [(1, 12), (14, 117)], [(1, 13), (15, 117)], [(1, 14), (16, 117)], [(1, 15), (17, 117)], [(1, 16), (18, 117)], [(1, 17), (19, 117)], [(1, 18), (20, 117)], [(1, 19), (21, 117)], [(1, 20), (22, 117)], [(1, 21), (23, 117)], [(1, 22), (24, 117)], [(1, 23), (25, 117)], [(1, 24), (26, 117)], [(1, 25), (27, 117)], [(1, 26), (28, 117)], [(1, 27), (29, 117)], [(1, 28), (30, 117)], [(1, 29), (31, 117)], [(1, 30), (32, 117)], [(1, 31), (33, 117)], [(1, 32), (34, 117)], [(1, 33), (35, 117)], [(1, 34), (36, 117)], [(1, 35), (37, 117)], [(1, 36), (38, 117)], [(1, 37), (39, 117)], [(1, 38), (40, 117)], [(1, 39), (41, 117)], [(1, 40), (42, 117)], [(1, 41), (43, 117)], [(1, 42), (44, 117)], [(1, 43), (45, 117)], [(1, 44), (46, 117)], [(1, 45), (47, 117)], [(1, 46), (48

In [47]:
test_range = generate_single_mutation_range(50)
print(len(test_range))

50


In [43]:
aa_list = ['A','R','N','D','C','Q','E','G','H','I','L','K','M','F','P','S','T','W','Y','V']
print(len(aa_list))
test_ranges = [[(2, 117)], [(1, 1), (3, 117)]]
#for i in range(len(test_ranges)):
    #print('range:', test_ranges[i])
for i in range(len(single_mutation_ranges)):
    print('range:', single_mutation_ranges[i])
    for aa in aa_list:
        outname = f'/usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_{i}th_bias_{aa}.csv'
        print('aa:', aa)
        #generate_bias_matrix(seq=seq, fix_pos=test_ranges[i], to_be_mutated=[aa], outfile=outname, verbose=1)
        generate_bias_matrix1(seq=seq, fix_pos=single_mutation_ranges[i], to_be_mutated= aa, outfile=outname)
    

20
range: [(2, 117)]
aa: A
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_0th_bias_A.csv
aa: R
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_0th_bias_R.csv
aa: N
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_0th_bias_N.csv
aa: D
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_0th_bias_D.csv
aa: C
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_0th_bias_C.csv
aa: Q
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_0th_bias_Q.csv
aa: E
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_0th_bias_E.csv
aa: G
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_matrix/single_mutation_0th_bias_G.csv
aa: H
File saved in /usr/users/fatma.chafra01/ColabDesign/af/examples/bias_

In [5]:
# bias indices to consider (less than 10 A to antigen) but according to original indexing and also 1 indexed not zero indexed so has to subtract 4 instead of 3:
positions = [4,5,26,27,28,29,30,31,32,33,34,35,36,37,39,49,52,53,54,55,56,58,60,73,75,78,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112]
positions_reindexed = [position - 4 for position in positions]
print(positions_reindexed)

[0, 1, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 45, 48, 49, 50, 51, 52, 54, 56, 69, 71, 74, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108]


In [6]:
import glob
import os
matching_files = []
for position in positions_reindexed:
    pattern = f"af/examples/bias_matrix/single_mutation_{position}th_bias_*.csv"
    matching_files.extend(glob.glob(pattern))

# Print the matching filenames
for file in matching_files:
    print(os.path.basename(file))

single_mutation_0th_bias_G.csv
single_mutation_0th_bias_H.csv
single_mutation_0th_bias_K.csv
single_mutation_0th_bias_D.csv
single_mutation_0th_bias_M.csv
single_mutation_0th_bias_N.csv
single_mutation_0th_bias_A.csv
single_mutation_0th_bias_P.csv
single_mutation_0th_bias_S.csv
single_mutation_0th_bias_V.csv
single_mutation_0th_bias_Y.csv
single_mutation_0th_bias_W.csv
single_mutation_0th_bias_T.csv
single_mutation_0th_bias_R.csv
single_mutation_0th_bias_Q.csv
single_mutation_0th_bias_C.csv
single_mutation_0th_bias_L.csv
single_mutation_0th_bias_E.csv
single_mutation_0th_bias_F.csv
single_mutation_0th_bias_I.csv
single_mutation_1th_bias_V.csv
single_mutation_1th_bias_Y.csv
single_mutation_1th_bias_S.csv
single_mutation_1th_bias_P.csv
single_mutation_1th_bias_N.csv
single_mutation_1th_bias_A.csv
single_mutation_1th_bias_M.csv
single_mutation_1th_bias_K.csv
single_mutation_1th_bias_D.csv
single_mutation_1th_bias_G.csv
single_mutation_1th_bias_H.csv
single_mutation_1th_bias_F.csv
single_m