In [2]:
#The IUPred2A package (which provides iupred2a_lib) must be downloaded and importable before running this notebook
#import relevant packages
import pandas as pd
import iupred2a_lib
from scipy.spatial import distance
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [7]:
#define the function that ranks the candidate linkers for the user's choice of target gene and essential gene.
#The target-gene and essential-gene score changes are weighted equally (simple average); the linker's own score is not used.
def best_fusion_linker(target_gene, essential_gene, output_path,
                       linkers_list=None, linkers_names=None, top_n=10):
    """Rank fusion linkers by how little they change the two genes' IUPred2A scores.

    For every candidate linker the construct ``target_gene + linker +
    essential_gene`` is scored with ``iupred2a_lib.iupred``. The Euclidean
    distance between each gene's scores inside the construct and its
    stand-alone scores is computed, the two distances are averaged, and the
    linkers are sorted by that average (smallest first). For each reported
    linker a two-panel before/after figure is saved to
    ``output_path + 'Disorder profile<i>.png'`` (``i`` = 1 for the best linker).

    Parameters
    ----------
    target_gene, essential_gene : str
        Amino-acid sequences of the two fused proteins.
    output_path : str
        Prefix that is concatenated directly with the figure file name, so a
        directory must end with a path separator.
    linkers_list, linkers_names : sequence of str, optional
        Candidate linker sequences and their names, in matching order. When
        omitted, the notebook-level variables of the same names are used,
        which is how the original three-argument call worked.
    top_n : int or None, optional
        Number of best linkers to report and plot (default 10); ``None``
        reports all of them.

    Returns
    -------
    dict
        ``{linker name: (linker sequence, averaged distance)}`` in best-first
        insertion order. Linkers sharing a name collapse into a single entry.

    Raises
    ------
    NameError
        If the linkers are neither passed in nor defined at notebook level.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n is not None and top_n < 0:
        raise ValueError('top_n must be a non-negative integer or None')

    # Callers using the original three-argument form rely on notebook-level
    # variables; keep that working, but fail with an actionable message.
    notebook_vars = globals()
    if linkers_list is None:
        if 'linkers_list' not in notebook_vars:
            raise NameError("name 'linkers_list' is not defined; pass linkers_list=... "
                            "or define it at notebook level")
        linkers_list = notebook_vars['linkers_list']
    if linkers_names is None:
        if 'linkers_names' not in notebook_vars:
            raise NameError("name 'linkers_names' is not defined; pass linkers_names=... "
                            "or define it at notebook level")
        linkers_names = notebook_vars['linkers_names']

    length_target = len(target_gene)
    length_essential = len(essential_gene)

    # Reference scores of each gene on its own, before any fusion.
    # NOTE(review): iupred is assumed to return one score per residue - confirm.
    score_target_before = iupred2a_lib.iupred(target_gene)
    score_essential_before = iupred2a_lib.iupred(essential_gene)

    distances_target = []
    distances_essential = []
    after_scores = []  # per linker: (target slice, essential slice), reused for plotting
    for linker in linkers_list:
        # Construct layout: target gene - linker - essential gene.
        score_construct = iupred2a_lib.iupred(target_gene + linker + essential_gene)
        essential_start = length_target + len(linker)
        score_target_after = score_construct[:length_target]
        score_essential_after = score_construct[essential_start:essential_start + length_essential]
        after_scores.append((score_target_after, score_essential_after))
        distances_target.append(distance.euclidean(score_target_after, score_target_before))
        distances_essential.append(distance.euclidean(score_essential_after, score_essential_before))

    # Both genes contribute equally to the final score.
    final_score = (np.array(distances_target) + np.array(distances_essential)) / 2
    best = np.argsort(final_score)[:top_n]
    chosen_linkers = np.array(linkers_list)[best]
    chosen_linkers_names = np.array(linkers_names)[best]
    final_scores = final_score[best]
    dic_final = dict(zip(chosen_linkers_names, zip(chosen_linkers, final_scores)))

    # One before/after figure per reported linker, numbered by rank.
    # Figures are left open on purpose: closing them here would stop an
    # inline notebook backend from displaying them.
    positions_target = range(length_target)
    positions_essential = range(length_essential)
    for rank, linker_index in enumerate(best, start=1):
        score_target, score_essential = after_scores[linker_index]

        fig, (ax1, ax2) = plt.subplots(1, 2)
        fig.suptitle('Disorder profile for linker ' + str(rank), x=1, fontsize=15)

        ax1.plot(positions_target, score_target_before, color='black', label='Before fusion')
        ax1.plot(positions_target, score_target, color='red', label='After fusion')
        ax1.legend(loc=0)
        ax1.set_xlabel('Position')
        ax1.set_ylabel('Score')
        ax1.set_title('Target gene')

        ax2.plot(positions_essential, score_essential_before, color='black', label='Before fusion')
        ax2.plot(positions_essential, score_essential, color='blue', label='After fusion')
        ax2.legend(loc=0)
        ax2.set_xlabel('Position')
        ax2.set_ylabel('Score')
        ax2.set_title('Essential gene')

        fig.subplots_adjust(left=0.125, bottom=0.1, right=2, top=0.8, wspace=0.8, hspace=0.35)
        fig.savefig(output_path + 'Disorder profile' + str(rank) + '.png',
                    dpi=fig.dpi, bbox_inches='tight')

    return dic_final


In [8]:
def best_2A_linker():
    """Return the fixed table of 2A self-cleaving peptide linkers.

    Returns
    -------
    dict
        ``{peptide name: (amino-acid sequence, rank)}`` for P2A, T2A, E2A and
        F2A, in that order, with ranks 1 to 4.
        NOTE(review): the rank presumably reflects reported cleavage
        efficiency (P2A best) - confirm against the intended reference.
    """
    # T2A previously read 'EGRGSLLCGDVEENPGP', which lacks the threonine of
    # the published Thosea asigna virus 2A peptide (EGRGSLLTCGDVEENPGP).
    peptides = (
        ('P2A', 'ATNFSLLKQAGDVEENPGP'),
        ('T2A', 'EGRGSLLTCGDVEENPGP'),
        ('E2A', 'QCTNYALLKLAGDVESNPGP'),
        ('F2A', 'VKQTLNFDLLKLAGDVESNPGP'),
    )
    return {name: (sequence, rank)
            for rank, (name, sequence) in enumerate(peptides, start=1)}

In [9]:
def choose_linker(type_of_linker,target_gene,essential_gene,output_path,linkers):
    """Validate the two gene sequences and dispatch to the requested linker chooser.

    Parameters
    ----------
    type_of_linker : str
        ``'fusion'`` or ``'2A'``.
    target_gene, essential_gene : str
        Amino-acid sequences; only the 20 upper-case one-letter codes listed
        below are accepted, and an empty sequence is rejected.
    output_path : str
        Forwarded unchanged to ``best_fusion_linker``.
    linkers : table with ``'name'`` and ``'sequence'`` columns
        Candidate fusion linkers; each column must support ``.tolist()``.
        Only read when ``type_of_linker == 'fusion'``.

    Returns
    -------
    dict or str
        The result of ``best_fusion_linker`` or ``best_2A_linker``, or an
        error-message string when a gene is invalid or the linker type is
        unknown.
    """
    # best_fusion_linker takes no linker arguments and looks these two names
    # up at notebook level, so they are published there. As plain locals the
    # ``linkers`` argument never reached it.
    global linkers_names, linkers_list

    valid_aa = frozenset('GALMFWKQESPVICYHRNDT')
    # An empty sequence has no invalid characters but is not a usable gene.
    if not target_gene or not set(target_gene) <= valid_aa:
        return "Your target gene isn't valid!"
    if not essential_gene or not set(essential_gene) <= valid_aa:
        return "Your essential gene isn't valid!"

    if type_of_linker == 'fusion':
        linkers_names = linkers['name'].tolist()
        linkers_list = linkers['sequence'].tolist()
        return best_fusion_linker(target_gene, essential_gene, output_path)
    if type_of_linker == '2A':
        return best_2A_linker()
    return 'You can only choose between 2A and fusion linkers!'