In [203]:
import itertools
import re
from itertools import chain

import pandas as pd

#Visualizations
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go


# TTK in colorectal cancer cell lines

In [207]:
# Load the edge table and keep the edge label plus the two z-score difference
# columns (HCT116 and DLD1).
networkfile = pd.read_excel('input/edges cell lines and primary breast.xlsm', sheet_name='edges cell lines and primary br')
networkfile = networkfile[["edge","difference hct116","difference dld1"]]

# Each edge label is split on its first '.' into a (from, to) kinase pair.
# Tuples are the format needed to add edges to a pyvis.network graph.
# NOTE(review): assumes every label contains a '.'; a label without one makes
# the join below fail with a length-mismatch error - confirm against the sheet.
edges_list = networkfile["edge"].to_list()
edges_list2 = pd.DataFrame({'col':[tuple(edge.split('.',1)) for edge in edges_list]})
addr = ['from','to']
networkfile = networkfile.join(edges_list2.col.apply(lambda loc: pd.Series(loc, index=addr)))

# Keep only the edges that touch one of the PI3K/AKT/mTOR hub kinases.
hub_kinases = ['PIK3CA','AKT1_2','MTOR']
networkfile = networkfile[networkfile['from'].isin(hub_kinases) | networkfile['to'].isin(hub_kinases)]

# Every kinase appearing in the remaining edges. Sorted so the heatmap columns
# come out in the same order on every run (bare set order depends on string
# hashing and changes between kernel sessions).
kinases = sorted(set(itertools.chain(networkfile['from'], networkfile['to'])))

# For each hub: the set of kinases found on any of its edges (hub included).
kin_dict = {
    hub: set(itertools.chain.from_iterable(
        networkfile.loc[(networkfile['from'] == hub) | (networkfile['to'] == hub), ['from','to']].values.tolist()
    ))
    for hub in hub_kinases
}


def _hub_edge_zscore(edge_table, hub, partner, column):
    """Return the value of `column` for the hub-partner edge, in either direction.

    Returns None unless exactly one row matches, so missing pairs and
    ambiguous duplicates both show up as gaps in the heatmap.
    """
    forward = (edge_table['from'] == hub) & (edge_table['to'] == partner)
    backward = (edge_table['from'] == partner) & (edge_table['to'] == hub)
    match = edge_table.loc[forward | backward, column]
    if len(match) != 1:
        return None
    # float() on a whole one-element Series is deprecated in recent pandas,
    # so the scalar is pulled out explicitly.
    return float(match.iloc[0])


# One heatmap row per hub, one column per kinase; None marks "no single edge".
zscore_column = 'difference dld1'
values = [
    [_hub_edge_zscore(networkfile, hub, kin, zscore_column) for kin in kinases]
    for hub in hub_kinases
]

# HEATMAP
fig = go.Figure(data=go.Heatmap(
                z=values,
                x=kinases,
                y=hub_kinases,
                yaxis='y2',
                hoverongaps = False))

# The hub labels sit on a free secondary y axis at the left edge, so the x
# domain starts at 0.09 to leave room for them.
fig.update_layout(
    xaxis=dict(domain=[0.09,1]),
    yaxis2=dict(anchor='free', position=0, side='right'),
    title={
        'text': 'PI3K/AKT/mTOR edges z-scores in <br> DLD1 cells with PIK3CA mutations vs DLD1 wild-type cells',
        'y':0.88,
        'x':0.52,
        'xanchor': 'center'},
    font={
        'size':16
    }
)

# Last expression of the cell: renders the figure in the notebook.
fig



# Interactive network visualization

In [None]:
def _degree_to_node_value(degree):
    """Map a node's degree to the pyvis ``value`` that controls its drawn size."""
    # (minimum degree, node value) pairs, highest bucket first. The buckets are
    # 0-19, 20-29, 30-39, 40-49, 50-59, 60-69 and 70+.
    size_buckets = (
        (70, 40000000000),
        (60, 30000000000),
        (50, 20000000000),
        (40, 10000000000),
        (30, 2000000000),
        (20, 1000000000),
        (0, 100000),
    )
    for minimum_degree, node_value in size_buckets:
        if degree >= minimum_degree:
            return node_value
    # Unreachable for a real degree (always >= 0); keep the initial node value.
    return 15000000000


def network_visualization(edges_list, kinases, kg_colors, treatment):
    """Draw a kinase network with pyvis and write it out as an HTML file.

    Requires ``Network`` to be in scope.
    NOTE(review): presumably ``from pyvis.network import Network``; that import
    is not present in the visible cells - confirm it exists before this cell.

    Parameters
    ----------
    edges_list : list of tuple
        Edges handed unchanged to ``Network.add_edges``. The callers pass both
        (kinase, kinase) and (kinase, kinase, z_score) tuples. Every endpoint
        has to be one of the nodes listed in ``kinases``.
    kinases : sequence of iterables
        Groups of kinase names. Each group is drawn in its own colour.
    kg_colors : sequence of str
        Colours indexed in parallel with ``kinases``; needs at least as many
        entries as there are groups (extra entries are ignored).
    treatment : str
        Treatment name, used only to build the output file name.

    Returns
    -------
    The populated network object, after the HTML file
    ``output_visualizations/network_visualization<treatment>[-].html`` has been
    written via ``show``.
    """
    #Initialize a pyvis.network instance
    networkgraph = Network(height='1000px',width='1000px', heading='')

    #Add each community of nodes with their assigned color
    for group_color, group in zip(kg_colors, kinases):
        for kinase in group:
            networkgraph.add_node(kinase, value=15000000000, title=kinase, label=kinase, color=group_color, mass=23)

    #Add the edges
    networkgraph.add_edges(edges_list)

    #Adjacency map (node id -> connected node ids), used for degree and tooltips
    kinases_map = networkgraph.get_adj_list()

    for node in networkgraph.nodes:
        neighbours = kinases_map[node["id"]]
        degree = len(neighbours)
        #Hover box: degree plus the connected kinases, sorted so that the
        #tooltip text is the same on every run
        node["title"] += " connected to "+ str(degree) +" kinases:<br>" + "<br>".join(sorted(neighbours))
        #Draw nodes bigger the higher their degree is
        node["value"] = _degree_to_node_value(degree)

    #Settings regarding the appearance of the network
    networkgraph.set_options("""
    var options = {
    "nodes": {
    "borderWidth": 1.5,
        "font": {
        "size": 80,
        "face": "verdana"
        }
      },
      "edges": {
        "color": {
          "color": "rgba(192,187,223,1)",
          "inherit": true
        },
        "font": {
          "strokeWidth": 6
        },
        "hoverWidth": 3.2,
        "smooth": false
      },
      "physics": {
        "barnesHut": {
          "springLength": 1040
        },
        "minVelocity": 0.75
      }
    }
    """)

    #Write the visualization as an HTML file
    final_network = treatment + '[-]'
    networkgraph.show('output_visualizations/network_visualization{}.html'.format(final_network))

    #Callers bind the result to a variable, so hand the network back to them
    return networkgraph


## 2.2 - Network visualizations of networks trametinib(MAP2K1)[-], GDC0994(MAPK1_3), GDC0941(PI3K)[-], and AZD5363(AKT)[-]

In [None]:
# Render one HTML network per selected community. The edge list, kinase groups
# and treatment name are all looked up at the same position n.
for n in range(len(target_communities)):
    networkgraph = network_visualization(
        edges_list=negative_edges[n],
        kinases=kinase_groups[n],
        kg_colors=["#fcbb8b", "#857be3", "#baeeff"],
        treatment=treatments[n],
    )

## 2.3 - Network visualization of the intersection between the selected communities of trametinib(MAP2K1)[-], GDC0941(PI3K)[-] and AZD5363(AKT)[-]

In [None]:
#Kinases found in all three selected communities: trametinib(MAP2K1)[-],
#GDC0941(PIK3CA)[-] and AZD5363(AKT1_2)[-]
intersection = [
    kinase
    for kinase in target_communities[0]
    if kinase in target_communities[1] and kinase in target_communities[2]
]
#Kinases shared by the trametinib(MAP2K1)[-] and GDC0941(PIK3CA)[-] communities
#but absent from the AZD5363(AKT)[-] community
MAP2K1_PIK3CA = [
    kinase
    for kinase in target_communities[0]
    if kinase in target_communities[1] and kinase not in target_communities[2]
]

#The two groups are drawn in different colours; kinases is their flat union
kinase_groups = (MAP2K1_PIK3CA, intersection)
kinases = list(itertools.chain.from_iterable(kinase_groups))

#Drop the z-score from every edge of the three networks, keep the
#(kinase, kinase) pairs present in all three, then keep only the pairs whose
#two endpoints both belong to the groups above.
#NOTE(review): pairs are compared as ordered tuples, so (a, b) and (b, a) do
#not count as the same edge - confirm the edge lists use one orientation.
edges = [
    [(source, target) for source, target, _zscore in negative_edges[network_id]]
    for network_id in (0, 1, 2)
]
edges = set(edges[0]).intersection(edges[1], edges[2])
edges = [
    (source, target)
    for source, target in edges
    if source in kinases and target in kinases
]

#Intersection of communities visualized as a network
networkgraph = network_visualization(
    edges_list=edges,
    kinases=kinase_groups,
    kg_colors=["#fcbb8b", "#857be3", "#baeeff","green"],
    treatment='intersection_MAP2K1_PIK3CA_AKT',
)


# Return target names from column headers

In [None]:
## Return targets (if they are present in the treatment name)

def _expand_isoforms(name_parts):
    """Turn the '.'-separated target tokens of a header into kinase names.

    A token made only of digits is an extra isoform number of the preceding
    kinase: ['AKT1', '2'] -> ['AKT1', 'AKT2'] and
    ['MAP2K1', '2'] -> ['MAP2K1', 'MAP2K2']. Only the trailing digits of a
    name are treated as its isoform number; digits inside the name are kept.
    Any other token is kept whole: ['PIK3CA', 'MTOR'] -> ['PIK3CA', 'MTOR'].
    """
    expanded = []
    stem = None  # name of the latest kinase with its trailing isoform number removed
    for part in name_parts:
        if not part:
            # Empty token (e.g. from '..' in a header): nothing to look up.
            continue
        if re.fullmatch(r'\d+', part):
            # Bare isoform number; ignored when no kinase name precedes it.
            if stem is not None:
                expanded.append(stem + part)
            continue
        stem = re.sub(r'\d+\Z', '', part)
        expanded.append(part)
    return expanded


def _ksea_nodes(kinase_name, gene_table):
    """Return the unique "node" values of the rows of `gene_table` that mention `kinase_name`.

    Rows where any cell contains "<NAME>_" (case-insensitive) are preferred;
    if there are none, rows containing the bare name are used instead.
    """
    # Escaped so the name is matched literally rather than as a regex.
    literal_name = re.escape(kinase_name.upper())
    for pattern in (literal_name + '_', literal_name):
        matches = gene_table[gene_table.apply(lambda row: row.astype(str).str.contains(pattern, case=False).any(), axis=1)]
        if len(matches) > 0:
            break
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(matches["node"]))


targets = []
#csv with KSEA kinase names and all uniprot aliases
kinase_gene_ids = pd.read_csv('input/kinase_gene_names.csv')
#Return targets for each treatment
for treatment in headers:
    #Everything before the last '.' is the target part of the header; the last
    #piece is dropped.
    #NOTE(review): presumably the last piece is the drug/treatment name - confirm
    #against the actual column headers.
    sep_target_treat = treatment.split('.')[0:-1]
    #Replace each target with its KSEA kinase name(s)
    target = []
    for kin in _expand_isoforms(sep_target_treat):
        target.extend(_ksea_nodes(kin, kinase_gene_ids))
    #Add to list of all treatments targets
    targets.append(target)