# check regulons


In [4]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import seaborn as sns
import math
import numpy as np
import networkx as nx
import warnings
warnings.filterwarnings("ignore")

In [5]:
# Load the four regulon tables: exhausted / resident for the `_mm` set and for
# the `ht_`-prefixed set (labelled "Multiple Myeloma" and "Healthy" in the
# plots further down).
exh_mm, res_mm, exh_ht, res_ht = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Results/exhausted_regulons_score.csv",
        "Results/resident_regulons_score.csv",
        "Results/ht_exhausted_regulons_score.csv",
        "Results/ht_resident_regulons_score.csv",
    )
)

def fix_names(df):
    """Strip the "_(+)" marker from every column label of *df*.

    The frame is relabelled in place and the very same object is returned, so
    the call can be chained.
    """
    df.columns = [label.replace("_(+)", "") for label in df.columns]
    return df
# For each table: clean the column labels, discard the row labelled 0, and
# convert to a plain {column label: list of column values} dict.
# NOTE(review): why row 0 is discarded is not visible here - confirm against
# the CSV layout.
exh_mm = fix_names(exh_mm).drop(0).to_dict(orient="list")
res_mm = fix_names(res_mm).drop(0).to_dict(orient="list")
exh_ht = fix_names(exh_ht).drop(0).to_dict(orient="list")
res_ht = fix_names(res_ht).drop(0).to_dict(orient="list")

In [6]:
# Gene lists to look up in the regulons, kept as
# {column label: list of column values}.
genes = pd.read_csv("Data/gene_lists-check_regulons.csv").to_dict(orient="list")

In [7]:
def remove_na(dirty_list):
    """Remove every missing value from *dirty_list* in place.

    Args:
        dirty_list: Mutable list whose entries are tested with ``pd.isnull``
            (so ``None`` and ``NaN`` count as missing).

    Returns:
        The same list object, with the missing entries removed and the
        relative order of the remaining entries unchanged.
    """
    # Slice assignment keeps the caller's list object (the update stays
    # in place) while filtering in a single pass, instead of popping entries
    # one by one, which shifts the tail of the list on every removal.
    dirty_list[:] = [item for item in dirty_list if not pd.isnull(item)]
    return dirty_list
            

def remove_from_dict(dictionary):
    """Strip missing values from every list stored in *dictionary*.

    Each value is passed through ``remove_na`` and stored back under its key;
    the dictionary is updated in place and the same object is returned.
    """
    # Only existing keys are reassigned, so iterating over items() is safe.
    for key, values in dictionary.items():
        dictionary[key] = remove_na(values)
    return dictionary
        
# Drop the missing entries from every regulon table and from the gene lists
# (presumably the NaN padding of columns shorter than the longest one -
# confirm against the CSV files).
exh_mm, res_mm, exh_ht, res_ht, genes = map(
    remove_from_dict, (exh_mm, res_mm, exh_ht, res_ht, genes)
)

In [8]:
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact_manual

def intersection(lst1, lst2):
    """Return the distinct elements found in both *lst1* and *lst2* as a list.

    Duplicates are collapsed; the order of the result is the iteration order
    of the resulting set, not the order of either input.
    """
    common = set(lst1).intersection(set(lst2))
    return list(common)

def _regulon_hits(regulons, target_genes):
    """Map each TF to the genes its regulon shares with *target_genes*; TFs without overlap are skipped."""
    hits = {}
    for tf, regulon in regulons.items():
        # Compute the overlap once and reuse it for both the filter and the value.
        shared = intersection(regulon, target_genes)
        if shared:
            hits[tf] = shared
    return hits


def _top_tf(counts):
    """Return ``(TF, count)`` for the largest entry of *counts*, or ``(None, 0)`` when it is empty."""
    if counts.empty:
        return None, 0
    return counts.idxmax(), counts.max()


def hist_plot(dataframe, variable):
    """Summarise and bar-plot how many genes of one gene list each regulon contains.

    Args:
        dataframe: Two-item sequence ``[mm_regulons, healthy_regulons]``; each
            item is a dict mapping a TF name to the list of genes in its
            regulon.
        variable: Key of the module-level ``genes`` dict selecting the gene
            list to look for.

    Returns:
        ``(tmp_mm, tmp_ht, condition)``: for each cohort a dict mapping every
        TF with at least one hit to the list of shared genes, plus the
        condition label ("Exhausted" or "Resident").

    Side effects:
        Prints a short text summary and shows a two-panel bar chart.
    """
    target_genes = genes[variable]
    tmp_mm = _regulon_hits(dataframe[0], target_genes)
    tmp_ht = _regulon_hits(dataframe[1], target_genes)

    # Number of shared genes per TF, in the same TF order as the hit dicts.
    mm_counts = pd.Series({tf: len(shared) for tf, shared in tmp_mm.items()}, dtype="int64")
    ht_counts = pd.Series({tf: len(shared) for tf, shared in tmp_ht.items()}, dtype="int64")

    # Identity is checked first so the usual call (passing the module-level
    # table itself) avoids a full element-by-element dict comparison.
    if dataframe[0] is exh_mm or dataframe[0] == exh_mm:
        condition = "Exhausted"
    else:
        condition = "Resident"

    # An empty count table (no regulon contains any gene of the list) is
    # reported as "None ... with: 0" instead of raising inside idxmax().
    top_mm, top_mm_n = _top_tf(mm_counts)
    top_ht, top_ht_n = _top_tf(ht_counts)

    print(
        f"{condition} NK cells:\nThe TFs whose regulons contain {variable} are:\n"
        f"MM: {len(mm_counts)}    Healthy: {len(ht_counts)}\n"
        f"The TF with more {variable} in the regulon is:\n"
        f"MM: {top_mm}        with: {top_mm_n}\n"
        f"Healthy: {top_ht}        with: {top_ht_n}"
    )

    title = variable + " find in regulons from " + condition + " NK cells"

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))
    fig.suptitle(title)
    panels = ((ax1, mm_counts, "Multiple Myeloma"), (ax2, ht_counts, "Healthy"))
    for ax, counts, cohort in panels:
        ax.bar(counts.index.tolist(), counts.tolist())
        # Style the tick labels produced by the categorical axis rather than
        # overwriting them with set_xticklabels(), which expects fixed ticks.
        ax.tick_params(axis="x", labelrotation=35, labelsize=8)
        ax.set_title(cohort)

    plt.show()
    return tmp_mm, tmp_ht, condition


def net_plot(tmp, variable, condition, dataframe):
    """Draw the TF-gene network of one cohort and print its most central nodes.

    Args:
        tmp: Dict mapping each TF to the list of genes it is linked to (the
            per-cohort dicts returned by ``hist_plot`` have this shape).
        variable: Name of the gene list; used in the title and the legend.
        condition: Condition label used in the title.
        dataframe: Cohort label (a string such as "MM") appended to the title.

    Side effects:
        Shows the network figure and prints the first five rows of the
        betweenness-centrality table, sorted in descending order.
    """
    # TFs are green. Genes are assigned afterwards, so a name that is both a
    # TF and a regulon member ends up red.
    nodes = {tf: {"color": "green"} for tf in tmp}
    for targets in tmp.values():
        # Plain nested loop instead of sum(lists, []), which copies the
        # accumulated list on every addition.
        for gene in targets:
            nodes[gene] = {"color": "red"}

    edges = [(tf, gene) for tf, targets in tmp.items() for gene in targets]

    G = nx.Graph()
    G.add_nodes_from(list(nodes.items()))
    G.add_edges_from(edges)
    centrality = nx.betweenness_centrality(G)
    ATTRIBUTE_NAME = 'color'
    colors = [G.nodes[node][ATTRIBUTE_NAME] for node in G.nodes()]
    plt.figure(figsize=(15, 10))
    # NOTE(review): node sizes are taken in the iteration order of the
    # centrality dict; this assumes it matches the order of G.nodes - confirm.
    nx.draw_kamada_kawai(
        G,
        node_color=colors,
        with_labels=True,
        font_size=15,
        node_size=[v * 1000 for v in centrality.values()],
    )
    title = "Network TFs and " + variable + " in " + condition + " " + dataframe
    plt.title(title)
    plt.suptitle("Node size: Centrality\nGreen: TF\nRed:" + variable, fontsize=10)
    plt.show()

    sorted_centrality = sorted(centrality.items(), key=lambda x: x[1], reverse=True)
    print(pd.DataFrame(dict(sorted_centrality), index=[0]).T.head())
    
    
# Names of the available gene lists, sorted for display.
variables = sorted(genes)

# Regulon tables per condition, each stored as [MM table, healthy table].
dfs = {
    "Exhausted": [exh_mm, exh_ht],
    "Resident": [res_mm, res_ht],
}

# Selection widget from which the gene list is picked.
w = widgets.Select(options=variables)

@interact_manual
def plot_data(dataframe=dfs.keys(), variable=w):
    """Plot the bar charts, then one network per cohort, for the chosen condition and gene list.

    ``dataframe`` is a key of ``dfs`` and ``variable`` the gene-list name
    selected in the widget.
    """
    mm_hits, ht_hits, label = hist_plot(dfs[dataframe], variable)
    for hits, cohort in ((mm_hits, "MM"), (ht_hits, "Healthy")):
        net_plot(hits, variable, label, cohort)

interactive(children=(Dropdown(description='dataframe', options=('Exhausted', 'Resident'), value='Exhausted'),…