In [1]:
import networkx as nx
import random
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import pickle 
import re
import sys
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
sns.set_theme()
np.random.seed(0)

In [2]:
"""
Helper Functions
"""

# Ensure a directory is available, creating it when the path does not exist yet
def check_directory_exists(dir_name):
    """Create ``dir_name`` (including missing parents) unless the path exists.

    Args:
        dir_name: Path of the directory to make available.

    Returns:
        None. Nothing is created when ``os.path.exists(dir_name)`` is already
        true.
    """
    if os.path.exists(dir_name):
        return
    os.makedirs(dir_name)

# Save a DataFrame (or any other object) as a text file in a specific folder
def save_to_text_file(output_folder_dest, input_data, text_file_name):
    """Write ``input_data`` to ``<output_folder_dest><text_file_name>.txt``.

    Args:
        output_folder_dest: Folder prefix; it is concatenated as-is, so it must
            already end with a path separator.
        input_data: A pandas DataFrame (written via ``to_string()``) or any
            other object (written via ``str()``).
        text_file_name: File name without the ``.txt`` extension.

    Returns:
        None. Prints the path of the file that was written.
    """
    text_file_ouput = output_folder_dest + text_file_name + ".txt"
    # Render the text before opening the file so that a failure while
    # formatting does not leave an empty file behind.
    if isinstance(input_data, pd.DataFrame):
        rendered_text = input_data.to_string()
    else:
        rendered_text = str(input_data)
    # The context manager closes the handle even if the write raises.
    with open(text_file_ouput, 'w') as drug_output_info_file:
        drug_output_info_file.write(rendered_text)
    print("Constructed and saved", text_file_ouput)

# Read in Pickle File
def read_pickle_file(file):
    """Load a pickled pandas object from ``file``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        Whatever ``pd.read_pickle`` returns for that file.

    Raises:
        SystemExit: If ``file`` does not exist.
    """
    # The body previously referenced an undefined name (`file_path`) instead of
    # the `file` parameter, so every call failed or read a stray global.
    if not os.path.exists(file):
        sys.exit("Can't locate input file %s" % file)
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    return pd.read_pickle(file)
    
# Serialize an object into a pickle file
def save_to_pickle_file(output_folder_dest, dict_data, dict_file_name):
    """Pickle ``dict_data`` to ``<output_folder_dest><dict_file_name>.pkl``.

    Args:
        output_folder_dest: Folder prefix, concatenated as-is (no separator is
            inserted).
        dict_data: Object to serialize.
        dict_file_name: File name without the ``.pkl`` extension.

    Returns:
        None. Prints the path of the file that was written.
    """
    pickle_path = output_folder_dest + dict_file_name + '.pkl'
    with open(pickle_path, mode='wb') as pickle_sink:
        pickle.dump(dict_data, pickle_sink, protocol=pickle.HIGHEST_PROTOCOL)
    print("Constructed and saved", pickle_path)

# Read in a CSV file
def read_csv_file(file_path, input_sep=',', input_delimiter=None, input_index_col=None, input_dtype=None, input_delim_whitespace=False, input_low_memory=True):
    """Read a delimited text file into a DataFrame with ``pd.read_csv``.

    This is the single definition of the reader; a second, later ``def`` with
    the same name used to shadow it and silently dropped the existence check.

    Args:
        file_path: Path of the file to read.
        input_sep: Field separator, used when neither ``input_delimiter`` nor
            ``input_delim_whitespace`` is given.
        input_delimiter: Alternative separator; takes precedence over
            ``input_sep`` when it is not None.
        input_index_col: Forwarded as ``index_col``.
        input_dtype: Forwarded as ``dtype``.
        input_delim_whitespace: When true, fields are split on runs of
            whitespace and both separator arguments are ignored.
        input_low_memory: Forwarded as ``low_memory``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        SystemExit: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        sys.exit("Can't locate input file %s" % file_path)
    # pandas rejects an explicit `sep` combined with `delimiter` or with
    # `delim_whitespace=True`, so collapse the three options into one `sep`.
    if input_delim_whitespace:
        separator = r"\s+"  # documented equivalent of delim_whitespace=True
    elif input_delimiter is not None:
        separator = input_delimiter
    else:
        separator = input_sep
    return pd.read_csv(filepath_or_buffer=file_path, sep=separator, index_col=input_index_col, dtype=input_dtype, low_memory=input_low_memory)

# Save a DataFrame as a CSV file
def save_to_csv_file(output_folder_dest, df, csv_file_name, input_index=False):
    """Write ``df`` to ``<output_folder_dest><csv_file_name>.csv``.

    Args:
        output_folder_dest: Folder prefix, concatenated as-is (no separator is
            inserted).
        df: Object exposing ``to_csv`` (a DataFrame in this notebook).
        csv_file_name: File name without the ``.csv`` extension.
        input_index: Forwarded as ``index`` to ``to_csv``.

    Returns:
        None. Prints the path of the file that was written.
    """
    output_filename = output_folder_dest + csv_file_name + ".csv"
    df.to_csv(output_filename, index=input_index)
    print("Constructed and saved", output_filename)

def save_to_gpickle_file(output_folder_dest, data, file_name):
    """Pickle ``data`` to ``<output_folder_dest><file_name>.gpickle``.

    Args:
        output_folder_dest: Folder prefix, concatenated as-is (no separator is
            inserted).
        data: Object to serialize (a NetworkX graph in this notebook).
        file_name: File name without the ``.gpickle`` extension.

    Returns:
        None. Prints the path of the file that was written.
    """
    output_filename = output_folder_dest + file_name + ".gpickle"
    # nx.write_gpickle was removed in NetworkX 3.0; the NetworkX migration
    # guide recommends plain pickle as the replacement.
    with open(output_filename, "wb") as gpickle_handle:
        pickle.dump(data, gpickle_handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Constructed and saved", output_filename)
    
def read_networkx_gpickle_file(input_file_name):
    """Load a pickled object (a NetworkX graph in this notebook) from disk.

    Args:
        input_file_name: Path of the ``.gpickle`` file to read. The file is
            opened as a plain, uncompressed binary file.

    Returns:
        The unpickled object.

    Raises:
        SystemExit: If ``input_file_name`` does not exist.
    """
    if not os.path.exists(input_file_name):
        sys.exit("Can't locate input file %s" % input_file_name)
    # nx.read_gpickle was removed in NetworkX 3.0; plain pickle is the
    # replacement recommended by the NetworkX migration guide.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(input_file_name, "rb") as gpickle_handle:
        return pickle.load(gpickle_handle)

In [3]:
def plot_histplot(output_folder_dest, fig_size, data_df, x_col, plot_series=False, x_label="", input_kde=True, input_color="black", set_axis=False, axis_array=[], input_label="", graph_title=""):
    """Draw a seaborn histogram and save it as a PNG named after its title.

    Args:
        output_folder_dest: Folder prefix for the image, concatenated as-is.
        fig_size: Passed to ``plt.figure`` as ``figsize``.
        data_df: Data handed to ``sns.histplot``.
        x_col: Column plotted on the x axis; ignored when ``plot_series`` is
            true.
        plot_series: When true, ``data_df`` is plotted without an ``x`` column.
        x_label: Text of the x-axis label.
        input_kde: Forwarded as ``kde``.
        input_color: Forwarded as ``color``.
        set_axis: When true, the axis limits are taken from ``axis_array``.
        axis_array: ``[xmin, xmax, ymin, ymax]``; only read when ``set_axis``
            is true.
        input_label: Legend label of the histogram.
        graph_title: Plot title; also used as the stem of the file name
            ``<graph_title>_histplot.png``.

    Returns:
        None. The figure is saved, closed, and the output path is printed.
    """
    plt.figure(figsize=fig_size)
    if plot_series:
        sns.histplot(data=data_df, kde=input_kde, color=input_color, label=input_label)
    else:
        sns.histplot(data=data_df, x=x_col, kde=input_kde, color=input_color, label=input_label)
    plt.legend()
    if set_axis:
        x_low, x_high = axis_array[0], axis_array[1]
        y_low, y_high = axis_array[2], axis_array[3]
        plt.axis(xmin=x_low, xmax=x_high, ymin=y_low, ymax=y_high)
    # Both axis labels share one font size; the y axis is always "Count".
    for apply_label, label_text in ((plt.xlabel, x_label), (plt.ylabel, "Count")):
        apply_label(label_text, fontsize=18)
    plt.title(graph_title, fontsize=25)
    image_path = output_folder_dest + graph_title + "_histplot.png"
    plt.savefig(image_path, facecolor='w', edgecolor='w', transparent=False)
    plt.close()
    print("Constructed and saved", image_path)

In [4]:
# Filtering configuration. These defaults are only applied when `filter_data`
# is not already defined in the global namespace.
if 'filter_data' not in globals():
    filter_data = True
    if filter_data:
        filter_for_same_nodes = False
        filter_for_close_nodes = True
        filter_for_same_edges = False
        # Exactly one filtering mode may be active. Comparing the flags for
        # equality only caught the all-True / all-False cases and let two
        # simultaneous modes through.
        active_filter_modes = sum(bool(flag) for flag in (filter_for_same_nodes, filter_for_close_nodes, filter_for_same_edges))
        if active_filter_modes != 1:
            sys.exit("ERROR!!! User must filter for SAME EDGES, SAME NODES, or for CLOSE NODES only!")

In [5]:
# Output switches: tables are saved as .csv, figures as .png, summaries as text.
# True enables saving for that kind of artefact, False disables it.
save_visual_figures = True
save_table_figures = True
save_text_outputs = True

# Folder layout for everything this notebook writes
output_folder = "outputs/output_pathfx_network_analysis/"
output_csv_folder = output_folder + "csv_files/"
output_graph_folder = output_folder + "visual_graphs/"

# Create the folders up front so later save calls cannot fail on a missing path
for _required_folder in (output_folder, output_csv_folder, output_graph_folder):
    check_directory_exists(_required_folder)

In [6]:
# Load the PathFX edge table; the preview below shows source, target and weight columns
pathfx_score_df = read_csv_file(
    file_path="inputs/input_pathfx_network_analysis/pathfx_score_table.csv"
)
print()
display("pathfx_score_df", pathfx_score_df)
print()




'pathfx_score_df'

Unnamed: 0,source,target,weight
0,CHMP1B,KNSTRN,0.279930
1,CHMP1B,USP8,0.562588
2,CHMP1B,STAMBP,0.676545
3,CHMP1B,SPAST,0.416595
4,CHMP1B,SNRNP200,0.279930
...,...,...,...
262235,tat,GPX2,0.246744
262236,tat,GPX5,0.246744
262237,tat,GPX6,0.246744
262238,tat,BRIX1,0.279930





In [7]:
"""
Outputting Number of Distinct Proteins
"""
# Distinct labels seen in each endpoint column
source_list = pathfx_score_df["source"].unique()
target_list = pathfx_score_df["target"].unique()
num_sources, num_targets = len(source_list), len(target_list)

# Pool both endpoint lists, then de-duplicate to get every distinct node label
node_list = [*source_list, *target_list]
pathfx_unique_nodes = pd.Series(node_list).unique()

print("\nOutputting Number of Distinct Proteins")
print("------------------------------------------------")
print("Number of Unique sources:", num_sources)
print("Number of Unique targets:", num_targets)
print("Number of Unique sources and targets:", len(pathfx_unique_nodes))
print()


Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 22832
Number of Unique targets: 22832
Number of Unique sources and targets: 22832



In [8]:
# Per-source summary statistics of the edge weights, largest row count first
pathfx_source_to_target_weight_stats_df = (
    pathfx_score_df
    .groupby('source')
    .agg({"weight": ['count', 'mean', 'std', 'max', 'min', 'sum']})
    .droplevel(0, axis=1)  # drop the outer "weight" level, keep the statistic names
    .reset_index()
    .sort_values(by='count', ascending=False)
)
print()
display("pathfx_source_to_target_weight_stats_df", pathfx_source_to_target_weight_stats_df)
print()




'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
15239,UBC,6753,0.528777,0.132470,0.935012,0.279930,3570.832522
691,APP,1984,0.287942,0.040666,0.705007,0.246744,571.276135
13717,SUMO2,713,0.429295,0.086484,0.818355,0.246936,306.087256
14713,TP53,627,0.417612,0.124118,0.990000,0.246936,261.842982
22077,tat,436,0.321098,0.085697,0.770889,0.246744,139.998813
...,...,...,...,...,...,...,...
12227,RTTN,1,0.605763,,0.605763,0.605763,0.605763
12226,RTT109,1,0.416595,,0.416595,0.416595,0.416595
12224,RTP2,1,0.279930,,0.279930,0.279930,0.279930
12214,RTFDC1,1,0.279930,,0.279930,0.279930,0.279930





In [9]:
# Summary statistics of how many target rows each source has
pathfx_num_target_per_source_stats_df = (
    pathfx_score_df
    .groupby('source')["target"]
    .count()
    .agg(['count', 'mean', 'std', 'max', 'min', 'sum'])
    .to_frame()
    .rename(columns={"target": "Number Target Interacton Per Source"})
)
print()
display("pathfx_num_target_per_source_stats_df", pathfx_num_target_per_source_stats_df)
print()




'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,22832.0
mean,11.485634
std,52.692758
max,6753.0
min,1.0
sum,262240.0





In [10]:
# How many sources have each per-source interaction count.
# The column names are set explicitly with rename_axis / reset_index(name=...)
# because the labels produced by value_counts().reset_index() differ between
# pandas versions: renaming "index" only works before pandas 2.0, and on 2.x
# the value column and the count column would both be called "count".
pathfx_num_interac_distribution_df = (
    pathfx_source_to_target_weight_stats_df["count"]
    .value_counts()
    .rename_axis("Number of Protein Interactions")
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
)
print()
display("pathfx_num_interac_distribution_df", pathfx_num_interac_distribution_df)
print()




'pathfx_num_interac_distribution_df'

Unnamed: 0,Number of Protein Interactions,count
0,1,6955
1,3,2691
2,2,2409
3,4,1488
4,5,1033
...,...,...
190,394,1
189,407,1
188,418,1
187,428,1





In [11]:
# How often each edge score occurs.
# The column names are set explicitly with rename_axis / reset_index(name=...)
# because the labels produced by value_counts().reset_index() differ between
# pandas versions: the old rename of "index"/"weight" yields two columns named
# "count" on pandas >= 2.0.
pathfx_edge_score_distribution_df = (
    pathfx_score_df["weight"]
    .value_counts()
    .rename_axis("Edge Score")
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
)
# Raw counts are halved (integer division).
# NOTE(review): presumably every edge is listed once per direction in the
# table, so each score is counted twice - confirm against the input data.
pathfx_edge_score_distribution_df["count"] = pathfx_edge_score_distribution_df["count"] // 2
print()
display("pathfx_edge_score_distribution_df", pathfx_edge_score_distribution_df)
print()




'pathfx_edge_score_distribution_df'

Unnamed: 0,Edge Score,count
0,0.279930,38100
1,0.323872,18812
2,0.380279,11614
3,0.471716,11525
4,0.346946,7614
...,...,...
694,0.725046,1
693,0.809616,1
692,0.788432,1
691,0.915291,1





In [12]:
# Keep only rows whose source label sorts strictly before the target label.
# Rows with source == target are dropped as well.
# NOTE(review): presumably this removes the mirrored copy of each pair - confirm.
pathfx_removed_cross_pair_df = pathfx_score_df.loc[
    pathfx_score_df['source'] < pathfx_score_df['target']
]
print()
display("pathfx_removed_cross_pair_df", pathfx_removed_cross_pair_df)
print()




'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
0,CHMP1B,KNSTRN,0.279930
1,CHMP1B,USP8,0.562588
2,CHMP1B,STAMBP,0.676545
3,CHMP1B,SPAST,0.416595
4,CHMP1B,SNRNP200,0.279930
...,...,...,...
262189,Ppif,VDAC1,0.279930
262195,ADGRG1,ADRB2,0.490794
262198,ORF26,TRIM37,0.279930
262200,ALL3_AEDAE,COL3A1,0.416595





In [13]:
# Summary statistics of the weight column over the single-direction rows
pathfx_all_weight_stats_df = (
    pathfx_removed_cross_pair_df['weight']
    .agg(['count', 'mean', 'std', 'max', 'min', 'sum'])
    .to_frame()
)
print()
display("pathfx_all_weight_stats_df", pathfx_all_weight_stats_df)
print()




'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,131120.0
mean,0.387541
std,0.131891
max,0.99
min,0.244958
sum,50814.314438





In [14]:
# Export every table built above as a .csv file.
# Each entry is (DataFrame, file name, whether the index is written as a column).
if save_table_figures:
    for _table_df, _table_name, _write_index in (
        (pathfx_score_df, "pathfx_score_table", True),
        (pathfx_all_weight_stats_df, "pathfx_all_weight_stats_table", True),
        (pathfx_num_target_per_source_stats_df, "pathfx_num_target_per_source_stats_table", True),
        (pathfx_source_to_target_weight_stats_df, "pathfx_source_to_target_weight_stats", True),
        (pathfx_num_interac_distribution_df, "pathfx_num_interac_distribution_table", False),
        (pathfx_removed_cross_pair_df, "pathfx_removed_cross_pair_pathfx_table", False),
    ):
        save_to_csv_file(output_csv_folder, _table_df, _table_name, input_index=_write_index)

Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_score_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_all_weight_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_target_per_source_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_source_to_target_weight_stats.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_interac_distribution_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_removed_cross_pair_pathfx_table.csv


In [15]:
def _save_pathfx_histograms(degree_axes, edge_score_axes):
    """Save the five PathFX histograms for one filtering mode.

    The four filtering modes below draw the same five plots and differ only in
    their axis limits, so the limits are the only parameters.

    Args:
        degree_axes: Three ``[xmin, xmax, ymin, ymax]`` lists for the
            interactions-per-protein histogram (full view, zoomed, zoomed 2x).
        edge_score_axes: Two ``[xmin, xmax, ymin, ymax]`` lists for the
            edge-score histogram (full view, zoomed).

    Returns:
        None. Each figure is written by ``plot_histplot``.
    """
    degree_title = "Distribution of the Number of Protein Interactions per Protein in PathFX"
    edge_score_title = "Distribution of edge scores in PathFX"
    for title_suffix, axis_limits in zip(("", " (Zoomed)", " (Zoomed 2x)"), degree_axes):
        plot_histplot(output_graph_folder, (15, 10), pathfx_source_to_target_weight_stats_df, "count", x_label="Number of Interactions", input_color="blue", set_axis=True, axis_array=axis_limits, input_label="PathFX", graph_title=degree_title + title_suffix)
    for title_suffix, axis_limits in zip(("", " (Zoomed)"), edge_score_axes):
        plot_histplot(output_graph_folder, (15, 10), pathfx_removed_cross_pair_df, "weight", x_label="Edge Score", input_color="blue", set_axis=True, axis_array=axis_limits, input_label="PathFX", graph_title=edge_score_title + title_suffix)

# filter for SAME NODES
# --------------------------------------------------------------------------------------------------------
if save_visual_figures and filter_data and filter_for_same_nodes:
    _save_pathfx_histograms(
        degree_axes=[[-100, 7000, 0, 4500], [-10, 250, 0, 4500], [0, 100, 0, 1500]],
        edge_score_axes=[[0, 1.1, 0, 35000], [0.2, 0.8, 0, 20000]],
    )

# filter for CLOSE NODES
# --------------------------------------------------------------------------------------------------------
if save_visual_figures and filter_data and filter_for_close_nodes:
    _save_pathfx_histograms(
        degree_axes=[[-100, 7000, 0, 4500], [-10, 250, 0, 4500], [0, 100, 0, 1500]],
        edge_score_axes=[[0, 1.1, 0, 35000], [0.2, 0.8, 0, 20000]],
    )

# filter for same EDGES
# --------------------------------------------------------------------------------------------------------
if save_visual_figures and filter_data and filter_for_same_edges:
    _save_pathfx_histograms(
        degree_axes=[[-100, 2000, 0, 3000], [-10, 250, 0, 1000], [0, 50, 0, 400]],
        edge_score_axes=[[0, 1.1, 0, 10000], [0.2, 0.8, 0, 5000]],
    )

# NON-filter
# --------------------------------------------------------------------------------------------------------
if save_visual_figures and not filter_data:
    _save_pathfx_histograms(
        degree_axes=[[-500, 6800, 0, 8000], [0, 50, 0, 6000], [0, 30, 0, 3000]],
        edge_score_axes=[[0, 1.1, 0, 40000], [0.2, 0.8, 0, 10000]],
    )
        

Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX_histplot.png
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX (Zoomed)_histplot.png
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX (Zoomed 2x)_histplot.png
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of edge scores in PathFX_histplot.png
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of edge scores in PathFX (Zoomed)_histplot.png


In [16]:
# Distinct labels seen in each endpoint column of the PathFX table
source_list = pathfx_score_df["source"].unique()
target_list = pathfx_score_df["target"].unique()
num_sources, num_targets = len(source_list), len(target_list)

# Pool both endpoint lists, then de-duplicate to get every distinct node label
pathfx_node_list = [*source_list, *target_list]
pathfx_unique_nodes = pd.Series(pathfx_node_list).unique()
pathfx_num_unique_nodes = len(pathfx_unique_nodes)

print("\nPathFX Interactome")
print("-------------------------------------------")
print("Number of Unique sources:", num_sources)
print("Number of Unique targets:", num_targets)
print("Number of Unique sources and targets:", pathfx_num_unique_nodes)
print()


PathFX Interactome
-------------------------------------------
Number of Unique sources: 22832
Number of Unique targets: 22832
Number of Unique sources and targets: 22832



In [17]:
"""
Constructing Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score)
"""
print("\nConstructing Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score)\n")
# One graph edge per (source, target) row, carrying the row's weight as an edge attribute
pathfx_G = nx.from_pandas_edgelist(
    df=pathfx_score_df,
    source="source",
    target='target',
    edge_attr='weight',
)


Constructing Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score)



In [18]:
# Save Constructed Network graph for PathFX

print("\nSaving Constructed Network graph for PathFX")
output_filename = output_folder + "pathfx_G.gpickle"
# nx.write_gpickle was removed in NetworkX 3.0; plain pickle is the replacement
# recommended by the NetworkX migration guide.
with open(output_filename, "wb") as gpickle_handle:
    pickle.dump(pathfx_G, gpickle_handle, protocol=pickle.HIGHEST_PROTOCOL)


Saving Constructed Network graph for PathFX


In [19]:
# Console summary of the PathFX graph and the statistics tables built above.
# Values are read with .loc[label].iloc[0]: a bare [0] on a label-indexed row
# relies on positional fallback, which is deprecated in recent pandas.
print("\nPathFX NetworkX Interactome Graph")
print("-------------------------------------")
print("Number of Nodes:", pathfx_G.number_of_nodes())
print("Number of Edges:", pathfx_G.number_of_edges())

# NOTE(review): "node degree" is taken from the targets-per-source table, not
# from the graph itself - confirm the two agree for this data.
print("\nAverage node degree:", pathfx_num_target_per_source_stats_df.loc["mean"].iloc[0])
print("Standard Deviation node degree:", pathfx_num_target_per_source_stats_df.loc["std"].iloc[0])
print("Max node degree:", pathfx_num_target_per_source_stats_df.loc["max"].iloc[0])
print("Min node degree:", pathfx_num_target_per_source_stats_df.loc["min"].iloc[0])

print("\nAverage edge weight:", pathfx_all_weight_stats_df.loc["mean"].iloc[0])
# This line reports the weight table's std; it was mislabelled "node degree".
print("Standard Deviation edge weight:", pathfx_all_weight_stats_df.loc["std"].iloc[0])
print("Max edge weight:", pathfx_all_weight_stats_df.loc["max"].iloc[0])
print("Min edge weight:", pathfx_all_weight_stats_df.loc["min"].iloc[0])

print("\nnx Graph is connected -", nx.is_connected(pathfx_G))
print("Number of Connected Components:", nx.number_connected_components(pathfx_G))

print("\nTop 25 Proteins with Most Neighbors - Weighted Edge Scores")
print("-------------------------------------------------------------------------------------")
print(pathfx_source_to_target_weight_stats_df.head(25))
print()


PathFX NetworkX Interactome Graph
-------------------------------------
Number of Nodes: 22832
Number of Edges: 131120

Average node degree: 11.48563419761738
Standard Deviation node degree: 52.69275848135201
Max node degree: 6753.0
Min node degree: 1.0

Average edge weight: 0.3875405310995365
Standard Deviation node degree: 0.13189134383586784
Max edge weight: 0.99
Min edge weight: 0.244957679172

nx Graph is connected - False
Number of Connected Components: 121

Top 25 Proteins with Most Neighbors - Weighted Edge Scores
-------------------------------------------------------------------------------------
          source  count      mean       std       max       min          sum
15239        UBC   6753  0.528777  0.132470  0.935012  0.279930  3570.832522
691          APP   1984  0.287942  0.040666  0.705007  0.246744   571.276135
13717      SUMO2    713  0.429295  0.086484  0.818355  0.246936   306.087256
14713       TP53    627  0.417612  0.124118  0.990000  0.246936   261.842982


In [20]:
# Write the same summary that is printed above to a text report.
if save_text_outputs:

    # Built from output_folder so the report follows the configured destination
    # (same path as the previously hard-coded literal). The context manager
    # closes the file even if one of the writes raises.
    with open(output_folder + "pathfx_network_info_output.txt", "w") as pathfx_output_info_file:

        pathfx_output_info_file.write("PathFX Network Analysis \n")
        pathfx_output_info_file.write("============================================================================================= \n \n")

        pathfx_output_info_file.write("PathFX Interactome \n")
        pathfx_output_info_file.write("------------------------------------------- \n")
        pathfx_output_info_file.write("Number of Unique sources: " + str(num_sources) + "\n")
        pathfx_output_info_file.write("Number of Unique targets: " + str(num_targets) + "\n")
        pathfx_output_info_file.write("Number of Unique sources and targets: " + str(len(pathfx_unique_nodes)) + "\n \n")

        pathfx_output_info_file.write("PathFX Interactome NetworkX Graph \n")
        pathfx_output_info_file.write("------------------------------------- \n")
        pathfx_output_info_file.write("Number of Nodes: " + str(pathfx_G.number_of_nodes()) + "\n")
        pathfx_output_info_file.write("Number of Edges: " + str(pathfx_G.number_of_edges()) + "\n \n")

        # .iloc[0] replaces a bare [0], which relies on deprecated positional
        # fallback on a label-indexed row.
        pathfx_output_info_file.write("Average node degree: " + str(pathfx_num_target_per_source_stats_df.loc["mean"].iloc[0]) + "\n")
        pathfx_output_info_file.write("Standard Deviation node degree: " + str(pathfx_num_target_per_source_stats_df.loc["std"].iloc[0]) + "\n")
        pathfx_output_info_file.write("Max node degree: " + str(pathfx_num_target_per_source_stats_df.loc["max"].iloc[0]) + "\n")
        pathfx_output_info_file.write("Min node degree: " + str(pathfx_num_target_per_source_stats_df.loc["min"].iloc[0]) + "\n \n")

        pathfx_output_info_file.write("Average edge weight: " + str(pathfx_all_weight_stats_df.loc["mean"].iloc[0]) + "\n")
        # This line reports the weight table's std; it was mislabelled "node degree".
        pathfx_output_info_file.write("Standard Deviation edge weight: " + str(pathfx_all_weight_stats_df.loc["std"].iloc[0]) + "\n")
        pathfx_output_info_file.write("Max edge weight: " + str(pathfx_all_weight_stats_df.loc["max"].iloc[0]) + "\n")
        pathfx_output_info_file.write("Min edge weight: " + str(pathfx_all_weight_stats_df.loc["min"].iloc[0]) + "\n \n")

        pathfx_output_info_file.write("nx Graph is connected - " + str(nx.is_connected(pathfx_G)) + "\n")
        pathfx_output_info_file.write("Number of Connected Components: " + str(nx.number_connected_components(pathfx_G)) + "\n \n \n \n")

        pathfx_output_info_file.write("Top 25 Proteins with Most Neighbors - Weighted Edge Scores \n")
        pathfx_output_info_file.write("--------------------------------------------------------------------------------------------- \n")
        pathfx_output_info_file.write(pathfx_source_to_target_weight_stats_df.head(25).to_string())
        pathfx_output_info_file.write(" ")

In [21]:
# Export the graph and tables into the input folders of the comparison network analysis
input_comparison_network_analysis_folder = "inputs/input_comparison_network_analysis/"
input_comparison_network_analysis_pathfx_folder = "inputs/input_comparison_network_analysis/pathfx/"
for _comparison_folder in (input_comparison_network_analysis_folder, input_comparison_network_analysis_pathfx_folder):
    check_directory_exists(_comparison_folder)

save_to_gpickle_file(input_comparison_network_analysis_pathfx_folder, pathfx_G, "pathfx_G")

# Each entry is (DataFrame, file name, whether the index is written as a column)
for _table_df, _table_name, _write_index in (
    (pathfx_score_df, "pathfx_score_table", False),
    (pathfx_num_target_per_source_stats_df, "pathfx_num_target_per_source_stats_table", True),
    (pathfx_removed_cross_pair_df, "pathfx_removed_cross_pair_pathfx_table", False),
    (pathfx_all_weight_stats_df, "pathfx_all_weight_stats_table", True),
    (pathfx_source_to_target_weight_stats_df, "pathfx_source_to_target_weight_stats_table", False),
):
    save_to_csv_file(input_comparison_network_analysis_pathfx_folder, _table_df, _table_name, input_index=_write_index)

Constructed and saved inputs/input_comparison_network_analysis/pathfx/pathfx_G.gpickle
Constructed and saved inputs/input_comparison_network_analysis/pathfx/pathfx_score_table.csv
Constructed and saved inputs/input_comparison_network_analysis/pathfx/pathfx_num_target_per_source_stats_table.csv
Constructed and saved inputs/input_comparison_network_analysis/pathfx/pathfx_removed_cross_pair_pathfx_table.csv
Constructed and saved inputs/input_comparison_network_analysis/pathfx/pathfx_all_weight_stats_table.csv
Constructed and saved inputs/input_comparison_network_analysis/pathfx/pathfx_source_to_target_weight_stats_table.csv
