In [1]:
import networkx as nx
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')
from matplotlib import pyplot
import seaborn as sns
sns.set()
sns.set_context("talk")
import numpy as np
import pandas as pd
import pickle 
import os
import sys
import shutil
# Run this to suppress all DeprecationWarnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
sns.set_theme()
np.random.seed(0)

In [2]:
"""
Helper Functions
"""

# Ensure a directory exists, creating it when it is missing
def check_directory_exists(dir_name):
    """Make sure ``dir_name`` is present on disk.

    Parameters
    ----------
    dir_name : str
        Path of the directory to check.

    Returns
    -------
    None
        Nothing is done when the path already exists; otherwise the
        directory (and any missing parents) is created with ``os.makedirs``.
    """
    if os.path.exists(dir_name):
        return
    os.makedirs(dir_name)

# Save a DataFrame, list, set (or any other object) as a text file in a specific folder
def save_to_text_file(output_folder_dest, input_data, text_file_name):
    """Write ``input_data`` to ``<output_folder_dest><text_file_name>.txt``.

    Parameters
    ----------
    output_folder_dest : str
        Prefix of the output path. It is concatenated as-is, so it must
        already end with a path separator.
    input_data : pandas.DataFrame or object
        A DataFrame is written via ``DataFrame.to_string()``; anything else
        is written via ``str()``.
    text_file_name : str
        File name without the ``.txt`` extension.

    Returns
    -------
    None
        The destination path is printed once the file has been written.
    """
    text_file_output = output_folder_dest + text_file_name + ".txt"

    # Render the text first so a formatting failure never leaves a
    # half-written (or empty) file behind.
    if isinstance(input_data, pd.DataFrame):
        text = input_data.to_string()
    else:
        text = str(input_data)

    # The context manager closes the handle even if the write raises.
    with open(text_file_output, 'w') as text_file:
        text_file.write(text)

    print("Constructed and saved", text_file_output)

# Load a pickled object from disk
def read_pickle_file(file_path):
    """Return the object stored in the pickle file at ``file_path``.

    Parameters
    ----------
    file_path : str
        Location of the pickle file.

    Returns
    -------
    object
        Whatever ``pandas.read_pickle`` loads from the file.

    Raises
    ------
    SystemExit
        Raised through ``sys.exit`` with a "Can't locate input file" message
        when ``file_path`` does not exist.
    """
    # NOTE(review): unpickling can execute arbitrary code, so this should
    # only be pointed at trusted files.
    if os.path.exists(file_path):
        return pd.read_pickle(file_path)
    sys.exit("Can't locate input file %s" % file_path)
    
# Serialise an object into a pickle file
def save_to_pickle_file(output_folder_dest, dict_data, dict_file_name):
    """Pickle ``dict_data`` to ``<output_folder_dest><dict_file_name>.pkl``.

    Parameters
    ----------
    output_folder_dest : str
        Prefix of the output path. It is concatenated as-is, so it must
        already end with a path separator.
    dict_data : object
        Any picklable object; it is dumped with ``pickle.HIGHEST_PROTOCOL``.
    dict_file_name : str
        File name without the ``.pkl`` extension.

    Returns
    -------
    None
        The destination path is printed once the file has been written.
    """
    pickle_path = output_folder_dest + dict_file_name + '.pkl'
    with open(pickle_path, 'wb') as sink:
        pickle.dump(dict_data, sink, protocol=pickle.HIGHEST_PROTOCOL)
    print("Constructed and saved", pickle_path)

# Read in a CSV file
def read_csv_file(file_path, input_sep=',', input_delimiter=None, input_index_col=None, input_dtype=None, input_delim_whitespace=False, input_low_memory=True):
    """Load a delimited text file into a DataFrame.

    This is the single definition of ``read_csv_file``: the notebook used to
    define it twice, and the later copy silently replaced this one and
    dropped the missing-file check.

    Parameters
    ----------
    file_path : str
        Path of the file on the local filesystem.
    input_sep : str, default ','
        Field separator used when neither ``input_delimiter`` nor
        ``input_delim_whitespace`` is given.
    input_delimiter : str or None, default None
        Alternative name for the separator; when given it takes precedence
        over ``input_sep`` (pandas accepts only one of the two).
    input_index_col : optional
        Forwarded to ``pandas.read_csv`` as ``index_col``.
    input_dtype : optional
        Forwarded to ``pandas.read_csv`` as ``dtype``.
    input_delim_whitespace : bool, default False
        When true, fields are split on runs of whitespace and ``input_sep``
        is ignored.
    input_low_memory : bool, default True
        Forwarded to ``pandas.read_csv`` as ``low_memory``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    SystemExit
        Raised through ``sys.exit`` when ``file_path`` does not exist.
    ValueError
        When both ``input_delimiter`` and ``input_delim_whitespace`` are set.
    """
    if not os.path.exists(file_path):
        sys.exit("Can't locate input file %s" % file_path)

    if input_delim_whitespace:
        if input_delimiter is not None:
            raise ValueError("Specify either input_delimiter or input_delim_whitespace, not both")
        # sep=r"\s+" is the documented equivalent of the deprecated
        # delim_whitespace=True keyword. Passing that keyword together with
        # an explicit sep is rejected by pandas, so it is no longer forwarded.
        separator = r"\s+"
    elif input_delimiter is not None:
        separator = input_delimiter
    else:
        separator = input_sep

    return pd.read_csv(filepath_or_buffer=file_path, sep=separator, index_col=input_index_col, dtype=input_dtype, low_memory=input_low_memory)

# Save a DataFrame as a CSV file
def save_to_csv_file(output_folder_dest, df, csv_file_name, save_with_indexing=False):
    """Write ``df`` to ``<output_folder_dest><csv_file_name>.csv``.

    Parameters
    ----------
    output_folder_dest : str
        Prefix of the output path. It is concatenated as-is, so it must
        already end with a path separator.
    df : pandas.DataFrame
        Table to write; only its ``to_csv`` method is used.
    csv_file_name : str
        File name without the ``.csv`` extension.
    save_with_indexing : bool, default False
        Forwarded to ``DataFrame.to_csv`` as ``index``.

    Returns
    -------
    None
        The destination path is printed once the file has been written.
    """
    output_filename = output_folder_dest + csv_file_name + ".csv"
    df.to_csv(output_filename, index=save_with_indexing)
    print("Constructed and saved", output_filename)

def save_to_gpickle_file(output_folder_dest, data, file_name):
    """Pickle ``data`` to ``<output_folder_dest><file_name>.gpickle``.

    Parameters
    ----------
    output_folder_dest : str
        Prefix of the output path. It is concatenated as-is, so it must
        already end with a path separator.
    data : object
        Picklable object to store (for example a NetworkX graph).
    file_name : str
        File name without the ``.gpickle`` extension.

    Returns
    -------
    None
        The destination path is printed once the file has been written.
    """
    output_filename = output_folder_dest + file_name + ".gpickle"
    # nx.write_gpickle was removed in NetworkX 3.0. Dumping with the standard
    # pickle module is the replacement named by NetworkX and keeps working on
    # both old and new NetworkX releases.
    with open(output_filename, 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Constructed and saved", output_filename)
    
def read_networkx_gpickle_file(file_path):
    """Load the object stored in a ``.gpickle`` file.

    Parameters
    ----------
    file_path : str
        Location of the pickle file.

    Returns
    -------
    object
        The unpickled object (for example a NetworkX graph).

    Raises
    ------
    SystemExit
        Raised through ``sys.exit`` with a "Can't locate input file" message
        when ``file_path`` does not exist.
    """
    # The existence check previously tested an undefined name
    # (input_file_name), so every call failed with a NameError.
    if not os.path.exists(file_path):
        sys.exit("Can't locate input file %s" % file_path)
    # nx.read_gpickle was removed in NetworkX 3.0; plain pickle.load is the
    # replacement named by NetworkX.
    # NOTE(review): unpickling can execute arbitrary code, so this should
    # only be pointed at trusted files.
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

In [3]:
# Filter configurations, keyed by the prefix that is prepended to the
# renamed output folders. Each value holds four booleans in the order in
# which the driver loop unpacks them:
#   [filter_data, filter_for_same_edges, filter_for_same_nodes, filter_for_close_nodes]
filter_type_dict = {
    "NOT_FILTERED_": [False, False, False, False],
    "FLTERED_FOR_SAME_EDGES_": [True, True, False, False],
    "FLTERED_FOR_SAME_NODES_": [True, False, True, False],
    "FLTERED_FOR_CLOSE_NODES_": [True, False, False, True],
}

# Folders under outputs/ that are renamed after every filter configuration.
output_folder_lst = [
    'output_pathfx_network_analysis',
    'output_string_network_analysis',
    'output_comparison_network_analysis',
]


In [4]:
# Driver loop: for every filter configuration in filter_type_dict, run the
# cleaning, (optional) filtering and analysis notebooks with %run, then rename
# each analysis output folder so that it carries the filter-type prefix.
for filter_type in filter_type_dict.keys():
    
    print()
    print("Nework Analysis for Filter Type:", filter_type)
    print("================================================================================================ \n \n")
    
    # Unpack the four boolean flags of this configuration. Only filter_data is
    # read in this cell.
    # NOTE(review): the other three flags are presumably read by the %run
    # notebooks through the shared kernel namespace -- confirm.
    filter_data, filter_for_same_edges, filter_for_same_nodes, filter_for_close_nodes = filter_type_dict[filter_type]
    
    # NOTE(review): both cleaning notebooks are re-run on every iteration and
    # their captured output looks the same each time -- confirm whether the
    # re-run is needed (e.g. to reset the input tables) before hoisting it.
    print("Running clean_pathfx_score_table.ipynb ... \n")
    %run ./clean_pathfx_score_table.ipynb
    print("Finished Running clean_pathfx_score_table.ipynb \n")

    print("Running clean_string_score_table.ipynb ... \n")
    %run ./clean_string_score_table.ipynb
    print("Finished Running clean_string_score_table.ipynb \n")

    # The filtering notebook only runs for configurations whose first flag
    # (filter_data) is True.
    if(filter_data):
        print("Running filter_string_and_pathfx_score_tables.ipynb ... \n")
        %run ./filter_string_and_pathfx_score_tables.ipynb
        print("Finished Running filter_string_and_pathfx_score_tables.ipynb \n")

    print("Running pathfx_network_analysis.ipynb ... \n")
    %run ./pathfx_network_analysis.ipynb
    print("Finished Running pathfx_network_analysis.ipynb \n")

    print("Running string_network_analysis.ipynb ... \n")
    %run ./string_network_analysis.ipynb
    print("Finished Running string_network_analysis.ipynb \n")

    print("Running comparison_network_analysis.ipynb ... \n")
    %run ./comparison_network_analysis.ipynb
    print("Finished Running comparison_network_analysis.ipynb \n")

    print("Finished Running all iPython Notebooks \n")
    
    print("Saving Output Folders...")
    
    # Rename outputs/<output_folder> to outputs/<filter_type><output_folder>.
    for output_folder in output_folder_lst:

        custom_folder_name = "outputs/" + filter_type + output_folder
    
        # Remove a folder already present under the target name.
        if os.path.exists(custom_folder_name):
    
            try:
                shutil.rmtree(custom_folder_name)
            except OSError as e:
                # A failed removal is only reported; the rename below is
                # still attempted.
                print("Error: %s : %s" % (custom_folder_name, e.strerror))

        # os.rename raises if outputs/<output_folder> does not exist.
        os.rename("outputs/" + output_folder, custom_folder_name)
        
        print("Folder " + output_folder + " renamed to " + custom_folder_name + " successfully")
    
    print()
    print("================================================================================================ \n \n")


Nework Analysis for Filter Type: NOT_FILTERED_
 

Running clean_pathfx_score_table.ipynb ... 




'pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139082,VCAN,SELP,0.279930
139083,NFIC,NFIB,0.279930
139084,NFIB,NFIX,0.279930
139085,SELK,SELK,0.416595



Nonself Records: 


'pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139075,SFTPA2,SFTPA1,0.359348
139080,SELL,VCAN,0.279930
139082,VCAN,SELP,0.279930
139083,NFIC,NFIB,0.279930


Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 17602
Number of Unique targets: 18089
Number of Unique sources and targets: 23970



'mirror_copy_pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,isoproterenol,rs1042713,0.233675
1,Ace Inhibitors,rs1042713,0.285181
2,Angiotensin,rs1042713,0.285181
3,Plain,rs1042713,0.285181
4,risperidone,rs1042713,0.285181
...,...,...,...
139075,SFTPA1,SFTPA2,0.359348
139080,VCAN,SELL,0.279930
139082,SELP,VCAN,0.279930
139083,NFIB,NFIC,0.279930






'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139075,SFTPA1,SFTPA2,0.359348
139080,VCAN,SELL,0.279930
139082,SELP,VCAN,0.279930
139083,NFIB,NFIC,0.279930




Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970

Constructed and saved outputs/output_clean_pathfx_score_table/pathfx_score_table.csv
Constructed and saved inputs/input_pathfx_network_analysis/pathfx_score_table.csv
Constructed and saved inputs/input_filter_string_and_pathfx_score_tables/pathfx_score_table.csv
Finished Running clean_pathfx_score_table.ipynb 

Running clean_string_score_table.ipynb ... 




'string_pre_score_df'

Unnamed: 0,Protein 1 ID,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,9606.ENSP00000000233,9606.ENSP00000272298,0,0,0,41
1,9606.ENSP00000000233,9606.ENSP00000253401,0,0,0,41
2,9606.ENSP00000000233,9606.ENSP00000401445,0,0,0,41
3,9606.ENSP00000000233,9606.ENSP00000418915,0,0,542,542
4,9606.ENSP00000000233,9606.ENSP00000327801,0,0,0,41
...,...,...,...,...,...,...
11759449,9606.ENSP00000485678,9606.ENSP00000310488,0,0,0,41
11759450,9606.ENSP00000485678,9606.ENSP00000342448,0,0,0,41
11759451,9606.ENSP00000485678,9606.ENSP00000350222,0,0,0,41
11759452,9606.ENSP00000485678,9606.ENSP00000367590,0,900,0,900


'string_info_df'

Unnamed: 0,Protein ID,Protein,protein_size,annotation
0,9606.ENSP00000000233,ARF5,180,ADP-ribosylation factor 5; GTP-binding protein...
1,9606.ENSP00000000412,M6PR,277,Cation-dependent mannose-6-phosphate receptor;...
2,9606.ENSP00000001008,FKBP4,459,Peptidyl-prolyl cis-trans isomerase FKBP4; Imm...
3,9606.ENSP00000001146,CYP26B1,512,Cytochrome P450 26B1; Involved in the metaboli...
4,9606.ENSP00000002125,NDUFAF7,441,"Protein arginine methyltransferase NDUFAF7, mi..."
...,...,...,...,...
19561,9606.ENSP00000485671,ENSG00000280273,120,HCG1991042
19562,9606.ENSP00000485672,ENSG00000279458,86,annotation not available
19563,9606.ENSP00000485673,ENSG00000279988,243,annotation not available
19564,9606.ENSP00000485675,ENSG00000280116,84,annotation not available






'string_score_df'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,ARF5,9606.ENSP00000000233,CALM2,9606.ENSP00000272298,0,0,0,41
1,FKBP4,9606.ENSP00000001008,CALM2,9606.ENSP00000272298,0,0,0,41
2,CYP51A1,9606.ENSP00000003100,CALM2,9606.ENSP00000272298,0,0,0,41
3,PDK4,9606.ENSP00000005178,CALM2,9606.ENSP00000272298,0,0,104,104
4,RALA,9606.ENSP00000005257,CALM2,9606.ENSP00000272298,313,0,0,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,9606.ENSP00000451411,ENSG00000274175,9606.ENSP00000479378,0,0,560,560
11759450,DUX4,9606.ENSP00000458065,ENSG00000274175,9606.ENSP00000479378,0,0,556,556
11759451,DUX4L8,9606.ENSP00000485452,ENSG00000274175,9606.ENSP00000479378,0,0,559,559
11759452,DYNLL2,9606.ENSP00000477310,C17orf47,9606.ENSP00000354874,212,0,0,212






'self_interacting_protein'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
2674693,ENSG00000243667,9606.ENSP00000295121,ENSG00000243667,9606.ENSP00000477980,800,0,0,800
7375638,ENSG00000216937,9606.ENSP00000364165,ENSG00000216937,9606.ENSP00000355078,0,0,0,41
7409667,ENSG00000216937,9606.ENSP00000355078,ENSG00000216937,9606.ENSP00000364165,0,0,0,41
8264678,ENSG00000243667,9606.ENSP00000477980,ENSG00000243667,9606.ENSP00000295121,800,0,0,800






'non_self_interacting_protein'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,ARF5,9606.ENSP00000000233,CALM2,9606.ENSP00000272298,0,0,0,41
1,FKBP4,9606.ENSP00000001008,CALM2,9606.ENSP00000272298,0,0,0,41
2,CYP51A1,9606.ENSP00000003100,CALM2,9606.ENSP00000272298,0,0,0,41
3,PDK4,9606.ENSP00000005178,CALM2,9606.ENSP00000272298,0,0,104,104
4,RALA,9606.ENSP00000005257,CALM2,9606.ENSP00000272298,313,0,0,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,9606.ENSP00000451411,ENSG00000274175,9606.ENSP00000479378,0,0,560,560
11759450,DUX4,9606.ENSP00000458065,ENSG00000274175,9606.ENSP00000479378,0,0,556,556
11759451,DUX4L8,9606.ENSP00000485452,ENSG00000274175,9606.ENSP00000479378,0,0,559,559
11759452,DYNLL2,9606.ENSP00000477310,C17orf47,9606.ENSP00000354874,212,0,0,212






'proteins_interac_unique_ids'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0,0,0,41,41,41
1,FKBP4,CALM2,0,0,0,41,41,41
2,CYP51A1,CALM2,0,0,0,41,41,41
3,PDK4,CALM2,0,0,104,104,104,104
4,RALA,CALM2,313,0,0,313,313,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,ENSG00000274175,0,0,560,560,560,560
11759450,DUX4,ENSG00000274175,0,0,556,556,556,556
11759451,DUX4L8,ENSG00000274175,0,0,559,559,559,559
11759452,DYNLL2,C17orf47,212,0,0,212,212,212






'proteins_interac_nonunique_ids'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
720,ENSG00000258947,9606.ENSP00000320295,CALM2,9606.ENSP00000272298,0,0,94,94
1737,ENSG00000258947,9606.ENSP00000451560,CALM2,9606.ENSP00000272298,0,0,84,84
1816,ENSG00000243667,9606.ENSP00000477980,CALM2,9606.ENSP00000272298,0,0,0,41
2128,ENSG00000258947,9606.ENSP00000320295,ARHGEF9,9606.ENSP00000253401,0,0,0,41
2524,ENSG00000258947,9606.ENSP00000451560,ARHGEF9,9606.ENSP00000253401,0,0,0,41
...,...,...,...,...,...,...,...,...
11724878,PADI6,9606.ENSP00000483125,ENSG00000239810,9606.ENSP00000480027,0,0,257,256
11724879,PRAMEF7,9606.ENSP00000484237,ENSG00000239810,9606.ENSP00000480027,0,0,0,41
11724880,PRAMEF25,9606.ENSP00000485258,ENSG00000239810,9606.ENSP00000480027,0,0,0,41
11724881,DUX4L8,9606.ENSP00000485452,ENSG00000239810,9606.ENSP00000480027,0,0,420,420






Unnamed: 0,Protein 1,Protein 2,Protein 1 ID,Protein 2 ID,experiments,database,textmining,Physical Combined Score
5269,ENSG00000258947,ENSG00000166160,3,3,3,3,3,3
1540,ENSG00000166160,ENSG00000258947,3,3,3,3,3,3
5415,ENSG00000258947,GFRA2,2,2,2,2,2,2
4717,ENSG00000258947,ANKS1B,2,2,2,2,2,2
8027,LETM1,ENSG00000183628,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...
4086,ENSG00000243667,PRKAR1B,1,1,1,1,1,1
4085,ENSG00000243667,PRKAR1A,1,1,1,1,1,1
4084,ENSG00000243667,PRKACG,1,1,1,1,1,1
4083,ENSG00000243667,PRKACB,1,1,1,1,1,1



1    5608
2    5220
3       2
Name: Physical Combined Score, dtype: int64




'corrected_proteins_interac_nonunique_ids'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,339010,ENSG00000243667,,,,,41,41.0
1,339010,ENSG00000258947,,,,,41,41.0
2,AAMDC,ENSG00000243667,,,,,41,41.0
3,ABCA4,ENSG00000166160,,,,,145,145.0
4,ABCB1,ENSG00000258947,,,,,329,307.5
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,268,267.0
10826,ZSWIM3,ENSG00000197054,,,,,214,214.0
10827,ZSWIM3,ENSG00000239810,,,,,215,213.5
10828,ZSWIM7,ENSG00000166160,,,,,41,41.0






'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.0,0.0,0.0,41.0,41,41.0
1,FKBP4,CALM2,0.0,0.0,0.0,41.0,41,41.0
2,CYP51A1,CALM2,0.0,0.0,0.0,41.0,41,41.0
3,PDK4,CALM2,0.0,0.0,104.0,104.0,104,104.0
4,RALA,CALM2,313.0,0.0,0.0,313.0,313,313.0
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,268,267.0
10826,ZSWIM3,ENSG00000197054,,,,,214,214.0
10827,ZSWIM3,ENSG00000239810,,,,,215,213.5
10828,ZSWIM7,ENSG00000166160,,,,,41,41.0






'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
10826,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
10827,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
10828,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410




Printing out counting statistics for entire filtering and correction process
-----------------------------------------------------------------------------
Number of Original Records: 11759454
Number of Self-Interacting Protein Records: 4
Number of Non-self Interacting Protein Records: 11759450

Number of Records containing Non-self Interacting Unique Protein ID: 11743396
Number of Records containing Non-self Interacting Non-unique Protein ID: 16054
Number of Records containing Non-self Interacting Non-unique Protein ID (Corrected): 10830

Number of Records in Final Table: 11754226
Number of Records reduced: 5228

Constructed and saved outputs/output_clean_string_score_table/string_score_table.csv
Constructed and saved inputs/input_string_network_analysis/string_score_table.csv
Constructed and saved inputs/input_filter_string_and_pathfx_score_tables/string_score_table.csv
Finished Running clean_string_score_table.ipynb 

Running pathfx_network_analysis.ipynb ... 




'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
273057,SFTPA1,SFTPA2,0.359348
273058,VCAN,SELL,0.279930
273059,SELP,VCAN,0.279930
273060,NFIB,NFIC,0.279930




Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970




'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
15441,UBC,6753,0.528777,0.132470,0.935012,0.279930,3570.832522
695,APP,1984,0.287942,0.040666,0.705007,0.246744,571.276135
13900,SUMO2,713,0.429295,0.086484,0.818355,0.246936,306.087256
14904,TP53,627,0.417612,0.124118,0.990000,0.246936,261.842982
23067,tat,502,0.321185,0.081879,0.770889,0.246744,161.235009
...,...,...,...,...,...,...,...
12591,SAMD1,1,0.323872,,0.323872,0.323872,0.323872
12590,SAM1,1,0.279930,,0.279930,0.279930,0.279930
12589,SAM-2,1,0.279930,,0.279930,0.279930,0.279930
12588,SALL4,1,0.380279,,0.380279,0.380279,0.380279






'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,23970.0
mean,11.391823
std,51.4994
max,6753.0
min,1.0
sum,273062.0






'pathfx_num_interac_distribution_df'

Unnamed: 0,Number of Protein Interactions,count
0,1,5548
1,2,3582
2,3,3276
3,4,1710
4,5,1148
...,...,...
191,326,1
190,280,1
189,318,1
188,281,1






'pathfx_edge_score_distribution_df'

Unnamed: 0,Edge Score,count
0,0.279930,38181
1,0.323872,18816
2,0.380279,11701
3,0.471716,11610
4,0.346946,7669
...,...,...
729,0.690513,1
728,0.698414,1
727,0.681025,1
726,0.775171,1






'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
7,rs1042713,spironolactone,0.285181
10,rs1042713,salbutamol,0.659492
12,rs1042713,salmeterol,0.659492
13,rs1042713,tiotropium,0.285181
20,rs1042714,terbutaline,0.233675
...,...,...,...
273052,STAT1,STAT3,0.354509
273053,STAT1,STAT2,0.305411
273057,SFTPA1,SFTPA2,0.359348
273059,SELP,VCAN,0.279930






'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,136531.0
mean,0.386598
std,0.131861
max,0.99
min,0.233675
sum,52782.576648



Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_score_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_all_weight_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_target_per_source_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_source_to_target_weight_stats.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_interac_distribution_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_removed_cross_pair_pathfx_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX_histplot.png
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX (Zoomed)_histplot.png
Constructed an

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11754221,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
11754222,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
11754223,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
11754224,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410






'string_num_protein_per_protein_stats_df'

Unnamed: 0,Number Protein Interaction Per Protein
count,19344.0
mean,607.642
std,528.7327
max,7644.0
min,1.0
sum,11754230.0






'string_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
18,ACTL6B,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
26,AQP2,CALM2,0.0,0.0,0.051,0.050,0.050,0.0500
31,APOH,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
40,ANKRD54,CALM2,0.0,0.0,0.210,0.210,0.210,0.2100
...,...,...,...,...,...,...,...,...
11750292,ENSG00000258947,ZNF445,,,,,0.138,0.0895
11750293,ENSG00000258947,ZNF516,,,,,0.093,0.0875
11750294,ENSG00000258947,ZNF607,,,,,0.169,0.1595
11750295,ENSG00000258947,ZNRF1,,,,,0.041,0.0410






'string_all_max_physical_combined_scores_stats_df'

Unnamed: 0,Max Physical Combined Score
count,5877113.0
mean,0.1836416
std,0.2219951
max,0.999
min,0.041
sum,1079283.0






'string_all_avg_physical_combined_scores_stats_df'

Unnamed: 0,Avg Physical Combined Score
count,5877113.0
mean,0.1836376
std,0.2219929
max,0.999
min,0.041
sum,1079259.0



Constructed and saved outputs/output_string_network_analysis/csv_files/string_score_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_num_protein_per_protein_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_removed_cross_pair_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_all_max_physical_combined_scores_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_all_avg_physical_combined_scores_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_experiments_per_protein.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_database_per_protein.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_textmining_per_protein

'string_summary_score_stats_for_avg_physical_combined_score_df'

Unnamed: 0,Protein ID,count,mean,std,max,min,sum
0,GAPDH,7644,0.241637,0.150712,0.994,0.041,1847.0700
1,AKT1,6507,0.300910,0.222489,0.999,0.041,1958.0225
2,TP53,6193,0.354423,0.240752,0.999,0.041,2194.9425
3,INS,6053,0.320572,0.201611,0.998,0.041,1940.4200
4,MYC,5786,0.294266,0.186558,0.999,0.041,1702.6225
...,...,...,...,...,...,...,...
19339,PGBD3,1,0.532000,,0.532,0.532,0.5320
19340,CCDC179,1,0.198000,,0.198,0.198,0.1980
19341,FAM90A26,1,0.955000,,0.955,0.955,0.9550
19342,C18orf42,1,0.196000,,0.196,0.196,0.1960




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 19344
Number of Unique Protein 2s: 19344
Number of Unique Protein: 19344


Constructing Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score) 

Saving Constructed Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score)

STRING Database NetworkX Graph
-------------------------------------
Number of Nodes: 19344
Number of Edges: 5877113
Average node degree: 607.6419561621175
Standard Deviation node degree: 528.732679726321
Max node degree: 7644.0
Min node degree: 1.0
Average edge weight: 0.18363761665666914
Standard Deviation node degree: 0.2219928864881866
Max edge weight: 0.999
Min edge weight: 0.041
nx Graph is connected - True
Number of Connected Components: 1

Top 25 Proteins with Most Neighbors - Avg Physical Combined Scored
-------------------------------------------------------------------------------------
   Protein I

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11754221,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
11754222,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
11754223,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
11754224,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410





'string_num_protein_per_protein_stats_df'

Unnamed: 0,Number Protein Interaction Per Protein
count,19344.0
mean,607.642
std,528.7327
max,7644.0
min,1.0
sum,11754230.0





'string_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
1,ACTL6B,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
2,AQP2,CALM2,0.0,0.0,0.051,0.050,0.050,0.0500
3,APOH,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
4,ANKRD54,CALM2,0.0,0.0,0.210,0.210,0.210,0.2100
...,...,...,...,...,...,...,...,...
5877108,ENSG00000258947,ZNF445,,,,,0.138,0.0895
5877109,ENSG00000258947,ZNF516,,,,,0.093,0.0875
5877110,ENSG00000258947,ZNF607,,,,,0.169,0.1595
5877111,ENSG00000258947,ZNRF1,,,,,0.041,0.0410





'string_all_avg_physical_combined_scores_stats_df'

Unnamed: 0,Avg Physical Combined Score
count,5877113.0
mean,0.1836376
std,0.2219929
max,0.999
min,0.041
sum,1079259.0





'string_summary_score_stats_for_avg_physical_combined_score_df'

Unnamed: 0,Protein ID,count,mean,std,max,min,sum
0,GAPDH,7644,0.241637,0.150712,0.994,0.041,1847.0700
1,AKT1,6507,0.300910,0.222489,0.999,0.041,1958.0225
2,TP53,6193,0.354423,0.240752,0.999,0.041,2194.9425
3,INS,6053,0.320572,0.201611,0.998,0.041,1940.4200
4,MYC,5786,0.294266,0.186558,0.999,0.041,1702.6225
...,...,...,...,...,...,...,...
19339,PGBD3,1,0.532000,,0.532,0.532,0.5320
19340,CCDC179,1,0.198000,,0.198,0.198,0.1980
19341,FAM90A26,1,0.955000,,0.955,0.955,0.9550
19342,C18orf42,1,0.196000,,0.196,0.196,0.1960





'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
273057,SFTPA1,SFTPA2,0.359348
273058,VCAN,SELL,0.279930
273059,SELP,VCAN,0.279930
273060,NFIB,NFIC,0.279930





'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,23970.0
mean,11.391823
std,51.4994
max,6753.0
min,1.0
sum,273062.0





'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
0,rs1042713,spironolactone,0.285181
1,rs1042713,salbutamol,0.659492
2,rs1042713,salmeterol,0.659492
3,rs1042713,tiotropium,0.285181
4,rs1042714,terbutaline,0.233675
...,...,...,...
136526,STAT1,STAT3,0.354509
136527,STAT1,STAT2,0.305411
136528,SFTPA1,SFTPA2,0.359348
136529,SELP,VCAN,0.279930





'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,136531.0
mean,0.386598
std,0.131861
max,0.99
min,0.233675
sum,52782.576648





'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
0,UBC,6753,0.528777,0.132470,0.935012,0.279930,3570.832522
1,APP,1984,0.287942,0.040666,0.705007,0.246744,571.276135
2,SUMO2,713,0.429295,0.086484,0.818355,0.246936,306.087256
3,TP53,627,0.417612,0.124118,0.990000,0.246936,261.842982
4,tat,502,0.321185,0.081879,0.770889,0.246744,161.235009
...,...,...,...,...,...,...,...
23965,SAMD1,1,0.323872,,0.323872,0.323872,0.323872
23966,SAM1,1,0.279930,,0.279930,0.279930,0.279930
23967,SAM-2,1,0.279930,,0.279930,0.279930,0.279930
23968,SALL4,1,0.380279,,0.380279,0.380279,0.380279




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 19344
Number of Unique Protein 2s: 19344
Number of Unique Protein: 19344


PathFX Interactome
-------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970


Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5396
Number of distinct PathFX nodes: 10022


Comparison of NODES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5396
Number of distinct PathFX nodes: 10022


Comparison of EDGES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Edges from both graph n

'pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139082,VCAN,SELP,0.279930
139083,NFIC,NFIB,0.279930
139084,NFIB,NFIX,0.279930
139085,SELK,SELK,0.416595



Nonself Records: 


'pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139075,SFTPA2,SFTPA1,0.359348
139080,SELL,VCAN,0.279930
139082,VCAN,SELP,0.279930
139083,NFIC,NFIB,0.279930


Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 17602
Number of Unique targets: 18089
Number of Unique sources and targets: 23970



'mirror_copy_pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,isoproterenol,rs1042713,0.233675
1,Ace Inhibitors,rs1042713,0.285181
2,Angiotensin,rs1042713,0.285181
3,Plain,rs1042713,0.285181
4,risperidone,rs1042713,0.285181
...,...,...,...
139075,SFTPA1,SFTPA2,0.359348
139080,VCAN,SELL,0.279930
139082,SELP,VCAN,0.279930
139083,NFIB,NFIC,0.279930






'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139075,SFTPA1,SFTPA2,0.359348
139080,VCAN,SELL,0.279930
139082,SELP,VCAN,0.279930
139083,NFIB,NFIC,0.279930




Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970

Constructed and saved outputs/output_clean_pathfx_score_table/pathfx_score_table.csv
Constructed and saved inputs/input_pathfx_network_analysis/pathfx_score_table.csv
Constructed and saved inputs/input_filter_string_and_pathfx_score_tables/pathfx_score_table.csv
Finished Running clean_pathfx_score_table.ipynb 

Running clean_string_score_table.ipynb ... 




'string_pre_score_df'

Unnamed: 0,Protein 1 ID,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,9606.ENSP00000000233,9606.ENSP00000272298,0,0,0,41
1,9606.ENSP00000000233,9606.ENSP00000253401,0,0,0,41
2,9606.ENSP00000000233,9606.ENSP00000401445,0,0,0,41
3,9606.ENSP00000000233,9606.ENSP00000418915,0,0,542,542
4,9606.ENSP00000000233,9606.ENSP00000327801,0,0,0,41
...,...,...,...,...,...,...
11759449,9606.ENSP00000485678,9606.ENSP00000310488,0,0,0,41
11759450,9606.ENSP00000485678,9606.ENSP00000342448,0,0,0,41
11759451,9606.ENSP00000485678,9606.ENSP00000350222,0,0,0,41
11759452,9606.ENSP00000485678,9606.ENSP00000367590,0,900,0,900


'string_info_df'

Unnamed: 0,Protein ID,Protein,protein_size,annotation
0,9606.ENSP00000000233,ARF5,180,ADP-ribosylation factor 5; GTP-binding protein...
1,9606.ENSP00000000412,M6PR,277,Cation-dependent mannose-6-phosphate receptor;...
2,9606.ENSP00000001008,FKBP4,459,Peptidyl-prolyl cis-trans isomerase FKBP4; Imm...
3,9606.ENSP00000001146,CYP26B1,512,Cytochrome P450 26B1; Involved in the metaboli...
4,9606.ENSP00000002125,NDUFAF7,441,"Protein arginine methyltransferase NDUFAF7, mi..."
...,...,...,...,...
19561,9606.ENSP00000485671,ENSG00000280273,120,HCG1991042
19562,9606.ENSP00000485672,ENSG00000279458,86,annotation not available
19563,9606.ENSP00000485673,ENSG00000279988,243,annotation not available
19564,9606.ENSP00000485675,ENSG00000280116,84,annotation not available






'string_score_df'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,ARF5,9606.ENSP00000000233,CALM2,9606.ENSP00000272298,0,0,0,41
1,FKBP4,9606.ENSP00000001008,CALM2,9606.ENSP00000272298,0,0,0,41
2,CYP51A1,9606.ENSP00000003100,CALM2,9606.ENSP00000272298,0,0,0,41
3,PDK4,9606.ENSP00000005178,CALM2,9606.ENSP00000272298,0,0,104,104
4,RALA,9606.ENSP00000005257,CALM2,9606.ENSP00000272298,313,0,0,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,9606.ENSP00000451411,ENSG00000274175,9606.ENSP00000479378,0,0,560,560
11759450,DUX4,9606.ENSP00000458065,ENSG00000274175,9606.ENSP00000479378,0,0,556,556
11759451,DUX4L8,9606.ENSP00000485452,ENSG00000274175,9606.ENSP00000479378,0,0,559,559
11759452,DYNLL2,9606.ENSP00000477310,C17orf47,9606.ENSP00000354874,212,0,0,212






'self_interacting_protein'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
2674693,ENSG00000243667,9606.ENSP00000295121,ENSG00000243667,9606.ENSP00000477980,800,0,0,800
7375638,ENSG00000216937,9606.ENSP00000364165,ENSG00000216937,9606.ENSP00000355078,0,0,0,41
7409667,ENSG00000216937,9606.ENSP00000355078,ENSG00000216937,9606.ENSP00000364165,0,0,0,41
8264678,ENSG00000243667,9606.ENSP00000477980,ENSG00000243667,9606.ENSP00000295121,800,0,0,800






'non_self_interacting_protein'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,ARF5,9606.ENSP00000000233,CALM2,9606.ENSP00000272298,0,0,0,41
1,FKBP4,9606.ENSP00000001008,CALM2,9606.ENSP00000272298,0,0,0,41
2,CYP51A1,9606.ENSP00000003100,CALM2,9606.ENSP00000272298,0,0,0,41
3,PDK4,9606.ENSP00000005178,CALM2,9606.ENSP00000272298,0,0,104,104
4,RALA,9606.ENSP00000005257,CALM2,9606.ENSP00000272298,313,0,0,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,9606.ENSP00000451411,ENSG00000274175,9606.ENSP00000479378,0,0,560,560
11759450,DUX4,9606.ENSP00000458065,ENSG00000274175,9606.ENSP00000479378,0,0,556,556
11759451,DUX4L8,9606.ENSP00000485452,ENSG00000274175,9606.ENSP00000479378,0,0,559,559
11759452,DYNLL2,9606.ENSP00000477310,C17orf47,9606.ENSP00000354874,212,0,0,212






'proteins_interac_unique_ids'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0,0,0,41,41,41
1,FKBP4,CALM2,0,0,0,41,41,41
2,CYP51A1,CALM2,0,0,0,41,41,41
3,PDK4,CALM2,0,0,104,104,104,104
4,RALA,CALM2,313,0,0,313,313,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,ENSG00000274175,0,0,560,560,560,560
11759450,DUX4,ENSG00000274175,0,0,556,556,556,556
11759451,DUX4L8,ENSG00000274175,0,0,559,559,559,559
11759452,DYNLL2,C17orf47,212,0,0,212,212,212






'proteins_interac_nonunique_ids'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
720,ENSG00000258947,9606.ENSP00000320295,CALM2,9606.ENSP00000272298,0,0,94,94
1737,ENSG00000258947,9606.ENSP00000451560,CALM2,9606.ENSP00000272298,0,0,84,84
1816,ENSG00000243667,9606.ENSP00000477980,CALM2,9606.ENSP00000272298,0,0,0,41
2128,ENSG00000258947,9606.ENSP00000320295,ARHGEF9,9606.ENSP00000253401,0,0,0,41
2524,ENSG00000258947,9606.ENSP00000451560,ARHGEF9,9606.ENSP00000253401,0,0,0,41
...,...,...,...,...,...,...,...,...
11724878,PADI6,9606.ENSP00000483125,ENSG00000239810,9606.ENSP00000480027,0,0,257,256
11724879,PRAMEF7,9606.ENSP00000484237,ENSG00000239810,9606.ENSP00000480027,0,0,0,41
11724880,PRAMEF25,9606.ENSP00000485258,ENSG00000239810,9606.ENSP00000480027,0,0,0,41
11724881,DUX4L8,9606.ENSP00000485452,ENSG00000239810,9606.ENSP00000480027,0,0,420,420






Unnamed: 0,Protein 1,Protein 2,Protein 1 ID,Protein 2 ID,experiments,database,textmining,Physical Combined Score
5269,ENSG00000258947,ENSG00000166160,3,3,3,3,3,3
1540,ENSG00000166160,ENSG00000258947,3,3,3,3,3,3
5415,ENSG00000258947,GFRA2,2,2,2,2,2,2
4717,ENSG00000258947,ANKS1B,2,2,2,2,2,2
8027,LETM1,ENSG00000183628,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...
4086,ENSG00000243667,PRKAR1B,1,1,1,1,1,1
4085,ENSG00000243667,PRKAR1A,1,1,1,1,1,1
4084,ENSG00000243667,PRKACG,1,1,1,1,1,1
4083,ENSG00000243667,PRKACB,1,1,1,1,1,1



1    5608
2    5220
3       2
Name: Physical Combined Score, dtype: int64




'corrected_proteins_interac_nonunique_ids'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,339010,ENSG00000243667,,,,,41,41.0
1,339010,ENSG00000258947,,,,,41,41.0
2,AAMDC,ENSG00000243667,,,,,41,41.0
3,ABCA4,ENSG00000166160,,,,,145,145.0
4,ABCB1,ENSG00000258947,,,,,329,307.5
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,268,267.0
10826,ZSWIM3,ENSG00000197054,,,,,214,214.0
10827,ZSWIM3,ENSG00000239810,,,,,215,213.5
10828,ZSWIM7,ENSG00000166160,,,,,41,41.0






'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.0,0.0,0.0,41.0,41,41.0
1,FKBP4,CALM2,0.0,0.0,0.0,41.0,41,41.0
2,CYP51A1,CALM2,0.0,0.0,0.0,41.0,41,41.0
3,PDK4,CALM2,0.0,0.0,104.0,104.0,104,104.0
4,RALA,CALM2,313.0,0.0,0.0,313.0,313,313.0
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,268,267.0
10826,ZSWIM3,ENSG00000197054,,,,,214,214.0
10827,ZSWIM3,ENSG00000239810,,,,,215,213.5
10828,ZSWIM7,ENSG00000166160,,,,,41,41.0






'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
10826,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
10827,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
10828,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410




Printing out counting statistics for entire filtering and correction process
-----------------------------------------------------------------------------
Number of Original Records: 11759454
Number of Self-Interacting Protein Records: 4
Number of Non-self Interacting Protein Records: 11759450

Number of Records containing Non-self Interacting Unique Protein ID: 11743396
Number of Records containing Non-self Interacting Non-unique Protein ID: 16054
Number of Records containing Non-self Interacting Non-unique Protein ID (Corrected): 10830

Number of Records in Final Table: 11754226
Number of Records reduced: 5228

Constructed and saved outputs/output_clean_string_score_table/string_score_table.csv
Constructed and saved inputs/input_string_network_analysis/string_score_table.csv
Constructed and saved inputs/input_filter_string_and_pathfx_score_tables/string_score_table.csv
Finished Running clean_string_score_table.ipynb 

Running filter_string_and_pathfx_score_tables.ipynb ... 




'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11754221,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
11754222,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
11754223,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
11754224,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410





'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
273057,SFTPA1,SFTPA2,0.359348
273058,VCAN,SELL,0.279930
273059,SELP,VCAN,0.279930
273060,NFIB,NFIC,0.279930




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 19344
Number of Unique Protein 2s: 19344
Number of Unique Protein: 19344

PathFX Interactome
-------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970

Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5396
Number of distinct PathFX nodes: 10022


Comparison of NODES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5396
Number of distinct PathFX nodes: 10022

Comparison of EDGES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Edges from both graph netw

'filtered_string_score_df'

Unnamed: 0,source,target,textmining,Physical Combined Score,database,Avg Physical Combined Score,experiments,Max Physical Combined Score
0,ARF5,AP1B1,0.145,0.418,0.0,0.418,0.348,0.418
1,ARF5,RAB11FIP3,0.641,0.767,0.0,0.767,0.379,0.767
2,ARF5,IKBKG,0.197,0.197,0.0,0.197,0.000,0.197
3,ARF5,GGA1,0.293,0.493,0.0,0.493,0.313,0.493
4,ARF5,ARFIP2,0.521,0.656,0.0,0.656,0.313,0.656
...,...,...,...,...,...,...,...,...
72273,ADAMTSL4,ADAMTSL5,0.444,0.946,0.9,0.946,0.110,0.946
72274,EBI3,IL27,0.709,0.980,0.9,0.980,0.379,0.980
72275,MRPL51,MRPL52,0.355,0.997,0.9,0.997,0.960,0.997
72276,SMCP,KRTAP5-6,0.000,0.237,0.0,0.237,0.237,0.237





'filtered_pathfx_score_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,ADCY3,0.490794
3,ADRB2,RGS2,0.375065
4,ADRB2,NEDD4,0.380279
...,...,...,...
72273,CBLN3,CBLN1,0.422287
72274,HCN2,HCN4,0.346946
72275,ZIC2,RNF180,0.495315
72276,BCKDHB,BCKDHA,0.279930





'filtered_string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,AP1B1,0.348,0.0,0.145,0.418,0.418,0.418
1,ARF5,RAB11FIP3,0.379,0.0,0.641,0.767,0.767,0.767
2,ARF5,IKBKG,0.000,0.0,0.197,0.197,0.197,0.197
3,ARF5,GGA1,0.313,0.0,0.293,0.493,0.493,0.493
4,ARF5,ARFIP2,0.313,0.0,0.521,0.656,0.656,0.656
...,...,...,...,...,...,...,...,...
72273,ADAMTSL5,ADAMTSL4,0.110,0.9,0.444,0.946,0.946,0.946
72274,IL27,EBI3,0.379,0.9,0.709,0.980,0.980,0.980
72275,MRPL52,MRPL51,0.960,0.9,0.355,0.997,0.997,0.997
72276,KRTAP5-6,SMCP,0.237,0.0,0.000,0.237,0.237,0.237


'filtered_pathfx_score_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,ADCY3,0.490794
3,ADRB2,RGS2,0.375065
4,ADRB2,NEDD4,0.380279
...,...,...,...
72273,CBLN1,CBLN3,0.422287
72274,HCN4,HCN2,0.346946
72275,RNF180,ZIC2,0.495315
72276,BCKDHA,BCKDHB,0.279930



Filtered STRING Database
-------------------------------------------
Number of Unique Protein 1s: 11500
Number of Unique Protein 2s: 11500
Number of Unique Protein: 11500

Filtered PathFX Interactome
-------------------------------------------
Number of Unique sources: 11500
Number of Unique targets: 11500
Number of Unique sources and targets: 11500

Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 11500
Number of distinct STRING nodes: 0
Number of distinct PathFX nodes: 0

Constructed and saved outputs/output_filter_string_and_pathfx_score_tables/pathfx_score_table.csv.csv
Constructed and saved outputs/output_filter_string_and_pathfx_score_tables/string_score_table.csv.csv
Constructed and saved inputs/input_pathfx_network_analysis/pathfx_score_table.csv
Constructed and saved inputs/input_string_network_analysis/string_score_table.csv
Finished Running filter_string_and_pathfx_score_tables.ip

'pathfx_score_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,ADCY3,0.490794
3,ADRB2,RGS2,0.375065
4,ADRB2,NEDD4,0.380279
...,...,...,...
144551,CBLN1,CBLN3,0.422287
144552,HCN4,HCN2,0.346946
144553,RNF180,ZIC2,0.495315
144554,BCKDHA,BCKDHB,0.279930




Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 11500
Number of Unique targets: 11500
Number of Unique sources and targets: 11500




'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
10647,UBC,2012,0.582679,0.143764,0.935012,0.279930,1172.349315
10313,TP53,501,0.431408,0.123262,0.872160,0.279930,216.135288
520,APP,458,0.309989,0.076054,0.705007,0.246744,141.975066
9712,SUMO2,365,0.452888,0.099200,0.818355,0.279930,165.304231
2846,EGFR,356,0.402973,0.107479,0.935012,0.246936,143.458217
...,...,...,...,...,...,...,...
6261,NAT9,1,0.539793,,0.539793,0.539793,0.539793
6262,NATD1,1,0.279930,,0.279930,0.279930,0.279930
6265,NAV3,1,0.279930,,0.279930,0.279930,0.279930
6268,NBEAL2,1,0.471716,,0.471716,0.471716,0.471716






'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,11500.0
mean,12.570087
std,29.251411
max,2012.0
min,1.0
sum,144556.0






'pathfx_num_interac_distribution_df'

Unnamed: 0,Number of Protein Interactions,count
0,1,2360
1,2,1340
2,3,999
3,4,780
4,5,603
...,...,...
147,301,1
148,333,1
149,356,1
150,365,1






'pathfx_edge_score_distribution_df'

Unnamed: 0,Edge Score,count
0,0.323872,17239
1,0.279930,15764
2,0.380279,6872
3,0.346946,6167
4,0.471716,3791
...,...,...
598,0.856611,1
597,0.821204,1
596,0.578061,1
595,0.721781,1






'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
3,ADRB2,RGS2,0.375065
4,ADRB2,NEDD4,0.380279
5,ADRB2,ADRBK1,0.400626
...,...,...,...
144545,S1PR5,SGPP1,0.279930
144547,GRHL1,GRHL3,0.346946
144551,CBLN1,CBLN3,0.422287
144553,RNF180,ZIC2,0.495315






'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,72278.0
mean,0.381317
std,0.10724
max,0.935012
min,0.244958
sum,27560.81907



Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_score_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_all_weight_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_target_per_source_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_source_to_target_weight_stats.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_interac_distribution_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_removed_cross_pair_pathfx_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX_histplot.png
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX (Zoomed)_histplot.png
Constructed an

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,AP1B1,0.348,0.0,0.145,0.418,0.418,0.418
1,ARF5,RAB11FIP3,0.379,0.0,0.641,0.767,0.767,0.767
2,ARF5,IKBKG,0.000,0.0,0.197,0.197,0.197,0.197
3,ARF5,GGA1,0.313,0.0,0.293,0.493,0.493,0.493
4,ARF5,ARFIP2,0.313,0.0,0.521,0.656,0.656,0.656
...,...,...,...,...,...,...,...,...
144551,ADAMTSL5,ADAMTSL4,0.110,0.9,0.444,0.946,0.946,0.946
144552,IL27,EBI3,0.379,0.9,0.709,0.980,0.980,0.980
144553,MRPL52,MRPL51,0.960,0.9,0.355,0.997,0.997,0.997
144554,KRTAP5-6,SMCP,0.237,0.0,0.000,0.237,0.237,0.237






'string_num_protein_per_protein_stats_df'

Unnamed: 0,Number Protein Interaction Per Protein
count,11500.0
mean,12.570087
std,29.251411
max,2012.0
min,1.0
sum,144556.0






'string_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
1,ARF5,RAB11FIP3,0.379,0.0,0.641,0.767,0.767,0.767
2,ARF5,IKBKG,0.000,0.0,0.197,0.197,0.197,0.197
3,ARF5,GGA1,0.313,0.0,0.293,0.493,0.493,0.493
4,ARF5,ARFIP2,0.313,0.0,0.521,0.656,0.656,0.656
5,ARF5,ASAP2,0.313,0.0,0.541,0.671,0.671,0.671
...,...,...,...,...,...,...,...,...
144544,KRTAP10-7,KRTAP10-9,0.166,0.0,0.000,0.165,0.165,0.165
144547,C19orf66,KRTAP10-5,0.166,0.0,0.000,0.165,0.165,0.165
144550,ADAMTSL4,SPINK2,0.123,0.0,0.000,0.123,0.123,0.123
144554,KRTAP5-6,SMCP,0.237,0.0,0.000,0.237,0.237,0.237






'string_all_max_physical_combined_scores_stats_df'

Unnamed: 0,Max Physical Combined Score
count,72278.0
mean,0.5622
std,0.313293
max,0.999
min,0.041
sum,40634.679






'string_all_avg_physical_combined_scores_stats_df'

Unnamed: 0,Avg Physical Combined Score
count,72278.0
mean,0.5622
std,0.313293
max,0.999
min,0.041
sum,40634.679



Constructed and saved outputs/output_string_network_analysis/csv_files/string_score_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_num_protein_per_protein_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_removed_cross_pair_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_all_max_physical_combined_scores_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_all_avg_physical_combined_scores_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_experiments_per_protein.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_database_per_protein.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_textmining_per_protein

'string_summary_score_stats_for_avg_physical_combined_score_df'

Unnamed: 0,Protein ID,count,mean,std,max,min,sum
0,UBC,2012,0.457029,0.399132,0.999,0.041,919.543
1,TP53,501,0.697928,0.247446,0.999,0.122,349.662
2,APP,458,0.365557,0.273463,0.998,0.041,167.425
3,SUMO2,365,0.325548,0.314524,0.998,0.041,118.825
4,EGFR,356,0.605264,0.279473,0.999,0.078,215.474
...,...,...,...,...,...,...,...
11495,NAT9,1,0.686000,,0.686,0.686,0.686
11496,NATD1,1,0.338000,,0.338,0.338,0.338
11497,NAV3,1,0.130000,,0.130,0.130,0.130
11498,NBEAL2,1,0.117000,,0.117,0.117,0.117




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 11500
Number of Unique Protein 2s: 11500
Number of Unique Protein: 11500


Constructing Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score) 

Saving Constructed Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score)

STRING Database NetworkX Graph
-------------------------------------
Number of Nodes: 11500
Number of Edges: 72278
Average node degree: 12.57008695652174
Standard Deviation node degree: 29.251411195939102
Max node degree: 2012.0
Min node degree: 1.0
Average edge weight: 0.5621998256730999
Standard Deviation node degree: 0.31329281849887314
Max edge weight: 0.999
Min edge weight: 0.041
nx Graph is connected - False
Number of Connected Components: 76

Top 25 Proteins with Most Neighbors - Avg Physical Combined Scored
-------------------------------------------------------------------------------------
   Protein

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,AP1B1,0.348,0.0,0.145,0.418,0.418,0.418
1,ARF5,RAB11FIP3,0.379,0.0,0.641,0.767,0.767,0.767
2,ARF5,IKBKG,0.000,0.0,0.197,0.197,0.197,0.197
3,ARF5,GGA1,0.313,0.0,0.293,0.493,0.493,0.493
4,ARF5,ARFIP2,0.313,0.0,0.521,0.656,0.656,0.656
...,...,...,...,...,...,...,...,...
144551,ADAMTSL5,ADAMTSL4,0.110,0.9,0.444,0.946,0.946,0.946
144552,IL27,EBI3,0.379,0.9,0.709,0.980,0.980,0.980
144553,MRPL52,MRPL51,0.960,0.9,0.355,0.997,0.997,0.997
144554,KRTAP5-6,SMCP,0.237,0.0,0.000,0.237,0.237,0.237





'string_num_protein_per_protein_stats_df'

Unnamed: 0,Number Protein Interaction Per Protein
count,11500.0
mean,12.570087
std,29.251411
max,2012.0
min,1.0
sum,144556.0





'string_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,RAB11FIP3,0.379,0.0,0.641,0.767,0.767,0.767
1,ARF5,IKBKG,0.000,0.0,0.197,0.197,0.197,0.197
2,ARF5,GGA1,0.313,0.0,0.293,0.493,0.493,0.493
3,ARF5,ARFIP2,0.313,0.0,0.521,0.656,0.656,0.656
4,ARF5,ASAP2,0.313,0.0,0.541,0.671,0.671,0.671
...,...,...,...,...,...,...,...,...
72273,KRTAP10-7,KRTAP10-9,0.166,0.0,0.000,0.165,0.165,0.165
72274,C19orf66,KRTAP10-5,0.166,0.0,0.000,0.165,0.165,0.165
72275,ADAMTSL4,SPINK2,0.123,0.0,0.000,0.123,0.123,0.123
72276,KRTAP5-6,SMCP,0.237,0.0,0.000,0.237,0.237,0.237





'string_all_avg_physical_combined_scores_stats_df'

Unnamed: 0,Avg Physical Combined Score
count,72278.0
mean,0.5622
std,0.313293
max,0.999
min,0.041
sum,40634.679





'string_summary_score_stats_for_avg_physical_combined_score_df'

Unnamed: 0,Protein ID,count,mean,std,max,min,sum
0,UBC,2012,0.457029,0.399132,0.999,0.041,919.543
1,TP53,501,0.697928,0.247446,0.999,0.122,349.662
2,APP,458,0.365557,0.273463,0.998,0.041,167.425
3,SUMO2,365,0.325548,0.314524,0.998,0.041,118.825
4,EGFR,356,0.605264,0.279473,0.999,0.078,215.474
...,...,...,...,...,...,...,...
11495,NAT9,1,0.686000,,0.686,0.686,0.686
11496,NATD1,1,0.338000,,0.338,0.338,0.338
11497,NAV3,1,0.130000,,0.130,0.130,0.130
11498,NBEAL2,1,0.117000,,0.117,0.117,0.117





'pathfx_score_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,ADCY3,0.490794
3,ADRB2,RGS2,0.375065
4,ADRB2,NEDD4,0.380279
...,...,...,...
144551,CBLN1,CBLN3,0.422287
144552,HCN4,HCN2,0.346946
144553,RNF180,ZIC2,0.495315
144554,BCKDHA,BCKDHB,0.279930





'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,11500.0
mean,12.570087
std,29.251411
max,2012.0
min,1.0
sum,144556.0





'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,RGS2,0.375065
3,ADRB2,NEDD4,0.380279
4,ADRB2,ADRBK1,0.400626
...,...,...,...
72273,S1PR5,SGPP1,0.279930
72274,GRHL1,GRHL3,0.346946
72275,CBLN1,CBLN3,0.422287
72276,RNF180,ZIC2,0.495315





'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,72278.0
mean,0.381317
std,0.10724
max,0.935012
min,0.244958
sum,27560.81907





'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
0,UBC,2012,0.582679,0.143764,0.935012,0.279930,1172.349315
1,TP53,501,0.431408,0.123262,0.872160,0.279930,216.135288
2,APP,458,0.309989,0.076054,0.705007,0.246744,141.975066
3,SUMO2,365,0.452888,0.099200,0.818355,0.279930,165.304231
4,EGFR,356,0.402973,0.107479,0.935012,0.246936,143.458217
...,...,...,...,...,...,...,...
11495,NAT9,1,0.539793,,0.539793,0.539793,0.539793
11496,NATD1,1,0.279930,,0.279930,0.279930,0.279930
11497,NAV3,1,0.279930,,0.279930,0.279930,0.279930
11498,NBEAL2,1,0.471716,,0.471716,0.471716,0.471716




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 11500
Number of Unique Protein 2s: 11500
Number of Unique Protein: 11500


PathFX Interactome
-------------------------------------------
Number of Unique sources: 11500
Number of Unique targets: 11500
Number of Unique sources and targets: 11500


Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 11500
Number of distinct STRING nodes: 0
Number of distinct PathFX nodes: 0


Comparison of NODES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 11500
Number of distinct STRING nodes: 0
Number of distinct PathFX nodes: 0


Comparison of EDGES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Edges from both graph networks: 72278

'pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139082,VCAN,SELP,0.279930
139083,NFIC,NFIB,0.279930
139084,NFIB,NFIX,0.279930
139085,SELK,SELK,0.416595



Nonself Records: 


'pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139075,SFTPA2,SFTPA1,0.359348
139080,SELL,VCAN,0.279930
139082,VCAN,SELP,0.279930
139083,NFIC,NFIB,0.279930


Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 17602
Number of Unique targets: 18089
Number of Unique sources and targets: 23970



'mirror_copy_pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,isoproterenol,rs1042713,0.233675
1,Ace Inhibitors,rs1042713,0.285181
2,Angiotensin,rs1042713,0.285181
3,Plain,rs1042713,0.285181
4,risperidone,rs1042713,0.285181
...,...,...,...
139075,SFTPA1,SFTPA2,0.359348
139080,VCAN,SELL,0.279930
139082,SELP,VCAN,0.279930
139083,NFIB,NFIC,0.279930






'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139075,SFTPA1,SFTPA2,0.359348
139080,VCAN,SELL,0.279930
139082,SELP,VCAN,0.279930
139083,NFIB,NFIC,0.279930




Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970

Constructed and saved outputs/output_clean_pathfx_score_table/pathfx_score_table.csv
Constructed and saved inputs/input_pathfx_network_analysis/pathfx_score_table.csv
Constructed and saved inputs/input_filter_string_and_pathfx_score_tables/pathfx_score_table.csv
Finished Running clean_pathfx_score_table.ipynb 

Running clean_string_score_table.ipynb ... 




'string_pre_score_df'

Unnamed: 0,Protein 1 ID,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,9606.ENSP00000000233,9606.ENSP00000272298,0,0,0,41
1,9606.ENSP00000000233,9606.ENSP00000253401,0,0,0,41
2,9606.ENSP00000000233,9606.ENSP00000401445,0,0,0,41
3,9606.ENSP00000000233,9606.ENSP00000418915,0,0,542,542
4,9606.ENSP00000000233,9606.ENSP00000327801,0,0,0,41
...,...,...,...,...,...,...
11759449,9606.ENSP00000485678,9606.ENSP00000310488,0,0,0,41
11759450,9606.ENSP00000485678,9606.ENSP00000342448,0,0,0,41
11759451,9606.ENSP00000485678,9606.ENSP00000350222,0,0,0,41
11759452,9606.ENSP00000485678,9606.ENSP00000367590,0,900,0,900


'string_info_df'

Unnamed: 0,Protein ID,Protein,protein_size,annotation
0,9606.ENSP00000000233,ARF5,180,ADP-ribosylation factor 5; GTP-binding protein...
1,9606.ENSP00000000412,M6PR,277,Cation-dependent mannose-6-phosphate receptor;...
2,9606.ENSP00000001008,FKBP4,459,Peptidyl-prolyl cis-trans isomerase FKBP4; Imm...
3,9606.ENSP00000001146,CYP26B1,512,Cytochrome P450 26B1; Involved in the metaboli...
4,9606.ENSP00000002125,NDUFAF7,441,"Protein arginine methyltransferase NDUFAF7, mi..."
...,...,...,...,...
19561,9606.ENSP00000485671,ENSG00000280273,120,HCG1991042
19562,9606.ENSP00000485672,ENSG00000279458,86,annotation not available
19563,9606.ENSP00000485673,ENSG00000279988,243,annotation not available
19564,9606.ENSP00000485675,ENSG00000280116,84,annotation not available






'string_score_df'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,ARF5,9606.ENSP00000000233,CALM2,9606.ENSP00000272298,0,0,0,41
1,FKBP4,9606.ENSP00000001008,CALM2,9606.ENSP00000272298,0,0,0,41
2,CYP51A1,9606.ENSP00000003100,CALM2,9606.ENSP00000272298,0,0,0,41
3,PDK4,9606.ENSP00000005178,CALM2,9606.ENSP00000272298,0,0,104,104
4,RALA,9606.ENSP00000005257,CALM2,9606.ENSP00000272298,313,0,0,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,9606.ENSP00000451411,ENSG00000274175,9606.ENSP00000479378,0,0,560,560
11759450,DUX4,9606.ENSP00000458065,ENSG00000274175,9606.ENSP00000479378,0,0,556,556
11759451,DUX4L8,9606.ENSP00000485452,ENSG00000274175,9606.ENSP00000479378,0,0,559,559
11759452,DYNLL2,9606.ENSP00000477310,C17orf47,9606.ENSP00000354874,212,0,0,212






'self_interacting_protein'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
2674693,ENSG00000243667,9606.ENSP00000295121,ENSG00000243667,9606.ENSP00000477980,800,0,0,800
7375638,ENSG00000216937,9606.ENSP00000364165,ENSG00000216937,9606.ENSP00000355078,0,0,0,41
7409667,ENSG00000216937,9606.ENSP00000355078,ENSG00000216937,9606.ENSP00000364165,0,0,0,41
8264678,ENSG00000243667,9606.ENSP00000477980,ENSG00000243667,9606.ENSP00000295121,800,0,0,800






'non_self_interacting_protein'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,ARF5,9606.ENSP00000000233,CALM2,9606.ENSP00000272298,0,0,0,41
1,FKBP4,9606.ENSP00000001008,CALM2,9606.ENSP00000272298,0,0,0,41
2,CYP51A1,9606.ENSP00000003100,CALM2,9606.ENSP00000272298,0,0,0,41
3,PDK4,9606.ENSP00000005178,CALM2,9606.ENSP00000272298,0,0,104,104
4,RALA,9606.ENSP00000005257,CALM2,9606.ENSP00000272298,313,0,0,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,9606.ENSP00000451411,ENSG00000274175,9606.ENSP00000479378,0,0,560,560
11759450,DUX4,9606.ENSP00000458065,ENSG00000274175,9606.ENSP00000479378,0,0,556,556
11759451,DUX4L8,9606.ENSP00000485452,ENSG00000274175,9606.ENSP00000479378,0,0,559,559
11759452,DYNLL2,9606.ENSP00000477310,C17orf47,9606.ENSP00000354874,212,0,0,212






'proteins_interac_unique_ids'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0,0,0,41,41,41
1,FKBP4,CALM2,0,0,0,41,41,41
2,CYP51A1,CALM2,0,0,0,41,41,41
3,PDK4,CALM2,0,0,104,104,104,104
4,RALA,CALM2,313,0,0,313,313,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,ENSG00000274175,0,0,560,560,560,560
11759450,DUX4,ENSG00000274175,0,0,556,556,556,556
11759451,DUX4L8,ENSG00000274175,0,0,559,559,559,559
11759452,DYNLL2,C17orf47,212,0,0,212,212,212






'proteins_interac_nonunique_ids'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
720,ENSG00000258947,9606.ENSP00000320295,CALM2,9606.ENSP00000272298,0,0,94,94
1737,ENSG00000258947,9606.ENSP00000451560,CALM2,9606.ENSP00000272298,0,0,84,84
1816,ENSG00000243667,9606.ENSP00000477980,CALM2,9606.ENSP00000272298,0,0,0,41
2128,ENSG00000258947,9606.ENSP00000320295,ARHGEF9,9606.ENSP00000253401,0,0,0,41
2524,ENSG00000258947,9606.ENSP00000451560,ARHGEF9,9606.ENSP00000253401,0,0,0,41
...,...,...,...,...,...,...,...,...
11724878,PADI6,9606.ENSP00000483125,ENSG00000239810,9606.ENSP00000480027,0,0,257,256
11724879,PRAMEF7,9606.ENSP00000484237,ENSG00000239810,9606.ENSP00000480027,0,0,0,41
11724880,PRAMEF25,9606.ENSP00000485258,ENSG00000239810,9606.ENSP00000480027,0,0,0,41
11724881,DUX4L8,9606.ENSP00000485452,ENSG00000239810,9606.ENSP00000480027,0,0,420,420






Unnamed: 0,Protein 1,Protein 2,Protein 1 ID,Protein 2 ID,experiments,database,textmining,Physical Combined Score
5269,ENSG00000258947,ENSG00000166160,3,3,3,3,3,3
1540,ENSG00000166160,ENSG00000258947,3,3,3,3,3,3
5415,ENSG00000258947,GFRA2,2,2,2,2,2,2
4717,ENSG00000258947,ANKS1B,2,2,2,2,2,2
8027,LETM1,ENSG00000183628,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...
4086,ENSG00000243667,PRKAR1B,1,1,1,1,1,1
4085,ENSG00000243667,PRKAR1A,1,1,1,1,1,1
4084,ENSG00000243667,PRKACG,1,1,1,1,1,1
4083,ENSG00000243667,PRKACB,1,1,1,1,1,1



1    5608
2    5220
3       2
Name: Physical Combined Score, dtype: int64




'corrected_proteins_interac_nonunique_ids'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,339010,ENSG00000243667,,,,,41,41.0
1,339010,ENSG00000258947,,,,,41,41.0
2,AAMDC,ENSG00000243667,,,,,41,41.0
3,ABCA4,ENSG00000166160,,,,,145,145.0
4,ABCB1,ENSG00000258947,,,,,329,307.5
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,268,267.0
10826,ZSWIM3,ENSG00000197054,,,,,214,214.0
10827,ZSWIM3,ENSG00000239810,,,,,215,213.5
10828,ZSWIM7,ENSG00000166160,,,,,41,41.0






'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.0,0.0,0.0,41.0,41,41.0
1,FKBP4,CALM2,0.0,0.0,0.0,41.0,41,41.0
2,CYP51A1,CALM2,0.0,0.0,0.0,41.0,41,41.0
3,PDK4,CALM2,0.0,0.0,104.0,104.0,104,104.0
4,RALA,CALM2,313.0,0.0,0.0,313.0,313,313.0
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,268,267.0
10826,ZSWIM3,ENSG00000197054,,,,,214,214.0
10827,ZSWIM3,ENSG00000239810,,,,,215,213.5
10828,ZSWIM7,ENSG00000166160,,,,,41,41.0






'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
10826,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
10827,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
10828,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410




Printing out counting statistics for entire filtering and correction process
-----------------------------------------------------------------------------
Number of Original Records: 11759454
Number of Self-Interacting Protein Records: 4
Number of Non-self Interacting Protein Records: 11759450

Number of Records containing Non-self Interacting Unique Protein ID: 11743396
Number of Records containing Non-self Interacting Non-unique Protein ID: 16054
Number of Records containing Non-self Interacting Non-unique Protein ID (Corrected): 10830

Number of Records in Final Table: 11754226
Number of Records reduced: 5228

Constructed and saved outputs/output_clean_string_score_table/string_score_table.csv
Constructed and saved inputs/input_string_network_analysis/string_score_table.csv
Constructed and saved inputs/input_filter_string_and_pathfx_score_tables/string_score_table.csv
Finished Running clean_string_score_table.ipynb 

Running filter_string_and_pathfx_score_tables.ipynb ... 




'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11754221,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
11754222,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
11754223,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
11754224,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410





'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
273057,SFTPA1,SFTPA2,0.359348
273058,VCAN,SELL,0.279930
273059,SELP,VCAN,0.279930
273060,NFIB,NFIC,0.279930




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 19344
Number of Unique Protein 2s: 19344
Number of Unique Protein: 19344

PathFX Interactome
-------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970

Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5396
Number of distinct PathFX nodes: 10022


Comparison of NODES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5396
Number of distinct PathFX nodes: 10022

Comparison of EDGES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Edges from both graph netw

'filtered_string_score_df'

Unnamed: 0,source,target,textmining,Physical Combined Score,database,Avg Physical Combined Score,experiments,Max Physical Combined Score
0,ARF5,ARHGEF9,0.000,0.041,0.0,0.041,0.000,0.041
1,ARF5,ERN1,0.000,0.041,0.0,0.041,0.000,0.041
2,ARF5,CDKN2A,0.542,0.542,0.0,0.542,0.000,0.542
3,ARF5,P4HB,0.000,0.041,0.0,0.041,0.000,0.041
4,ARF5,STX10,0.000,0.041,0.0,0.041,0.000,0.041
...,...,...,...,...,...,...,...,...
4266450,TSACC,ALS2CR11,0.567,0.567,0.0,0.567,0.000,0.567
4266451,LCE3E,LCE1B,0.330,0.930,0.9,0.930,0.000,0.930
4266452,LCE3E,KRTAP10-7,0.344,0.380,0.0,0.380,0.095,0.380
4266453,LCE3E,KRTAP5-6,0.000,0.232,0.0,0.232,0.233,0.232





'filtered_pathfx_score_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,GNL3L,0.375065
3,ADRB2,ADCY3,0.490794
4,ADRB2,PLA2G2D,0.490794
...,...,...,...
103822,CBLN3,CBLN1,0.422287
103823,HCN2,HCN4,0.346946
103824,ZIC2,RNF180,0.495315
103825,BCKDHB,BCKDHA,0.279930





'filtered_string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,ARHGEF9,0.000,0.0,0.000,0.041,0.041,0.041
1,ARF5,ERN1,0.000,0.0,0.000,0.041,0.041,0.041
2,ARF5,CDKN2A,0.000,0.0,0.542,0.542,0.542,0.542
3,ARF5,P4HB,0.000,0.0,0.000,0.041,0.041,0.041
4,ARF5,STX10,0.000,0.0,0.000,0.041,0.041,0.041
...,...,...,...,...,...,...,...,...
4266450,ALS2CR11,TSACC,0.000,0.0,0.567,0.567,0.567,0.567
4266451,LCE1B,LCE3E,0.000,0.9,0.330,0.930,0.930,0.930
4266452,KRTAP10-7,LCE3E,0.095,0.0,0.344,0.380,0.380,0.380
4266453,KRTAP5-6,LCE3E,0.233,0.0,0.000,0.232,0.232,0.232


'filtered_pathfx_score_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,GNL3L,0.375065
3,ADRB2,ADCY3,0.490794
4,ADRB2,PLA2G2D,0.490794
...,...,...,...
103822,CBLN1,CBLN3,0.422287
103823,HCN4,HCN2,0.346946
103824,RNF180,ZIC2,0.495315
103825,BCKDHA,BCKDHB,0.279930



Filtered STRING Database
-------------------------------------------
Number of Unique Protein 1s: 13948
Number of Unique Protein 2s: 13948
Number of Unique Protein: 13948

Filtered PathFX Interactome
-------------------------------------------
Number of Unique sources: 13661
Number of Unique targets: 13661
Number of Unique sources and targets: 13661

Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13661
Number of distinct STRING nodes: 287
Number of distinct PathFX nodes: 0

Constructed and saved outputs/output_filter_string_and_pathfx_score_tables/pathfx_score_table.csv.csv
Constructed and saved outputs/output_filter_string_and_pathfx_score_tables/string_score_table.csv.csv
Constructed and saved inputs/input_pathfx_network_analysis/pathfx_score_table.csv
Constructed and saved inputs/input_string_network_analysis/string_score_table.csv
Finished Running filter_string_and_pathfx_score_tables.

'pathfx_score_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,GNL3L,0.375065
3,ADRB2,ADCY3,0.490794
4,ADRB2,PLA2G2D,0.490794
...,...,...,...
207649,CBLN1,CBLN3,0.422287
207650,HCN4,HCN2,0.346946
207651,RNF180,ZIC2,0.495315
207652,BCKDHA,BCKDHB,0.279930




Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 13661
Number of Unique targets: 13661
Number of Unique sources and targets: 13661




'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
12587,UBC,6592,0.530731,0.131993,0.935012,0.279930,3498.577766
636,APP,1934,0.287171,0.039294,0.705007,0.246744,555.389373
11443,SUMO2,686,0.432130,0.084895,0.818355,0.246936,296.441152
12201,TP53,521,0.425785,0.124181,0.872160,0.279930,221.834245
3476,EGFR,383,0.400774,0.104261,0.935012,0.246936,153.496566
...,...,...,...,...,...,...,...
10225,S1PR5,1,0.279930,,0.279930,0.279930,0.279930
10227,SAA2,1,0.279930,,0.279930,0.279930,0.279930
1019,BAAT,1,0.279930,,0.279930,0.279930,0.279930
10234,SAG,1,0.346946,,0.346946,0.346946,0.346946






'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,13661.0
mean,15.200498
std,64.419458
max,6592.0
min,1.0
sum,207654.0






'pathfx_num_interac_distribution_df'

Unnamed: 0,Number of Protein Interactions,count
0,1,2447
1,2,1578
2,3,1129
3,4,854
4,5,732
...,...,...
176,293,1
177,279,1
178,278,1
179,258,1






'pathfx_edge_score_distribution_df'

Unnamed: 0,Edge Score,count
0,0.279930,33997
1,0.323872,18345
2,0.380279,9763
3,0.471716,9429
4,0.346946,6227
...,...,...
603,0.557242,1
602,0.918548,1
601,0.794593,1
600,0.673390,1






'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,GNL3L,0.375065
4,ADRB2,PLA2G2D,0.490794
5,ADRB2,RAB3A,0.375065
...,...,...,...
207644,DSCAML1,PLCL2,0.279930
207645,GRHL1,GRHL3,0.346946
207649,CBLN1,CBLN3,0.422287
207651,RNF180,ZIC2,0.495315






'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,103827.0
mean,0.373906
std,0.108259
max,0.935012
min,0.244958
sum,38821.504499



Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_score_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_all_weight_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_target_per_source_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_source_to_target_weight_stats.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_interac_distribution_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_removed_cross_pair_pathfx_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX_histplot.png
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX (Zoomed)_histplot.png
Constructed an

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,ARHGEF9,0.000,0.0,0.000,0.041,0.041,0.041
1,ARF5,ERN1,0.000,0.0,0.000,0.041,0.041,0.041
2,ARF5,CDKN2A,0.000,0.0,0.542,0.542,0.542,0.542
3,ARF5,P4HB,0.000,0.0,0.000,0.041,0.041,0.041
4,ARF5,STX10,0.000,0.0,0.000,0.041,0.041,0.041
...,...,...,...,...,...,...,...,...
8532905,ALS2CR11,TSACC,0.000,0.0,0.567,0.567,0.567,0.567
8532906,LCE1B,LCE3E,0.000,0.9,0.330,0.930,0.930,0.930
8532907,KRTAP10-7,LCE3E,0.095,0.0,0.344,0.380,0.380,0.380
8532908,KRTAP5-6,LCE3E,0.233,0.0,0.000,0.232,0.232,0.232






'string_num_protein_per_protein_stats_df'

Unnamed: 0,Number Protein Interaction Per Protein
count,13948.0
mean,611.7658
std,485.741
max,6739.0
min,1.0
sum,8532910.0






'string_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,ARHGEF9,0.000,0.0,0.000,0.041,0.041,0.041
1,ARF5,ERN1,0.000,0.0,0.000,0.041,0.041,0.041
2,ARF5,CDKN2A,0.000,0.0,0.542,0.542,0.542,0.542
3,ARF5,P4HB,0.000,0.0,0.000,0.041,0.041,0.041
4,ARF5,STX10,0.000,0.0,0.000,0.041,0.041,0.041
...,...,...,...,...,...,...,...,...
8532905,ALS2CR11,TSACC,0.000,0.0,0.567,0.567,0.567,0.567
8532906,LCE1B,LCE3E,0.000,0.9,0.330,0.930,0.930,0.930
8532907,KRTAP10-7,LCE3E,0.095,0.0,0.344,0.380,0.380,0.380
8532908,KRTAP5-6,LCE3E,0.233,0.0,0.000,0.232,0.232,0.232






'string_all_max_physical_combined_scores_stats_df'

Unnamed: 0,Max Physical Combined Score
count,4266455.0
mean,0.1922272
std,0.2309624
max,0.999
min,0.041
sum,820128.6






'string_all_avg_physical_combined_scores_stats_df'

Unnamed: 0,Avg Physical Combined Score
count,4266455.0
mean,0.1922272
std,0.2309624
max,0.999
min,0.041
sum,820128.6



Constructed and saved outputs/output_string_network_analysis/csv_files/string_score_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_num_protein_per_protein_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_removed_cross_pair_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_all_max_physical_combined_scores_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_all_avg_physical_combined_scores_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_experiments_per_protein.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_database_per_protein.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_textmining_per_protein

'string_summary_score_stats_for_avg_physical_combined_score_df'

Unnamed: 0,Protein ID,count,mean,std,max,min,sum
0,GAPDH,6739,0.248155,0.151822,0.994,0.041,1672.319
1,AKT1,5837,0.312324,0.227158,0.999,0.041,1823.035
2,TP53,5604,0.365078,0.245133,0.999,0.041,2045.899
3,MYC,5302,0.298459,0.189086,0.999,0.041,1582.430
4,INS,5267,0.328817,0.205672,0.998,0.041,1731.879
...,...,...,...,...,...,...,...
13943,CDPF1,3,0.212333,0.031390,0.237,0.177,0.637
13944,C12orf40,2,0.143500,0.144957,0.246,0.041,0.287
13945,BRICD5,1,0.181000,,0.181,0.181,0.181
13946,PGBD3,1,0.532000,,0.532,0.532,0.532




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 13948
Number of Unique Protein 2s: 13948
Number of Unique Protein: 13948


Constructing Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score) 

Saving Constructed Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score)

STRING Database NetworkX Graph
-------------------------------------
Number of Nodes: 13948
Number of Edges: 4266455
Average node degree: 611.7658445655292
Standard Deviation node degree: 485.74102472648985
Max node degree: 6739.0
Min node degree: 1.0
Average edge weight: 0.19222717150387195
Standard Deviation node degree: 0.2309623662874354
Max edge weight: 0.999
Min edge weight: 0.041
nx Graph is connected - True
Number of Connected Components: 1

Top 25 Proteins with Most Neighbors - Avg Physical Combined Scored
-------------------------------------------------------------------------------------
   Protein

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,ARHGEF9,0.000,0.0,0.000,0.041,0.041,0.041
1,ARF5,ERN1,0.000,0.0,0.000,0.041,0.041,0.041
2,ARF5,CDKN2A,0.000,0.0,0.542,0.542,0.542,0.542
3,ARF5,P4HB,0.000,0.0,0.000,0.041,0.041,0.041
4,ARF5,STX10,0.000,0.0,0.000,0.041,0.041,0.041
...,...,...,...,...,...,...,...,...
8532905,ALS2CR11,TSACC,0.000,0.0,0.567,0.567,0.567,0.567
8532906,LCE1B,LCE3E,0.000,0.9,0.330,0.930,0.930,0.930
8532907,KRTAP10-7,LCE3E,0.095,0.0,0.344,0.380,0.380,0.380
8532908,KRTAP5-6,LCE3E,0.233,0.0,0.000,0.232,0.232,0.232





'string_num_protein_per_protein_stats_df'

Unnamed: 0,Number Protein Interaction Per Protein
count,13948.0
mean,611.7658
std,485.741
max,6739.0
min,1.0
sum,8532910.0





'string_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,ARHGEF9,0.000,0.0,0.000,0.041,0.041,0.041
1,ARF5,ERN1,0.000,0.0,0.000,0.041,0.041,0.041
2,ARF5,CDKN2A,0.000,0.0,0.542,0.542,0.542,0.542
3,ARF5,P4HB,0.000,0.0,0.000,0.041,0.041,0.041
4,ARF5,STX10,0.000,0.0,0.000,0.041,0.041,0.041
...,...,...,...,...,...,...,...,...
4266450,ALS2CR11,TSACC,0.000,0.0,0.567,0.567,0.567,0.567
4266451,LCE1B,LCE3E,0.000,0.9,0.330,0.930,0.930,0.930
4266452,KRTAP10-7,LCE3E,0.095,0.0,0.344,0.380,0.380,0.380
4266453,KRTAP5-6,LCE3E,0.233,0.0,0.000,0.232,0.232,0.232





'string_all_avg_physical_combined_scores_stats_df'

Unnamed: 0,Avg Physical Combined Score
count,4266455.0
mean,0.1922272
std,0.2309624
max,0.999
min,0.041
sum,820128.6





'string_summary_score_stats_for_avg_physical_combined_score_df'

Unnamed: 0,Protein ID,count,mean,std,max,min,sum
0,GAPDH,6739,0.248155,0.151822,0.994,0.041,1672.319
1,AKT1,5837,0.312324,0.227158,0.999,0.041,1823.035
2,TP53,5604,0.365078,0.245133,0.999,0.041,2045.899
3,MYC,5302,0.298459,0.189086,0.999,0.041,1582.430
4,INS,5267,0.328817,0.205672,0.998,0.041,1731.879
...,...,...,...,...,...,...,...
13943,CDPF1,3,0.212333,0.031390,0.237,0.177,0.637
13944,C12orf40,2,0.143500,0.144957,0.246,0.041,0.287
13945,BRICD5,1,0.181000,,0.181,0.181,0.181
13946,PGBD3,1,0.532000,,0.532,0.532,0.532





'pathfx_score_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,GNL3L,0.375065
3,ADRB2,ADCY3,0.490794
4,ADRB2,PLA2G2D,0.490794
...,...,...,...
207649,CBLN1,CBLN3,0.422287
207650,HCN4,HCN2,0.346946
207651,RNF180,ZIC2,0.495315
207652,BCKDHA,BCKDHB,0.279930





'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,13661.0
mean,15.200498
std,64.419458
max,6592.0
min,1.0
sum,207654.0





'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
0,ADRB2,CALM3,0.471716
1,ADRB2,PPP3CA,0.323872
2,ADRB2,GNL3L,0.375065
3,ADRB2,PLA2G2D,0.490794
4,ADRB2,RAB3A,0.375065
...,...,...,...
103822,DSCAML1,PLCL2,0.279930
103823,GRHL1,GRHL3,0.346946
103824,CBLN1,CBLN3,0.422287
103825,RNF180,ZIC2,0.495315





'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,103827.0
mean,0.373906
std,0.108259
max,0.935012
min,0.244958
sum,38821.504499





'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
0,UBC,6592,0.530731,0.131993,0.935012,0.279930,3498.577766
1,APP,1934,0.287171,0.039294,0.705007,0.246744,555.389373
2,SUMO2,686,0.432130,0.084895,0.818355,0.246936,296.441152
3,TP53,521,0.425785,0.124181,0.872160,0.279930,221.834245
4,EGFR,383,0.400774,0.104261,0.935012,0.246936,153.496566
...,...,...,...,...,...,...,...
13656,S1PR5,1,0.279930,,0.279930,0.279930,0.279930
13657,SAA2,1,0.279930,,0.279930,0.279930,0.279930
13658,BAAT,1,0.279930,,0.279930,0.279930,0.279930
13659,SAG,1,0.346946,,0.346946,0.346946,0.346946




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 13948
Number of Unique Protein 2s: 13948
Number of Unique Protein: 13948


PathFX Interactome
-------------------------------------------
Number of Unique sources: 13661
Number of Unique targets: 13661
Number of Unique sources and targets: 13661


Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13661
Number of distinct STRING nodes: 287
Number of distinct PathFX nodes: 0


Comparison of NODES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13661
Number of distinct STRING nodes: 287
Number of distinct PathFX nodes: 0


Comparison of EDGES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Edges from both graph networks: 7

'pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139082,VCAN,SELP,0.279930
139083,NFIC,NFIB,0.279930
139084,NFIB,NFIX,0.279930
139085,SELK,SELK,0.416595



Nonself Records: 


'pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139075,SFTPA2,SFTPA1,0.359348
139080,SELL,VCAN,0.279930
139082,VCAN,SELP,0.279930
139083,NFIC,NFIB,0.279930


Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 17602
Number of Unique targets: 18089
Number of Unique sources and targets: 23970



'mirror_copy_pre_pathfx_score_df'

Unnamed: 0,source,target,weight
0,isoproterenol,rs1042713,0.233675
1,Ace Inhibitors,rs1042713,0.285181
2,Angiotensin,rs1042713,0.285181
3,Plain,rs1042713,0.285181
4,risperidone,rs1042713,0.285181
...,...,...,...
139075,SFTPA1,SFTPA2,0.359348
139080,VCAN,SELL,0.279930
139082,SELP,VCAN,0.279930
139083,NFIB,NFIC,0.279930






'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
139075,SFTPA1,SFTPA2,0.359348
139080,VCAN,SELL,0.279930
139082,SELP,VCAN,0.279930
139083,NFIB,NFIC,0.279930




Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970

Constructed and saved outputs/output_clean_pathfx_score_table/pathfx_score_table.csv
Constructed and saved inputs/input_pathfx_network_analysis/pathfx_score_table.csv
Constructed and saved inputs/input_filter_string_and_pathfx_score_tables/pathfx_score_table.csv
Finished Running clean_pathfx_score_table.ipynb 

Running clean_string_score_table.ipynb ... 




'string_pre_score_df'

Unnamed: 0,Protein 1 ID,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,9606.ENSP00000000233,9606.ENSP00000272298,0,0,0,41
1,9606.ENSP00000000233,9606.ENSP00000253401,0,0,0,41
2,9606.ENSP00000000233,9606.ENSP00000401445,0,0,0,41
3,9606.ENSP00000000233,9606.ENSP00000418915,0,0,542,542
4,9606.ENSP00000000233,9606.ENSP00000327801,0,0,0,41
...,...,...,...,...,...,...
11759449,9606.ENSP00000485678,9606.ENSP00000310488,0,0,0,41
11759450,9606.ENSP00000485678,9606.ENSP00000342448,0,0,0,41
11759451,9606.ENSP00000485678,9606.ENSP00000350222,0,0,0,41
11759452,9606.ENSP00000485678,9606.ENSP00000367590,0,900,0,900


'string_info_df'

Unnamed: 0,Protein ID,Protein,protein_size,annotation
0,9606.ENSP00000000233,ARF5,180,ADP-ribosylation factor 5; GTP-binding protein...
1,9606.ENSP00000000412,M6PR,277,Cation-dependent mannose-6-phosphate receptor;...
2,9606.ENSP00000001008,FKBP4,459,Peptidyl-prolyl cis-trans isomerase FKBP4; Imm...
3,9606.ENSP00000001146,CYP26B1,512,Cytochrome P450 26B1; Involved in the metaboli...
4,9606.ENSP00000002125,NDUFAF7,441,"Protein arginine methyltransferase NDUFAF7, mi..."
...,...,...,...,...
19561,9606.ENSP00000485671,ENSG00000280273,120,HCG1991042
19562,9606.ENSP00000485672,ENSG00000279458,86,annotation not available
19563,9606.ENSP00000485673,ENSG00000279988,243,annotation not available
19564,9606.ENSP00000485675,ENSG00000280116,84,annotation not available






'string_score_df'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,ARF5,9606.ENSP00000000233,CALM2,9606.ENSP00000272298,0,0,0,41
1,FKBP4,9606.ENSP00000001008,CALM2,9606.ENSP00000272298,0,0,0,41
2,CYP51A1,9606.ENSP00000003100,CALM2,9606.ENSP00000272298,0,0,0,41
3,PDK4,9606.ENSP00000005178,CALM2,9606.ENSP00000272298,0,0,104,104
4,RALA,9606.ENSP00000005257,CALM2,9606.ENSP00000272298,313,0,0,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,9606.ENSP00000451411,ENSG00000274175,9606.ENSP00000479378,0,0,560,560
11759450,DUX4,9606.ENSP00000458065,ENSG00000274175,9606.ENSP00000479378,0,0,556,556
11759451,DUX4L8,9606.ENSP00000485452,ENSG00000274175,9606.ENSP00000479378,0,0,559,559
11759452,DYNLL2,9606.ENSP00000477310,C17orf47,9606.ENSP00000354874,212,0,0,212






'self_interacting_protein'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
2674693,ENSG00000243667,9606.ENSP00000295121,ENSG00000243667,9606.ENSP00000477980,800,0,0,800
7375638,ENSG00000216937,9606.ENSP00000364165,ENSG00000216937,9606.ENSP00000355078,0,0,0,41
7409667,ENSG00000216937,9606.ENSP00000355078,ENSG00000216937,9606.ENSP00000364165,0,0,0,41
8264678,ENSG00000243667,9606.ENSP00000477980,ENSG00000243667,9606.ENSP00000295121,800,0,0,800






'non_self_interacting_protein'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
0,ARF5,9606.ENSP00000000233,CALM2,9606.ENSP00000272298,0,0,0,41
1,FKBP4,9606.ENSP00000001008,CALM2,9606.ENSP00000272298,0,0,0,41
2,CYP51A1,9606.ENSP00000003100,CALM2,9606.ENSP00000272298,0,0,0,41
3,PDK4,9606.ENSP00000005178,CALM2,9606.ENSP00000272298,0,0,104,104
4,RALA,9606.ENSP00000005257,CALM2,9606.ENSP00000272298,313,0,0,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,9606.ENSP00000451411,ENSG00000274175,9606.ENSP00000479378,0,0,560,560
11759450,DUX4,9606.ENSP00000458065,ENSG00000274175,9606.ENSP00000479378,0,0,556,556
11759451,DUX4L8,9606.ENSP00000485452,ENSG00000274175,9606.ENSP00000479378,0,0,559,559
11759452,DYNLL2,9606.ENSP00000477310,C17orf47,9606.ENSP00000354874,212,0,0,212






'proteins_interac_unique_ids'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0,0,0,41,41,41
1,FKBP4,CALM2,0,0,0,41,41,41
2,CYP51A1,CALM2,0,0,0,41,41,41
3,PDK4,CALM2,0,0,104,104,104,104
4,RALA,CALM2,313,0,0,313,313,313
...,...,...,...,...,...,...,...,...
11759449,DUX4L7,ENSG00000274175,0,0,560,560,560,560
11759450,DUX4,ENSG00000274175,0,0,556,556,556,556
11759451,DUX4L8,ENSG00000274175,0,0,559,559,559,559
11759452,DYNLL2,C17orf47,212,0,0,212,212,212






'proteins_interac_nonunique_ids'

Unnamed: 0,Protein 1,Protein 1 ID,Protein 2,Protein 2 ID,experiments,database,textmining,Physical Combined Score
720,ENSG00000258947,9606.ENSP00000320295,CALM2,9606.ENSP00000272298,0,0,94,94
1737,ENSG00000258947,9606.ENSP00000451560,CALM2,9606.ENSP00000272298,0,0,84,84
1816,ENSG00000243667,9606.ENSP00000477980,CALM2,9606.ENSP00000272298,0,0,0,41
2128,ENSG00000258947,9606.ENSP00000320295,ARHGEF9,9606.ENSP00000253401,0,0,0,41
2524,ENSG00000258947,9606.ENSP00000451560,ARHGEF9,9606.ENSP00000253401,0,0,0,41
...,...,...,...,...,...,...,...,...
11724878,PADI6,9606.ENSP00000483125,ENSG00000239810,9606.ENSP00000480027,0,0,257,256
11724879,PRAMEF7,9606.ENSP00000484237,ENSG00000239810,9606.ENSP00000480027,0,0,0,41
11724880,PRAMEF25,9606.ENSP00000485258,ENSG00000239810,9606.ENSP00000480027,0,0,0,41
11724881,DUX4L8,9606.ENSP00000485452,ENSG00000239810,9606.ENSP00000480027,0,0,420,420






Unnamed: 0,Protein 1,Protein 2,Protein 1 ID,Protein 2 ID,experiments,database,textmining,Physical Combined Score
5269,ENSG00000258947,ENSG00000166160,3,3,3,3,3,3
1540,ENSG00000166160,ENSG00000258947,3,3,3,3,3,3
5415,ENSG00000258947,GFRA2,2,2,2,2,2,2
4717,ENSG00000258947,ANKS1B,2,2,2,2,2,2
8027,LETM1,ENSG00000183628,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...
4086,ENSG00000243667,PRKAR1B,1,1,1,1,1,1
4085,ENSG00000243667,PRKAR1A,1,1,1,1,1,1
4084,ENSG00000243667,PRKACG,1,1,1,1,1,1
4083,ENSG00000243667,PRKACB,1,1,1,1,1,1



1    5608
2    5220
3       2
Name: Physical Combined Score, dtype: int64




'corrected_proteins_interac_nonunique_ids'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,339010,ENSG00000243667,,,,,41,41.0
1,339010,ENSG00000258947,,,,,41,41.0
2,AAMDC,ENSG00000243667,,,,,41,41.0
3,ABCA4,ENSG00000166160,,,,,145,145.0
4,ABCB1,ENSG00000258947,,,,,329,307.5
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,268,267.0
10826,ZSWIM3,ENSG00000197054,,,,,214,214.0
10827,ZSWIM3,ENSG00000239810,,,,,215,213.5
10828,ZSWIM7,ENSG00000166160,,,,,41,41.0






'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.0,0.0,0.0,41.0,41,41.0
1,FKBP4,CALM2,0.0,0.0,0.0,41.0,41,41.0
2,CYP51A1,CALM2,0.0,0.0,0.0,41.0,41,41.0
3,PDK4,CALM2,0.0,0.0,104.0,104.0,104,104.0
4,RALA,CALM2,313.0,0.0,0.0,313.0,313,313.0
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,268,267.0
10826,ZSWIM3,ENSG00000197054,,,,,214,214.0
10827,ZSWIM3,ENSG00000239810,,,,,215,213.5
10828,ZSWIM7,ENSG00000166160,,,,,41,41.0






'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
10825,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
10826,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
10827,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
10828,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410




Printing out counting statistics for entire filtering and correction process
-----------------------------------------------------------------------------
Number of Original Records: 11759454
Number of Self-Interacting Protein Records: 4
Number of Non-self Interacting Protein Records: 11759450

Number of Records containing Non-self Interacting Unique Protein ID: 11743396
Number of Records containing Non-self Interacting Non-unique Protein ID: 16054
Number of Records containing Non-self Interacting Non-unique Protein ID (Corrected): 10830

Number of Records in Final Table: 11754226
Number of Records reduced: 5228

Constructed and saved outputs/output_clean_string_score_table/string_score_table.csv
Constructed and saved inputs/input_string_network_analysis/string_score_table.csv
Constructed and saved inputs/input_filter_string_and_pathfx_score_tables/string_score_table.csv
Finished Running clean_string_score_table.ipynb 

Running filter_string_and_pathfx_score_tables.ipynb ... 




'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11754221,ZSCAN5A,ENSG00000239810,,,,,0.268,0.2670
11754222,ZSWIM3,ENSG00000197054,,,,,0.214,0.2140
11754223,ZSWIM3,ENSG00000239810,,,,,0.215,0.2135
11754224,ZSWIM7,ENSG00000166160,,,,,0.041,0.0410





'pathfx_score_df'

Unnamed: 0,source,target,weight
0,rs1042713,isoproterenol,0.233675
1,rs1042713,Ace Inhibitors,0.285181
2,rs1042713,Angiotensin,0.285181
3,rs1042713,Plain,0.285181
4,rs1042713,risperidone,0.285181
...,...,...,...
273057,SFTPA1,SFTPA2,0.359348
273058,VCAN,SELL,0.279930
273059,SELP,VCAN,0.279930
273060,NFIB,NFIC,0.279930




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 19344
Number of Unique Protein 2s: 19344
Number of Unique Protein: 19344

PathFX Interactome
-------------------------------------------
Number of Unique sources: 23970
Number of Unique targets: 23970
Number of Unique sources and targets: 23970

Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5396
Number of distinct PathFX nodes: 10022


Comparison of NODES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5396
Number of distinct PathFX nodes: 10022

Comparison of EDGES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Edges from both graph netw

'filtered_string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11750292,ENSG00000258947,ZNF445,,,,,0.138,0.0895
11750293,ENSG00000258947,ZNF516,,,,,0.093,0.0875
11750294,ENSG00000258947,ZNF607,,,,,0.169,0.1595
11750295,ENSG00000258947,ZNRF1,,,,,0.041,0.0410





'filtered_pathfx_score_df'

Unnamed: 0,source,target,weight
26,CHMP1B,KNSTRN,0.279930
27,CHMP1B,USP8,0.562588
28,CHMP1B,STAMBP,0.676545
29,CHMP1B,SPAST,0.416595
30,CHMP1B,SNRNP200,0.279930
...,...,...,...
273016,tat,GPX2,0.246744
273017,tat,GPX5,0.246744
273018,tat,GPX6,0.246744
273020,tat,BRIX1,0.279930





'filtered_string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11750292,ENSG00000258947,ZNF445,,,,,0.138,0.0895
11750293,ENSG00000258947,ZNF516,,,,,0.093,0.0875
11750294,ENSG00000258947,ZNF607,,,,,0.169,0.1595
11750295,ENSG00000258947,ZNRF1,,,,,0.041,0.0410


'filtered_pathfx_score_df'

Unnamed: 0,source,target,weight
26,CHMP1B,KNSTRN,0.279930
27,CHMP1B,USP8,0.562588
28,CHMP1B,STAMBP,0.676545
29,CHMP1B,SPAST,0.416595
30,CHMP1B,SNRNP200,0.279930
...,...,...,...
273016,tat,GPX2,0.246744
273017,tat,GPX5,0.246744
273018,tat,GPX6,0.246744
273020,tat,BRIX1,0.279930



Filtered STRING Database
-------------------------------------------
Number of Unique Protein 1s: 19338
Number of Unique Protein 2s: 19338
Number of Unique Protein: 19338

Filtered PathFX Interactome
-------------------------------------------
Number of Unique sources: 22832
Number of Unique targets: 22832
Number of Unique sources and targets: 22832

Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5390
Number of distinct PathFX nodes: 8884

Constructed and saved outputs/output_filter_string_and_pathfx_score_tables/pathfx_score_table.csv.csv
Constructed and saved outputs/output_filter_string_and_pathfx_score_tables/string_score_table.csv.csv
Constructed and saved inputs/input_pathfx_network_analysis/pathfx_score_table.csv
Constructed and saved inputs/input_string_network_analysis/string_score_table.csv
Finished Running filter_string_and_pathfx_score_tab

'pathfx_score_df'

Unnamed: 0,source,target,weight
0,CHMP1B,KNSTRN,0.279930
1,CHMP1B,USP8,0.562588
2,CHMP1B,STAMBP,0.676545
3,CHMP1B,SPAST,0.416595
4,CHMP1B,SNRNP200,0.279930
...,...,...,...
262235,tat,GPX2,0.246744
262236,tat,GPX5,0.246744
262237,tat,GPX6,0.246744
262238,tat,BRIX1,0.279930




Outputting Number of Distinct Proteins
------------------------------------------------
Number of Unique sources: 22832
Number of Unique targets: 22832
Number of Unique sources and targets: 22832




'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
15239,UBC,6753,0.528777,0.132470,0.935012,0.279930,3570.832522
691,APP,1984,0.287942,0.040666,0.705007,0.246744,571.276135
13717,SUMO2,713,0.429295,0.086484,0.818355,0.246936,306.087256
14713,TP53,627,0.417612,0.124118,0.990000,0.246936,261.842982
22077,tat,436,0.321098,0.085697,0.770889,0.246744,139.998813
...,...,...,...,...,...,...,...
12227,RTTN,1,0.605763,,0.605763,0.605763,0.605763
12226,RTT109,1,0.416595,,0.416595,0.416595,0.416595
12224,RTP2,1,0.279930,,0.279930,0.279930,0.279930
12214,RTFDC1,1,0.279930,,0.279930,0.279930,0.279930






'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,22832.0
mean,11.485634
std,52.692758
max,6753.0
min,1.0
sum,262240.0






'pathfx_num_interac_distribution_df'

Unnamed: 0,Number of Protein Interactions,count
0,1,6955
1,3,2691
2,2,2409
3,4,1488
4,5,1033
...,...,...
190,394,1
189,407,1
188,418,1
187,428,1






'pathfx_edge_score_distribution_df'

Unnamed: 0,Edge Score,count
0,0.279930,38100
1,0.323872,18812
2,0.380279,11614
3,0.471716,11525
4,0.346946,7614
...,...,...
694,0.725046,1
693,0.809616,1
692,0.788432,1
691,0.915291,1






'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
0,CHMP1B,KNSTRN,0.279930
1,CHMP1B,USP8,0.562588
2,CHMP1B,STAMBP,0.676545
3,CHMP1B,SPAST,0.416595
4,CHMP1B,SNRNP200,0.279930
...,...,...,...
262189,Ppif,VDAC1,0.279930
262195,ADGRG1,ADRB2,0.490794
262198,ORF26,TRIM37,0.279930
262200,ALL3_AEDAE,COL3A1,0.416595






'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,131120.0
mean,0.387541
std,0.131891
max,0.99
min,0.244958
sum,50814.314438



Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_score_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_all_weight_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_target_per_source_stats_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_source_to_target_weight_stats.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_num_interac_distribution_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/csv_files/pathfx_removed_cross_pair_pathfx_table.csv
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX_histplot.png
Constructed and saved outputs/output_pathfx_network_analysis/visual_graphs/Distribution of the Number of Protein Interactions per Protein in PathFX (Zoomed)_histplot.png
Constructed an

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11385399,ENSG00000258947,ZNF445,,,,,0.138,0.0895
11385400,ENSG00000258947,ZNF516,,,,,0.093,0.0875
11385401,ENSG00000258947,ZNF607,,,,,0.169,0.1595
11385402,ENSG00000258947,ZNRF1,,,,,0.041,0.0410






'string_num_protein_per_protein_stats_df'

Unnamed: 0,Number Protein Interaction Per Protein
count,19338.0
mean,588.7581
std,530.8547
max,7644.0
min,1.0
sum,11385400.0






'string_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
18,ACTL6B,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
25,AQP2,CALM2,0.0,0.0,0.051,0.050,0.050,0.0500
30,APOH,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
38,ANKRD54,CALM2,0.0,0.0,0.210,0.210,0.210,0.2100
...,...,...,...,...,...,...,...,...
11385399,ENSG00000258947,ZNF445,,,,,0.138,0.0895
11385400,ENSG00000258947,ZNF516,,,,,0.093,0.0875
11385401,ENSG00000258947,ZNF607,,,,,0.169,0.1595
11385402,ENSG00000258947,ZNRF1,,,,,0.041,0.0410






'string_all_max_physical_combined_scores_stats_df'

Unnamed: 0,Max Physical Combined Score
count,5692702.0
mean,0.1836095
std,0.2223238
max,0.999
min,0.041
sum,1045234.0






'string_all_avg_physical_combined_scores_stats_df'

Unnamed: 0,Avg Physical Combined Score
count,5692702.0
mean,0.1836059
std,0.2223218
max,0.999
min,0.041
sum,1045214.0



Constructed and saved outputs/output_string_network_analysis/csv_files/string_score_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_num_protein_per_protein_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_removed_cross_pair_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_all_max_physical_combined_scores_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/string_all_avg_physical_combined_scores_stats_table.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_experiments_per_protein.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_database_per_protein.csv
Constructed and saved outputs/output_string_network_analysis/csv_files/summary_scores/string_summary_score_stats_for_textmining_per_protein

'string_summary_score_stats_for_avg_physical_combined_score_df'

Unnamed: 0,Protein ID,count,mean,std,max,min,sum
0,GAPDH,7644,0.241637,0.150712,0.994,0.041,1847.0700
1,AKT1,6507,0.300910,0.222489,0.999,0.041,1958.0225
2,TP53,6193,0.354423,0.240752,0.999,0.041,2194.9425
3,INS,6053,0.320572,0.201611,0.998,0.041,1940.4200
4,MYC,5786,0.294266,0.186558,0.999,0.041,1702.6225
...,...,...,...,...,...,...,...
19333,C17orf47,1,0.212000,,0.212,0.212,0.2120
19334,CCDC142,1,0.041000,,0.041,0.041,0.0410
19335,C5orf55,1,0.309000,,0.309,0.309,0.3090
19336,CLPSL2,1,0.556000,,0.556,0.556,0.5560




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 19338
Number of Unique Protein 2s: 19338
Number of Unique Protein: 19338


Constructing Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score) 

Saving Constructed Network graph for Protein-Protein Combined score interaction (Avg Physical Combined Score)

STRING Database NetworkX Graph
-------------------------------------
Number of Nodes: 19338
Number of Edges: 5692702
Average node degree: 588.7580928741338
Standard Deviation node degree: 530.8547488309175
Max node degree: 7644.0
Min node degree: 1.0
Average edge weight: 0.1836058763441498
Standard Deviation node degree: 0.22232178512697068
Max edge weight: 0.999
Min edge weight: 0.041
nx Graph is connected - True
Number of Connected Components: 1

Top 25 Proteins with Most Neighbors - Avg Physical Combined Scored
-------------------------------------------------------------------------------------
   Protein 

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
1,FKBP4,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
2,CYP51A1,CALM2,0.000,0.0,0.000,0.041,0.041,0.0410
3,PDK4,CALM2,0.000,0.0,0.104,0.104,0.104,0.1040
4,RALA,CALM2,0.313,0.0,0.000,0.313,0.313,0.3130
...,...,...,...,...,...,...,...,...
11385399,ENSG00000258947,ZNF445,,,,,0.138,0.0895
11385400,ENSG00000258947,ZNF516,,,,,0.093,0.0875
11385401,ENSG00000258947,ZNF607,,,,,0.169,0.1595
11385402,ENSG00000258947,ZNRF1,,,,,0.041,0.0410





'string_num_protein_per_protein_stats_df'

Unnamed: 0,Number Protein Interaction Per Protein
count,19338.0
mean,588.7581
std,530.8547
max,7644.0
min,1.0
sum,11385400.0





'string_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,experiments,database,textmining,Physical Combined Score,Max Physical Combined Score,Avg Physical Combined Score
0,ARF5,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
1,ACTL6B,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
2,AQP2,CALM2,0.0,0.0,0.051,0.050,0.050,0.0500
3,APOH,CALM2,0.0,0.0,0.000,0.041,0.041,0.0410
4,ANKRD54,CALM2,0.0,0.0,0.210,0.210,0.210,0.2100
...,...,...,...,...,...,...,...,...
5692697,ENSG00000258947,ZNF445,,,,,0.138,0.0895
5692698,ENSG00000258947,ZNF516,,,,,0.093,0.0875
5692699,ENSG00000258947,ZNF607,,,,,0.169,0.1595
5692700,ENSG00000258947,ZNRF1,,,,,0.041,0.0410





'string_all_avg_physical_combined_scores_stats_df'

Unnamed: 0,Avg Physical Combined Score
count,5692702.0
mean,0.1836059
std,0.2223218
max,0.999
min,0.041
sum,1045214.0





'string_summary_score_stats_for_avg_physical_combined_score_df'

Unnamed: 0,Protein ID,count,mean,std,max,min,sum
0,GAPDH,7644,0.241637,0.150712,0.994,0.041,1847.0700
1,AKT1,6507,0.300910,0.222489,0.999,0.041,1958.0225
2,TP53,6193,0.354423,0.240752,0.999,0.041,2194.9425
3,INS,6053,0.320572,0.201611,0.998,0.041,1940.4200
4,MYC,5786,0.294266,0.186558,0.999,0.041,1702.6225
...,...,...,...,...,...,...,...
19333,C17orf47,1,0.212000,,0.212,0.212,0.2120
19334,CCDC142,1,0.041000,,0.041,0.041,0.0410
19335,C5orf55,1,0.309000,,0.309,0.309,0.3090
19336,CLPSL2,1,0.556000,,0.556,0.556,0.5560





'pathfx_score_df'

Unnamed: 0,source,target,weight
0,CHMP1B,KNSTRN,0.279930
1,CHMP1B,USP8,0.562588
2,CHMP1B,STAMBP,0.676545
3,CHMP1B,SPAST,0.416595
4,CHMP1B,SNRNP200,0.279930
...,...,...,...
262235,tat,GPX2,0.246744
262236,tat,GPX5,0.246744
262237,tat,GPX6,0.246744
262238,tat,BRIX1,0.279930





'pathfx_num_target_per_source_stats_df'

Unnamed: 0,Number Target Interacton Per Source
count,22832.0
mean,11.485634
std,52.692758
max,6753.0
min,1.0
sum,262240.0





'pathfx_removed_cross_pair_df'

Unnamed: 0,source,target,weight
0,CHMP1B,KNSTRN,0.279930
1,CHMP1B,USP8,0.562588
2,CHMP1B,STAMBP,0.676545
3,CHMP1B,SPAST,0.416595
4,CHMP1B,SNRNP200,0.279930
...,...,...,...
131115,Ppif,VDAC1,0.279930
131116,ADGRG1,ADRB2,0.490794
131117,ORF26,TRIM37,0.279930
131118,ALL3_AEDAE,COL3A1,0.416595





'pathfx_all_weight_stats_df'

Unnamed: 0,weight
count,131120.0
mean,0.387541
std,0.131891
max,0.99
min,0.244958
sum,50814.314438





'pathfx_source_to_target_weight_stats_df'

Unnamed: 0,source,count,mean,std,max,min,sum
0,UBC,6753,0.528777,0.132470,0.935012,0.279930,3570.832522
1,APP,1984,0.287942,0.040666,0.705007,0.246744,571.276135
2,SUMO2,713,0.429295,0.086484,0.818355,0.246936,306.087256
3,TP53,627,0.417612,0.124118,0.990000,0.246936,261.842982
4,tat,436,0.321098,0.085697,0.770889,0.246744,139.998813
...,...,...,...,...,...,...,...
22827,RTTN,1,0.605763,,0.605763,0.605763,0.605763
22828,RTT109,1,0.416595,,0.416595,0.416595,0.416595
22829,RTP2,1,0.279930,,0.279930,0.279930,0.279930
22830,RTFDC1,1,0.279930,,0.279930,0.279930,0.279930




STRING Database
-------------------------------------------
Number of Unique Protein 1s: 19338
Number of Unique Protein 2s: 19338
Number of Unique Protein: 19338


PathFX Interactome
-------------------------------------------
Number of Unique sources: 22832
Number of Unique targets: 22832
Number of Unique sources and targets: 22832


Both STRING and PathFX
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5390
Number of distinct PathFX nodes: 8884


Comparison of NODES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Nodes from both graph networks: 13948
Number of distinct STRING nodes: 5390
Number of distinct PathFX nodes: 8884


Comparison of EDGES in STRING and PathFX NetworkX Graphs
----------------------------------------------------------------
Number of Shared Common Edges from both graph net