In [1]:
import networkx as nx
import random
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os
import sys
import warnings
# Silence FutureWarning messages for the remainder of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Apply seaborn's default theme to plots created from here on.
sns.set_theme()
# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
# NOTE(review): the stdlib `random` module is imported above but is not seeded
# here — confirm whether anything downstream relies on it.
np.random.seed(0)

In [2]:
"""
Helper Functions
"""

# function checks if directory exists, if not it constructs it
def check_directory_exists(dir_name):
    """Make sure the directory ``dir_name`` exists, creating it when missing.

    Missing parent directories are created too (``os.makedirs``). When the
    path already exists nothing is done.

    Args:
        dir_name: Path of the directory to check/create.

    Returns:
        None.
    """
    if not os.path.exists(dir_name):
        # exist_ok=True closes the gap between the existence check and the
        # creation: if the directory appears in between, makedirs no longer
        # raises FileExistsError.
        os.makedirs(dir_name, exist_ok=True)

# function saves DataFrame, list, or set as a textfile in a specific folder
def save_to_text_file(output_folder_dest, input_data, text_file_name):
    """Write ``input_data`` to ``<output_folder_dest><text_file_name>.txt``.

    A pandas DataFrame is rendered with ``DataFrame.to_string()``; any other
    object is rendered with ``str()``.

    Args:
        output_folder_dest: Folder prefix. It is concatenated as-is with the
            file name, so it must already end with a path separator.
        input_data: DataFrame or any object convertible with ``str()``.
        text_file_name: File name without the ``.txt`` extension.

    Returns:
        None. The destination path is printed after the file is written.
    """
    text_file_output = output_folder_dest + text_file_name + ".txt"

    # Render first so a failure while formatting does not leave a truncated
    # file behind.
    if isinstance(input_data, pd.DataFrame):
        rendered = input_data.to_string()
    else:
        rendered = str(input_data)

    # The context manager closes the handle even if the write raises.
    with open(text_file_output, 'w') as text_file:
        text_file.write(rendered)
    print("Constructed and saved", text_file_output)

# Read in Pickle File
def read_pickle_file(file_path):
    """Load and return the object stored in the pickle file ``file_path``.

    The file is read with ``pd.read_pickle``. If the path does not exist the
    function calls ``sys.exit`` with an explanatory message.

    Note: unpickling can execute arbitrary code, so only load trusted files.
    """
    if os.path.exists(file_path):
        return pd.read_pickle(file_path)
    sys.exit("Can't locate input file %s" % file_path)
    
# Save data into a pickle file
def save_to_pickle_file(output_folder_dest, dict_data, dict_file_name):
    """Pickle ``dict_data`` to ``<output_folder_dest><dict_file_name>.pkl``.

    Args:
        output_folder_dest: Folder prefix. It is concatenated as-is with the
            file name, so it must already end with a path separator.
        dict_data: Any picklable object.
        dict_file_name: File name without the ``.pkl`` extension.

    Returns:
        None. The destination path is printed after the file is written.
    """
    # Imported here because the notebook's import cell does not import pickle;
    # without this the function raises NameError on every call.
    import pickle

    output_dict_filename = output_folder_dest + dict_file_name + '.pkl'
    with open(output_dict_filename, 'wb') as handle:
        pickle.dump(dict_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Constructed and saved", output_dict_filename)

# Read in a CSV file
def read_csv_file(
    file_path,
    input_sep=',',
    input_delimiter=None,
    input_index_col=None,
    input_dtype=None,
    input_delim_whitespace=False,
    input_low_memory=True,
):
    """Read a delimited text file into a DataFrame, exiting if it is missing.

    NOTE: a later ``read_csv_file`` definition in this same cell rebinds the
    name, so this version is only active if that later definition is removed.

    Args:
        file_path: Path of the file to read.
        input_sep: Field separator used when no other separator option is set.
        input_delimiter: Alias for the separator; when given it takes
            precedence over ``input_sep``.
        input_index_col: Forwarded to ``pd.read_csv(index_col=...)``.
        input_dtype: Forwarded to ``pd.read_csv(dtype=...)``.
        input_delim_whitespace: When True, fields are split on runs of
            whitespace and the other separator arguments are ignored.
        input_low_memory: Forwarded to ``pd.read_csv(low_memory=...)``.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        SystemExit: if ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        sys.exit("Can't locate input file %s" % file_path)

    # pandas rejects a call that sets more than one of sep / delimiter /
    # delim_whitespace, so resolve them to a single separator here.
    # sep=r"\s+" is the documented replacement for the deprecated
    # delim_whitespace=True.
    if input_delim_whitespace:
        separator = r"\s+"
    elif input_delimiter is not None:
        separator = input_delimiter
    else:
        separator = input_sep

    return pd.read_csv(
        filepath_or_buffer=file_path,
        sep=separator,
        index_col=input_index_col,
        dtype=input_dtype,
        low_memory=input_low_memory,
    )

# function saves a DataFrame as a CSV file in a specific folder
def save_to_csv_file(output_folder_dest, df, csv_file_name, input_index=False):
    """Write ``df`` to ``<output_folder_dest><csv_file_name>.csv``.

    The folder prefix is concatenated as-is with the file name, so it must
    already end with a path separator. ``input_index`` is forwarded to
    ``df.to_csv(index=...)``. The destination path is printed afterwards.
    """
    csv_leaf = csv_file_name + ".csv"
    csv_path = output_folder_dest + csv_leaf
    df.to_csv(csv_path, index=input_index)
    print("Constructed and saved", csv_path)
    
def read_csv_file(
    file_path,
    input_sep=',',
    input_delimiter=None,
    input_index_col=None,
    input_dtype=None,
    input_delim_whitespace=False,
    input_low_memory=True,
):
    """Read a delimited text file into a DataFrame.

    NOTE: this definition rebinds ``read_csv_file`` and therefore replaces the
    earlier definition in this cell. Unlike that one it performs no existence
    check, so a missing file surfaces as the error raised by ``pd.read_csv``.

    Args:
        file_path: Path of the file to read.
        input_sep: Field separator used when no other separator option is set.
        input_delimiter: Alias for the separator; when given it takes
            precedence over ``input_sep``.
        input_index_col: Forwarded to ``pd.read_csv(index_col=...)``.
        input_dtype: Forwarded to ``pd.read_csv(dtype=...)``.
        input_delim_whitespace: When True, fields are split on runs of
            whitespace and the other separator arguments are ignored.
        input_low_memory: Forwarded to ``pd.read_csv(low_memory=...)``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    # pandas rejects a call that sets more than one of sep / delimiter /
    # delim_whitespace, so resolve them to a single separator here.
    # sep=r"\s+" is the documented replacement for the deprecated
    # delim_whitespace=True.
    if input_delim_whitespace:
        separator = r"\s+"
    elif input_delimiter is not None:
        separator = input_delimiter
    else:
        separator = input_sep

    return pd.read_csv(
        filepath_or_buffer=file_path,
        sep=separator,
        index_col=input_index_col,
        dtype=input_dtype,
        low_memory=input_low_memory,
    )

def save_to_gpickle_file(output_folder_dest, data, file_name):
    """Pickle ``data`` to ``<output_folder_dest><file_name>.gpickle``.

    Uses ``nx.write_gpickle`` when the installed NetworkX still provides it.
    NetworkX 3.0 removed that function, so on newer versions the object is
    written with the standard ``pickle`` module instead.

    Args:
        output_folder_dest: Folder prefix. It is concatenated as-is with the
            file name, so it must already end with a path separator.
        data: Object to store (typically a NetworkX graph).
        file_name: File name without the ``.gpickle`` extension.

    Returns:
        None. The destination path is printed after the file is written.
    """
    import pickle  # not imported in the notebook's import cell

    output_filename = output_folder_dest + file_name + ".gpickle"
    legacy_writer = getattr(nx, "write_gpickle", None)
    if legacy_writer is not None:
        legacy_writer(data, output_filename)
    else:
        # Fallback for NetworkX >= 3.0, where write_gpickle no longer exists.
        with open(output_filename, "wb") as handle:
            pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Constructed and saved", output_filename)
    
def read_networkx_gpickle_file(input_file_name):
    """Load and return the object stored in the gpickle file ``input_file_name``.

    Uses ``nx.read_gpickle`` when the installed NetworkX still provides it.
    NetworkX 3.0 removed that function, so on newer versions the file is read
    with the standard ``pickle`` module instead.

    Note: unpickling can execute arbitrary code, so only load trusted files.

    Args:
        input_file_name: Path of the ``.gpickle`` file.

    Returns:
        The unpickled object.

    Raises:
        SystemExit: if ``input_file_name`` does not exist.
    """
    import pickle  # not imported in the notebook's import cell

    if not os.path.exists(input_file_name):
        sys.exit("Can't locate input file %s" % input_file_name)

    legacy_reader = getattr(nx, "read_gpickle", None)
    if legacy_reader is not None:
        return legacy_reader(input_file_name)
    # Fallback for NetworkX >= 3.0, where read_gpickle no longer exists.
    with open(input_file_name, "rb") as handle:
        return pickle.load(handle)

In [3]:
# Root output folder for this notebook (relative path); later cells build
# their sub-folders by appending to this string, so it keeps the trailing "/".
output_run_string_diagnostic_folder = "outputs/"
# Create the folder if it does not exist yet.
check_directory_exists(output_run_string_diagnostic_folder)

In [4]:
# Load the score table and keep only the two protein columns and the
# averaged physical score; the remaining columns are dropped right away.
string_score_df = read_csv_file("inputs/NOT_FILTERED_string_score_table.csv")[
    ["Protein 1", "Protein 2", "Avg Physical Combined Score"]
]
# Show a label followed by the (truncated) frame.
display("string_score_df", string_score_df)

'string_score_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score
0,ARF5,CALM2,0.0410
1,FKBP4,CALM2,0.0410
2,CYP51A1,CALM2,0.0410
3,PDK4,CALM2,0.1040
4,RALA,CALM2,0.3130
...,...,...,...
11754221,ZSCAN5A,ENSG00000239810,0.2670
11754222,ZSWIM3,ENSG00000197054,0.2140
11754223,ZSWIM3,ENSG00000239810,0.2135
11754224,ZSWIM7,ENSG00000166160,0.0410


In [None]:
# ----- Threshold Analysis ----- 

In [None]:
# Sub-folder under the run's root output folder for the threshold-analysis
# batches; created if missing.
main_threshold_analysis_output_folder = output_run_string_diagnostic_folder + "threshold_analysis/"
check_directory_exists(main_threshold_analysis_output_folder)

In [None]:
transfer_filter_val_lst = list(np.round(np.arange(0.7, 1.0, 0.01), 2))
transfer_string_score_df = string_score_df
transfer_output_folder = main_threshold_analysis_output_folder
transfer_output_batch_folder_name = "from_0.70_to_1.0_step_0.01"

print("Running threshold_analysis.ipynb ... \n")
%run ./threshold_analysis.ipynb
print("Finished Running threshold_analysis.ipynb \n")

In [None]:
transfer_filter_val_lst = list(np.round(np.arange(0.9, 0.91, 0.001), 3))
transfer_string_score_df = string_score_df
transfer_output_folder = main_threshold_analysis_output_folder
transfer_output_batch_folder_name = "from_0.90_to_0.91_step_0.001"

print("Running threshold_analysis.ipynb ... \n")
%run ./threshold_analysis.ipynb
print("Finished Running threshold_analysis.ipynb \n")

In [None]:
transfer_filter_val_lst = list(np.round(np.arange(0, 1.01, 0.01), 2))
transfer_string_score_df = string_score_df
transfer_output_folder = main_threshold_analysis_output_folder
transfer_output_batch_folder_name = "from_0.0_to_1.0_step_0.01"

print("Running threshold_analysis.ipynb ... \n")
%run ./threshold_analysis.ipynb
print("Finished Running threshold_analysis.ipynb \n")

In [None]:
# ----- Binned Score Analysis ----- 

In [5]:
# Sub-folder under the run's root output folder for the binned-score-analysis
# batches; created if missing.
main_binned_score_analysis_output_folder = output_run_string_diagnostic_folder + "binned_score_analysis/"
check_directory_exists(main_binned_score_analysis_output_folder)

In [8]:
transfer_bin_list = list(np.round(np.arange(0.7, 1.00, 0.05), 2))
transfer_output_folder = main_binned_score_analysis_output_folder
transfer_output_batch_folder_name = "from_0.7_to_1.0_step_0.05"
transfer_string_score_df = string_score_df

print("Running binned_score_analysis.ipynb ... \n")
%run ./binned_score_analysis.ipynb
print("Finished Running binned_score_analysis.ipynb \n")

Running binned_score_analysis.ipynb ... 



'input_reduced_string_score_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score,Binned Scored Value
11754199,ZNF630,ENSG00000197054,0.712,"[0.7, 0.75)"
6213103,FXN,SETX,0.717,"[0.7, 0.75)"
6212396,FUS,SETX,0.702,"[0.7, 0.75)"
6210714,CSB-PGBD3,ERCC5,0.732,"[0.7, 0.75)"
6208882,NT5M,SNRPA,0.710,"[0.7, 0.75)"
...,...,...,...,...
724039,COL23A1,FURIN,0.968,"[0.95, 1.0)"
9156382,SEC61B,RPL36A,0.979,"[0.95, 1.0)"
4918846,DARS,EPRS,0.991,"[0.95, 1.0)"
3922977,GNG3,GNG13,0.962,"[0.95, 1.0)"


'input_reduced_string_score_df_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score,Binned Scored Value
6213103,FXN,SETX,0.717,"[0.7, 0.75)"
4257121,CETP,PCSK9,0.731,"[0.7, 0.75)"
4257444,EGFR,PCSK9,0.707,"[0.7, 0.75)"
4257958,LDLRAP1,PCSK9,0.725,"[0.7, 0.75)"
4259351,B4GALNT2,ST6GAL2,0.717,"[0.7, 0.75)"
...,...,...,...,...
570250,CAPN1,EZR,0.970,"[0.95, 1.0)"
11032807,BRCC3,FAM175A,0.999,"[0.95, 1.0)"
24129,ABCC8,RAPGEF4,0.980,"[0.95, 1.0)"
5952510,CCNC,MED25,0.980,"[0.95, 1.0)"





'binned_score_stats_df:'

Unnamed: 0,Binned Scored Value,Number of Protein Interactions,Number Unique Proteins,mean edge score,min edge score,max edge score,std edge score,mean degree,min degree,max degree,std degree
0,"[0.7, 0.75)",20906.0,11047.0,0.721359,0.7,0.749,0.013901,3.784919,1.0,102.0,4.854155
1,"[0.75, 0.8)",11197.0,8177.0,0.775998,0.75,0.799,0.015598,2.738657,1.0,84.0,3.397948
2,"[0.8, 0.85)",15638.0,8913.0,0.816372,0.8,0.849,0.01519,3.509032,1.0,84.0,4.962986
3,"[0.85, 0.9)",7908.0,5979.0,0.873007,0.85,0.899,0.014104,2.645258,1.0,37.0,3.266531
4,"[0.9, 0.95)",263747.0,11687.0,0.907102,0.9,0.949,0.012333,45.135107,1.0,1201.0,74.930169
5,"[0.95, 1.0)",47658.0,9183.0,0.975913,0.95,0.999,0.015365,10.379615,1.0,449.0,15.965373





'binned_degree_dist_df:'

Unnamed: 0,Binned Scored Value,Distribution
0,"[0.7, 0.75)","[3, 1, 3, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 1, 2, ..."
1,"[0.75, 0.8)","[1, 1, 5, 1, 1, 1, 1, 9, 1, 14, 1, 1, 5, 4, 2,..."
2,"[0.8, 0.85)","[1, 3, 2, 2, 1, 1, 1, 2, 1, 1, 3, 11, 2, 1, 2,..."
3,"[0.85, 0.9)","[2, 1, 1, 2, 1, 3, 8, 3, 13, 1, 3, 1, 3, 1, 1,..."
4,"[0.9, 0.95)","[1, 74, 6, 84, 3, 6, 14, 129, 11, 1, 4, 114, 6..."
5,"[0.95, 1.0)","[4, 1, 14, 1, 1, 7, 5, 5, 1, 1, 7, 1, 1, 3, 4,..."



Constructed and saved outputs/binned_score_analysis/from_0.7_to_1.0_step_0.05/binned_score_stats_table.csv
Constructed and saved outputs/binned_score_analysis/from_0.7_to_1.0_step_0.05/Total Number of Protein Interactions per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.7_to_1.0_step_0.05/Number Unique Proteins per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.7_to_1.0_step_0.05/Mean Edge Score per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.7_to_1.0_step_0.05/Mean Number of Interactions (Degree) per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.7_to_1.0_step_0.05/Avg Physical Combined Score Distribution Spread per Binned Scored Value.png
Constructed and saved outputs/binned_score_analysis/from_0.7_to_1.0_step_0.05/Distribution of Number of Interactions (Degree) per Binned Scored Value.png
Constructed

In [9]:
transfer_bin_list = list(np.round(np.arange(0.9, 1.01, 0.01), 2))
transfer_output_folder = main_binned_score_analysis_output_folder
transfer_output_batch_folder_name = "from_0.9_to_1.0_step_0.01"
transfer_string_score_df = string_score_df

print("Running binned_score_analysis.ipynb ... \n")
%run ./binned_score_analysis.ipynb
print("Finished Running binned_score_analysis.ipynb \n")

Running binned_score_analysis.ipynb ... 



'input_reduced_string_score_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score,Binned Scored Value
490,CALM3,CALM2,0.906,"[0.9, 0.91)"
7440878,LPAR3,HTR1A,0.900,"[0.9, 0.91)"
7440867,GPSM1,HTR1A,0.902,"[0.9, 0.91)"
7440865,APLN,HTR1A,0.904,"[0.9, 0.91)"
7440859,HCAR1,HTR1A,0.900,"[0.9, 0.91)"
...,...,...,...,...
6752029,RAD51B,RAD51D,0.990,"[0.99, 1.0)"
6751578,XRCC2,RAD51D,0.998,"[0.99, 1.0)"
6751466,RAD51C,RAD51D,0.996,"[0.99, 1.0)"
1165891,ARHGEF7,PAK1,0.992,"[0.99, 1.0)"


'input_reduced_string_score_df_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score,Binned Scored Value
7440867,GPSM1,HTR1A,0.902,"[0.9, 0.91)"
3291661,CDK12,GTF2H1,0.900,"[0.9, 0.91)"
1851252,FBXW12,UBE2D1,0.900,"[0.9, 0.91)"
3291633,CUL4B,GTF2H1,0.904,"[0.9, 0.91)"
3291614,EAF1,GTF2H1,0.900,"[0.9, 0.91)"
...,...,...,...,...
1811510,GNAI2,GNB1,0.997,"[0.99, 1.0)"
8221419,MRPL24,MRPL42,0.994,"[0.99, 1.0)"
8221284,MRPL16,MRPL42,0.995,"[0.99, 1.0)"
8956582,TADA2A,TADA3,0.998,"[0.99, 1.0)"





'binned_score_stats_df:'

Unnamed: 0,Binned Scored Value,Number of Protein Interactions,Number Unique Proteins,mean edge score,min edge score,max edge score,std edge score,mean degree,min degree,max degree,std degree
0,"[0.9, 0.91)",197762.0,9580.0,0.900891,0.9,0.909,0.002044,41.28643,1.0,1092.0,68.432524
1,"[0.91, 0.92)",25026.0,7116.0,0.914031,0.91,0.919,0.002949,7.033727,1.0,100.0,8.595378
2,"[0.92, 0.93)",17120.0,6893.0,0.924233,0.92,0.929,0.002828,4.967358,1.0,63.0,5.627817
3,"[0.93, 0.94)",12943.0,6844.0,0.9347,0.93,0.939,0.002949,3.782291,1.0,75.0,4.264176
4,"[0.94, 0.95)",10896.0,6038.0,0.944226,0.94,0.949,0.002891,3.609142,1.0,46.0,3.89938
5,"[0.95, 0.96)",9408.0,5761.0,0.953786,0.95,0.959,0.002631,3.2661,1.0,54.0,3.589265
6,"[0.96, 0.97)",8647.0,5577.0,0.964594,0.96,0.969,0.002749,3.10095,1.0,42.0,3.384658
7,"[0.97, 0.98)",8701.0,5195.0,0.975744,0.97,0.979,0.003213,3.349759,1.0,65.0,5.394407
8,"[0.98, 0.99)",8935.0,4875.0,0.984341,0.98,0.989,0.002986,3.665641,1.0,377.0,7.762438
9,"[0.99, 1.0)",11967.0,4672.0,0.995316,0.99,0.999,0.00285,5.12286,1.0,74.0,9.044





'binned_degree_dist_df:'

Unnamed: 0,Binned Scored Value,Distribution
0,"[0.9, 0.91)","[52, 1, 49, 2, 4, 11, 103, 3, 2, 84, 2, 1, 2, ..."
1,"[0.91, 0.92)","[9, 1, 13, 1, 1, 7, 3, 1, 11, 1, 4, 4, 6, 1, 2..."
2,"[0.92, 0.93)","[7, 3, 8, 1, 1, 4, 2, 10, 1, 1, 1, 1, 6, 1, 1,..."
3,"[0.93, 0.94)","[1, 3, 1, 9, 1, 9, 2, 3, 1, 1, 1, 1, 3, 2, 1, ..."
4,"[0.94, 0.95)","[3, 5, 1, 6, 1, 1, 1, 6, 1, 2, 1, 2, 2, 4, 1, ..."
5,"[0.95, 0.96)","[2, 5, 1, 5, 1, 1, 1, 4, 2, 1, 1, 1, 2, 2, 14,..."
6,"[0.96, 0.97)","[1, 6, 1, 3, 1, 1, 1, 2, 2, 6, 1, 1, 1, 5, 2, ..."
7,"[0.97, 0.98)","[1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 6, 2, ..."
8,"[0.98, 0.99)","[2, 3, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 10,..."
9,"[0.99, 1.0)","[1, 1, 1, 1, 1, 2, 3, 1, 2, 1, 1, 2, 9, 6, 4, ..."



Constructed and saved outputs/binned_score_analysis/from_0.9_to_1.0_step_0.01/binned_score_stats_table.csv
Constructed and saved outputs/binned_score_analysis/from_0.9_to_1.0_step_0.01/Total Number of Protein Interactions per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_1.0_step_0.01/Number Unique Proteins per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_1.0_step_0.01/Mean Edge Score per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_1.0_step_0.01/Mean Number of Interactions (Degree) per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_1.0_step_0.01/Avg Physical Combined Score Distribution Spread per Binned Scored Value.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_1.0_step_0.01/Distribution of Number of Interactions (Degree) per Binned Scored Value.png
Constructed

In [10]:
transfer_bin_list = list(np.round(np.arange(0.9, 0.91, 0.001), 3))
transfer_output_folder = main_binned_score_analysis_output_folder
transfer_output_batch_folder_name = "from_0.9_to_0.91_step_0.001"
transfer_string_score_df = string_score_df

print("Running binned_score_analysis.ipynb ... \n")
%run ./binned_score_analysis.ipynb
print("Finished Running binned_score_analysis.ipynb \n")

Running binned_score_analysis.ipynb ... 



'input_reduced_string_score_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score,Binned Scored Value
6205400,UBR1,RNF19B,0.900,"[0.9, 0.901)"
7105561,HGSNAT,KCNAB2,0.900,"[0.9, 0.901)"
7105554,CYBB,KCNAB2,0.900,"[0.9, 0.901)"
10530481,MCEMP1,SLC44A2,0.900,"[0.9, 0.901)"
7105549,ATP6AP2,KCNAB2,0.900,"[0.9, 0.901)"
...,...,...,...,...
7376697,MAPK1,TRIM24,0.909,"[0.909, 0.91)"
8707217,MTNR1A,HRH4,0.909,"[0.909, 0.91)"
1686998,CDC20,UBE2N,0.909,"[0.909, 0.91)"
566770,CAND1,ASB1,0.909,"[0.909, 0.91)"


'input_reduced_string_score_df_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score,Binned Scored Value
7105561,HGSNAT,KCNAB2,0.900,"[0.9, 0.901)"
404316,ASB8,UBE2D4,0.900,"[0.9, 0.901)"
404304,RBBP6,UBE2D4,0.900,"[0.9, 0.901)"
404134,KLHL41,UBE2D4,0.900,"[0.9, 0.901)"
404133,SH3RF1,UBE2D4,0.900,"[0.9, 0.901)"
...,...,...,...,...
3363516,EIF4A3,NHP2L1,0.909,"[0.909, 0.91)"
6534062,GBP4,HLA-DQB2,0.909,"[0.909, 0.91)"
67674,RAB8A,RAB9B,0.909,"[0.909, 0.91)"
4390969,DIS3,RPL15,0.909,"[0.909, 0.91)"





'binned_score_stats_df:'

Unnamed: 0,Binned Scored Value,Number of Protein Interactions,Number Unique Proteins,mean edge score,min edge score,max edge score,std edge score,mean degree,min degree,max degree,std degree
0,"[0.9, 0.901)",155349.0,7802.0,0.9,0.9,0.9,2.610809e-12,39.822866,1.0,960.0,62.180973
1,"[0.901, 0.902)",7382.0,4284.0,0.901,0.901,0.901,1.034798e-13,3.446312,1.0,51.0,3.463382
2,"[0.902, 0.903)",6517.0,4119.0,0.902,0.902,0.902,3.886079e-14,3.16436,1.0,53.0,3.205209
3,"[0.903, 0.904)",5392.0,3999.0,0.903,0.903,0.903,4.074896e-14,2.696674,1.0,31.0,2.523777
4,"[0.904, 0.905)",5468.0,4001.0,0.904,0.904,0.904,7.439175e-14,2.733317,1.0,41.0,2.584011
5,"[0.905, 0.906)",4350.0,3678.0,0.905,0.905,0.905,8.372044e-14,2.365416,1.0,17.0,2.040178
6,"[0.906, 0.907)",3976.0,3542.0,0.906,0.906,0.906,4.297104e-14,2.245059,1.0,26.0,1.936531
7,"[0.907, 0.908)",4018.0,3529.0,0.907,0.907,0.907,5.04104e-14,2.277132,1.0,23.0,1.939634
8,"[0.908, 0.909)",3821.0,3456.0,0.908,0.908,0.908,6.628899e-14,2.211227,1.0,26.0,1.890475
9,"[0.909, 0.91)",1489.0,2043.0,0.909,0.909,0.909,2.054603e-14,1.45766,1.0,10.0,0.897567





'binned_degree_dist_df:'

Unnamed: 0,Binned Scored Value,Distribution
0,"[0.9, 0.901)","[32, 1, 27, 1, 1, 8, 97, 1, 1, 59, 1, 1, 13, 4..."
1,"[0.901, 0.902)","[3, 4, 3, 4, 1, 1, 4, 1, 2, 2, 1, 5, 1, 1, 1, ..."
2,"[0.902, 0.903)","[2, 7, 1, 1, 2, 3, 1, 1, 1, 1, 1, 2, 1, 3, 1, ..."
3,"[0.903, 0.904)","[3, 2, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
4,"[0.904, 0.905)","[5, 1, 2, 1, 4, 1, 1, 1, 1, 3, 1, 1, 1, 5, 1, ..."
5,"[0.905, 0.906)","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, ..."
6,"[0.906, 0.907)","[2, 3, 1, 4, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, ..."
7,"[0.907, 0.908)","[1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, ..."
8,"[0.908, 0.909)","[2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 4, 2, ..."
9,"[0.909, 0.91)","[1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 4, 2, 1, ..."



Constructed and saved outputs/binned_score_analysis/from_0.9_to_0.91_step_0.001/binned_score_stats_table.csv
Constructed and saved outputs/binned_score_analysis/from_0.9_to_0.91_step_0.001/Total Number of Protein Interactions per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_0.91_step_0.001/Number Unique Proteins per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_0.91_step_0.001/Mean Edge Score per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_0.91_step_0.001/Mean Number of Interactions (Degree) per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_0.91_step_0.001/Avg Physical Combined Score Distribution Spread per Binned Scored Value.png
Constructed and saved outputs/binned_score_analysis/from_0.9_to_0.91_step_0.001/Distribution of Number of Interactions (Degree) per Binned Scored Value.p

In [11]:
transfer_bin_list = list(np.round(np.arange(0, 1.01, 0.05), 2))
transfer_output_folder = main_binned_score_analysis_output_folder
transfer_output_batch_folder_name = "from_0.0_to_1.0_step_0.05"
transfer_string_score_df = string_score_df

print("Running binned_score_analysis.ipynb ... \n")
%run ./binned_score_analysis.ipynb
print("Finished Running binned_score_analysis.ipynb \n")

Running binned_score_analysis.ipynb ... 



'input_reduced_string_score_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score,Binned Scored Value
0,ARF5,CALM2,0.041,"[0.0, 0.05)"
6323206,CHD8,PLAA,0.041,"[0.0, 0.05)"
6323205,SEL1L3,PLAA,0.041,"[0.0, 0.05)"
6323202,ATP5A1,PLAA,0.041,"[0.0, 0.05)"
6323200,MX1,PLAA,0.041,"[0.0, 0.05)"
...,...,...,...,...
10939200,RPS6KA2,RPS6KA5,0.953,"[0.95, 1.0)"
10939209,RPS6KA1,RPS6KA5,0.965,"[0.95, 1.0)"
1072055,GNAI1,GNA15,0.952,"[0.95, 1.0)"
10939139,RELA,RPS6KA5,0.979,"[0.95, 1.0)"


'input_reduced_string_score_df_removed_cross_pair_df'

Unnamed: 0,Protein 1,Protein 2,Avg Physical Combined Score,Binned Scored Value
0,ARF5,CALM2,0.041,"[0.0, 0.05)"
8529306,ETS1,PAX1,0.041,"[0.0, 0.05)"
8529293,FAM43A,PAX1,0.041,"[0.0, 0.05)"
8529158,HOXA1,PAX1,0.041,"[0.0, 0.05)"
8529138,AZIN1,PAX1,0.041,"[0.0, 0.05)"
...,...,...,...,...
5063935,ADCYAP1R1,VIPR1,0.977,"[0.95, 1.0)"
3814727,CYP1A1,CYP2B6,0.988,"[0.95, 1.0)"
7501345,MRPL24,MRPS11,0.979,"[0.95, 1.0)"
3091769,IRAK1,TAB1,0.968,"[0.95, 1.0)"





'binned_score_stats_df:'

Unnamed: 0,Binned Scored Value,Number of Protein Interactions,Number Unique Proteins,mean edge score,min edge score,max edge score,std edge score,mean degree,min degree,max degree,std degree
0,"[0.0, 0.05)",2387213.0,18973.0,0.041208,0.041,0.0495,0.001082,251.643177,1.0,3519.0,295.563486
1,"[0.05, 0.1)",541171.0,17638.0,0.074416,0.05,0.0995,0.014055,61.364214,1.0,642.0,63.865417
2,"[0.1, 0.15)",665298.0,18339.0,0.126627,0.1,0.1495,0.014643,72.555537,1.0,1325.0,64.397227
3,"[0.15, 0.2)",647655.0,18973.0,0.174997,0.15,0.1995,0.014364,68.271228,1.0,1570.0,77.354214
4,"[0.2, 0.25)",444797.0,18959.0,0.222807,0.2,0.249,0.012994,46.92199,1.0,1150.0,61.050058
5,"[0.25, 0.3)",282136.0,18902.0,0.273639,0.25,0.2995,0.016389,29.852502,1.0,964.0,35.192288
6,"[0.3, 0.35)",133260.0,18770.0,0.326223,0.3,0.3495,0.015052,14.199254,1.0,454.0,16.972981
7,"[0.35, 0.4)",107469.0,18519.0,0.375233,0.35,0.3995,0.016578,11.60635,1.0,337.0,13.90522
8,"[0.4, 0.45)",102342.0,18522.0,0.423991,0.4,0.449,0.015216,11.050858,1.0,408.0,14.987204
9,"[0.45, 0.5)",53143.0,16929.0,0.474865,0.45,0.499,0.012316,6.278339,1.0,216.0,9.189202





'binned_degree_dist_df:'

Unnamed: 0,Binned Scored Value,Distribution
0,"[0.0, 0.05)","[840, 20, 283, 105, 92, 78, 63, 30, 106, 237, ..."
1,"[0.05, 0.1)","[28, 31, 111, 27, 8, 48, 3, 20, 123, 99, 56, 5..."
2,"[0.1, 0.15)","[6, 46, 35, 163, 40, 27, 48, 12, 42, 165, 106,..."
3,"[0.15, 0.2)","[3, 81, 37, 182, 65, 32, 58, 35, 38, 68, 149, ..."
4,"[0.2, 0.25)","[8, 68, 17, 142, 45, 17, 31, 34, 19, 44, 143, ..."
5,"[0.25, 0.3)","[3, 30, 22, 96, 28, 13, 18, 22, 14, 29, 70, 24..."
6,"[0.3, 0.35)","[1, 20, 9, 36, 14, 7, 5, 17, 7, 9, 28, 11, 14,..."
7,"[0.35, 0.4)","[12, 7, 29, 9, 5, 6, 7, 7, 9, 19, 4, 7, 8, 6, ..."
8,"[0.4, 0.45)","[13, 8, 18, 7, 7, 3, 7, 3, 13, 22, 3, 6, 8, 9,..."
9,"[0.45, 0.5)","[1, 3, 8, 14, 1, 3, 6, 3, 3, 1, 14, 2, 3, 5, 6..."



Constructed and saved outputs/binned_score_analysis/from_0.0_to_1.0_step_0.05/binned_score_stats_table.csv
Constructed and saved outputs/binned_score_analysis/from_0.0_to_1.0_step_0.05/Total Number of Protein Interactions per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.0_to_1.0_step_0.05/Number Unique Proteins per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.0_to_1.0_step_0.05/Mean Edge Score per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.0_to_1.0_step_0.05/Mean Number of Interactions (Degree) per Binned Scored Value_vbarplot.png
Constructed and saved outputs/binned_score_analysis/from_0.0_to_1.0_step_0.05/Avg Physical Combined Score Distribution Spread per Binned Scored Value.png
Constructed and saved outputs/binned_score_analysis/from_0.0_to_1.0_step_0.05/Distribution of Number of Interactions (Degree) per Binned Scored Value.png
Finished Ru