Input: - _size.txt - output from summarize notebook
       - *.err - output/error files from PCSF paramter sweeps
       - *_optimalForest.sif - output/optimal forest files from PCSF parameter sweeps

This uses the PCSF parameter sweeps and the output from the summarize notebook to produce statistics related to the connections in order to choose parameters for the PCSF noise sweeps.

# Load in libraries, Eliminate columns, Obtain only rows with steiner nodes > 2

In [4]:
import os.path
import glob
import matplotlib.pyplot as plt
import pandas as pd

#r-escapes the whole string
Location = r'Z:/Yeast/osmotic-stress/Debug/_size.txt'
error = r'C:/Users/dcronin/Desktop/PCSF-072916/*.err'
optimalForests = r'C:/Users/dcronin/Desktop/PCSF-072916/*_optimalForest.sif'

df = pd.read_csv(Location, sep = '\t', names = ["Name", "ForestSize", "Steiner", "Prizes", "TotalPrizes", "Quotient", "UBC"], skiprows = 0)
df = df.drop(['UBC'], 1)
df = df.drop([0])
df2 = df[((df.Steiner <= '2'))]
print "Number of items with 2 or less steiner nodes: ", len(df) - len(df2)
print "Number of items with more than 2 steiner nodes: ", len(df)

df2 = df[((df.Prizes == '0'))]
#num_files = len([f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))])

print "Number of error file: ", len(glob.glob(error))
print "Number of optimal forest files: ", len(glob.glob(optimalForests))  
    
df = df[((df.Steiner > '2'))]

Number of items with 2 or less steiner nodes:  2238
Number of items with more than 2 steiner nodes:  16799
Number of error file:  16800
Number of optimal forest files:  16799


In [3]:
print df3

                                                    Name ForestSize Steiner  \
1      prizes_beta0.25_mu0.050_omega4.5_seed122815_op...          0       0   
2      prizes_beta0.25_mu0.050_omega3.0_seed122815_op...          0       0   
3      prizes_beta0.25_mu0.010_omega5.0_seed122815_op...          0       0   
4      prizes_beta0.25_mu0.010_omega6.5_seed122815_op...          0       0   
5      prizes_beta0.25_mu0.020_omega3.0_seed122815_op...          0       0   
6      prizes_beta0.25_mu0.020_omega9.5_seed122815_op...          0       0   
7      prizes_beta0.25_mu0.055_omega5.0_seed122815_op...          0       0   
8      prizes_beta0.25_mu0.055_omega0.5_seed122815_op...          0       0   
9      prizes_beta0.25_mu0.020_omega3.5_seed122815_op...          0       0   
10     prizes_beta0.25_mu0.060_omega0.5_seed122815_op...          0       0   
11     prizes_beta0.25_mu0.050_omega8.0_seed122815_op...          0       0   
12     prizes_beta0.25_mu0.020_omega10.0_seed122815_

# Cast each column as float and divide prize nodes by forest size

In [8]:
for index,row in df.iterrows():
    row['ForestSize'] = float(row['ForestSize'])
    row['Prizes'] = float(row['Prizes'])
    row['Quotient'] = float(row['Quotient'])
    row['Quotient'] = row['Prizes']/row['ForestSize']
df

Unnamed: 0,Name,ForestSize,Steiner,Prizes,TotalPrizes,Quotient
40,prizes_beta0.50_mu0.005_omega8.0_seed122815_op...,127,5,122,1596,0.96063
48,prizes_beta0.50_mu0.015_omega7.5_seed122815_op...,108,3,105,1596,0.972222
50,prizes_beta0.50_mu0.020_omega2.0_seed122815_op...,105,3,102,1596,0.971429
52,prizes_beta0.50_mu0.020_omega3.0_seed122815_op...,106,3,103,1596,0.971698
65,prizes_beta0.50_mu0.005_omega7.5_seed122815_op...,128,5,123,1596,0.960938
66,prizes_beta0.50_mu0.000_omega3.0_seed122815_op...,140,5,135,1596,0.964286
70,prizes_beta0.50_mu0.010_omega4.5_seed122815_op...,113,3,110,1596,0.973451
71,prizes_beta0.50_mu0.015_omega5.0_seed122815_op...,108,3,105,1596,0.972222
76,prizes_beta0.50_mu0.015_omega2.0_seed122815_op...,108,3,105,1596,0.972222
107,prizes_beta0.75_mu0.010_omega7.0_seed122815_op...,197,5,192,1596,0.974619


# Find the maximum of the quotient

In [9]:
maximum = df['Quotient'].max()

# Define isclose function that is used in python 3

In [10]:
def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)

# Output rows with greatest quotient

In [11]:
for index,row in df.iterrows():
    if(isclose(row['Quotient'],maximum)):
        print row

Name           prizes_beta1.75_mu0.095_omega5.5_seed122815_op...
ForestSize                                                   246
Steiner                                                        3
Prizes                                                       243
TotalPrizes                                                 1596
Quotient                                                0.987805
Name: 405, dtype: object
Name           prizes_beta1.75_mu0.095_omega2.5_seed122815_op...
ForestSize                                                   246
Steiner                                                        3
Prizes                                                       243
TotalPrizes                                                 1596
Quotient                                                0.987805
Name: 2460, dtype: object
Name           prizes_beta1.75_mu0.095_omega3.0_seed122815_op...
ForestSize                                                   246
Steiner                                