Input: - _size.txt - output from summarize notebook
       - *.err - output/error files from PCSF paramter sweeps
       - *_optimalForest.sif - output/optimal forest files from PCSF parameter sweeps

This uses the PCSF parameter sweeps and the output from the summarize notebook to produce statistics related to the connections in order to choose parameters for the PCSF noise sweeps.

# Load in libraries and files

In [1]:
import os.path
import glob
import matplotlib.pyplot as plt
import pandas as pd

PCSF_Files = r'/home/dylan/Documents/HDD/Wisconsin/PCSF-06_30_17/'
DATA_Files = r'/home/dylan/Documents/HDD/Wisconsin/osmotic-stress/Notebooks/Summarization_Notebooks_and_Scripts/'


#r-escapes the whole string
Location = DATA_Files + '_size.txt'
error = PCSF_Files + '*.err'
optimalForests = PCSF_Files + '*_optimalForest.sif'

df = pd.read_csv(Location, sep = '\t', names = ["Name", "ForestSize", "Steiner", "Prizes", "TotalPrizes", "Quotient", "UBC"], skiprows = 0)


# Eliminate columns, Obtain only rows with steiner nodes > 2

In [2]:
df = df.drop(['UBC'], 1)
df = df.drop([0])
df2 = df[((df.Steiner <= '2'))]
print "Number of items with 2 or less steiner nodes: ", len(df) - len(df2)
print "Number of items with more than 2 steiner nodes: ", len(df)

df2 = df[((df.Prizes == '0'))]
#num_files = len([f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))])

print "Number of error files: ", len(glob.glob(error))
print "Number of optimal forest files: ", len(glob.glob(optimalForests))  
    
df = df[((df.Steiner > '2'))]

Number of items with 2 or less steiner nodes:  2238
Number of items with more than 2 steiner nodes:  16800
Number of error files:  16800
Number of optimal forest files:  16800


In [3]:
print df

                                                    Name ForestSize Steiner  \
42     prizes_beta0.50_mu0.000_omega3.5_seed122815_op...        140       5   
43     prizes_beta0.50_mu0.000_omega4.0_seed122815_op...        136       5   
44     prizes_beta0.50_mu0.000_omega7.5_seed122815_op...        139       6   
45     prizes_beta0.50_mu0.000_omega5.5_seed122815_op...        140       5   
46     prizes_beta0.50_mu0.005_omega1.0_seed122815_op...        127       5   
47     prizes_beta0.50_mu0.000_omega10.0_seed122815_o...        143       6   
48     prizes_beta0.50_mu0.005_omega9.0_seed122815_op...        126       5   
49     prizes_beta0.50_mu0.005_omega8.5_seed122815_op...        116       4   
53     prizes_beta0.50_mu0.005_omega4.0_seed122815_op...        127       5   
54     prizes_beta0.50_mu0.005_omega4.5_seed122815_op...        127       5   
55     prizes_beta0.50_mu0.020_omega2.5_seed122815_op...        105       3   
58     prizes_beta0.50_mu0.015_omega8.0_seed122815_o

# Cast each column as float and divide prize nodes by forest size

In [4]:
for index,row in df.iterrows():
    row['ForestSize'] = float(row['ForestSize'])
    row['Prizes'] = float(row['Prizes'])
    row['Quotient'] = float(row['Quotient'])
    row['Quotient'] = row['Prizes']/row['ForestSize']
df

Unnamed: 0,Name,ForestSize,Steiner,Prizes,TotalPrizes,Quotient
42,prizes_beta0.50_mu0.000_omega3.5_seed122815_op...,140,5,135,1596,0.964286
43,prizes_beta0.50_mu0.000_omega4.0_seed122815_op...,136,5,131,1596,0.963235
44,prizes_beta0.50_mu0.000_omega7.5_seed122815_op...,139,6,133,1596,0.956835
45,prizes_beta0.50_mu0.000_omega5.5_seed122815_op...,140,5,135,1596,0.964286
46,prizes_beta0.50_mu0.005_omega1.0_seed122815_op...,127,5,122,1596,0.96063
47,prizes_beta0.50_mu0.000_omega10.0_seed122815_o...,143,6,137,1596,0.958042
48,prizes_beta0.50_mu0.005_omega9.0_seed122815_op...,126,5,121,1596,0.960317
49,prizes_beta0.50_mu0.005_omega8.5_seed122815_op...,116,4,112,1596,0.965517
53,prizes_beta0.50_mu0.005_omega4.0_seed122815_op...,127,5,122,1596,0.96063
54,prizes_beta0.50_mu0.005_omega4.5_seed122815_op...,127,5,122,1596,0.96063


# Find the maximum of the quotient

In [5]:
maximum = df['Quotient'].max()

# Define isclose function that is used in python 3

In [7]:
def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)

# Output rows with greatest quotient

In [8]:
for index,row in df.iterrows():
    if(isclose(row['Quotient'],maximum)):
        print row

Name           prizes_beta1.75_mu0.095_omega2.0_seed122815_op...
ForestSize                                                   246
Steiner                                                        3
Prizes                                                       243
TotalPrizes                                                 1596
Quotient                                                0.987805
Name: 358, dtype: object
Name           prizes_beta1.75_mu0.095_omega0.5_seed122815_op...
ForestSize                                                   246
Steiner                                                        3
Prizes                                                       243
TotalPrizes                                                 1596
Quotient                                                0.987805
Name: 383, dtype: object
Name           prizes_beta1.75_mu0.095_omega7.5_seed122815_op...
ForestSize                                                   246
Steiner                                 