In [1]:
import pandas as pd
import bayes_net_utils as bn

# Bayesian network predictions

This notebook is closely based on the code in `06_Predict_with_BN_1Season.ipynb`. The R code required to run the Bayesian network has been refactored into the R function `bayes_net_predict` in `bayes_net_utils.R`. There is also a Python function of the same name in `bayes_net_utils.py`, which provides a simple "wrapper" around the R function. This should make it easy to make predictions from the Bayesian network via Python.

**Note:** There is some computational overhead involved in interfacing between Python and R, but I don't think it will be a major problem.

In [2]:
# Load the "evidence" table from CSV; the path is relative to this notebook
ev_path = r'../Data/DataForPrediction/GaussianBN_DataForPrediction_1Season.csv'
ev_df = pd.read_csv(filepath_or_buffer=ev_path)

# Display the evidence
ev_df

Unnamed: 0,year,chla_prevSummer,colour_prevSummer,TP_prevSummer,wind_speed,rain
0,2020,12.866667,42,19.666667,3.968478,312.809003


In [3]:
# Run the Bayesian network once per row of the evidence table and collect
# the resulting dataframes.
# Evidence columns, in the positional order they are passed to bayes_net_predict
evidence_cols = ['year', 'chla_prevSummer', 'colour_prevSummer',
                 'TP_prevSummer', 'wind_speed', 'rain']

df_list = []
for _, ev_row in ev_df.iterrows():
    # Call the Python wrapper around the R function; every value is cast to float
    pred_df = bn.bayes_net_predict(*(float(ev_row[col]) for col in evidence_cols))

    # Tag the predictions with the (integer) year so each row set is identifiable
    pred_df['year'] = int(ev_row['year'])
    df_list.append(pred_df)

# Stack the per-row results, then select the columns in display order
col_order = ['year', 'node', 'threshold', 'prob_below_threshold',
             'prob_above_threshold', 'expected_value', 'sd']
df = pd.concat(df_list, sort=True)[col_order]

df

Unnamed: 0,year,node,threshold,prob_below_threshold,prob_above_threshold,expected_value,sd
2,2020,chla,20.0,0.99,0.01,9.75,3.68
4,2020,colour,48.0,0.94,0.06,34.2,8.72
3,2020,cyano,1.0,0.83,0.17,0.342,0.756
1,2020,TP,29.5,0.97,0.03,22.8,3.72
