In [32]:
import pandas as pd, numpy as np, os

# Introduction

This notebook carries out a visual and statistical comparison of observed and predicted seasonal water chemistry and ecology in Vanemfjorden. It takes the predictions produced by the notebook Predict_with_BN.ipynb and either compares predictions driven by different kinds of meteorological (met) data (ERA5 and S5), or compares predictions to observations from the lake.

In [37]:
# Input locations (all relative to the folder this notebook is run from)

# CSV of seasonal lake observations, one row per year
obs_fpath = "../Data/DataMatrices/Seasonal_BN_obs/seasonal_obs_GBN_1981-2019.csv"

# CSV of BN predictions driven by ERA5 met data
sim_era5_fpath = "../Data/Predictions/Historic/GBN_prediction_era5_1981-2019.csv"

# Folder holding one BN prediction CSV per season and S5 member
sim_s5_folder = "../Data/Predictions/Historic/s5"

In [34]:
# Load the observations (wide layout: one row per year, one column per variable)
obs_wide = pd.read_csv(obs_fpath)

# Stack the four variables of interest into long layout (year, node, obs)
# so the observations share the layout used for the predictions
obs_nodes = ['TP', 'chla', 'colour', 'cyano']
obs = obs_wide.melt(
    id_vars=['year'],
    value_vars=obs_nodes,
    var_name='node',
    value_name='obs',
)

obs.head()

Unnamed: 0,year,node,obs
0,1981,TP,28.833333
1,1982,TP,26.988095
2,1983,TP,29.78125
3,1984,TP,26.5275
4,1985,TP,30.957143


In [35]:
# Load the BN predictions driven by ERA5 met data ('pseudoobs')
sim_era5 = pd.read_csv(sim_era5_fpath)

# Keep only the expected value per (year, node) and label it with its source,
# giving the same long layout as the observations
sim_era5_e = (
    sim_era5[['year', 'node', 'expected_value']]
    .rename(columns={'expected_value': 'sim_era5'})
)
sim_era5_e.head()

Unnamed: 0,year,node,sim_era5
0,1981,chla,19.2
1,1981,colour,28.8
2,1981,cyano,2.06
3,1981,TP,37.0
4,1982,chla,11.1


In [36]:
# Read in BN predictions driven by s5 and reformat
#
# For every season, one prediction file per S5 ensemble member is read and the
# members' expected values are placed side by side, giving one dataframe per
# season with columns: year, node, sim_s5_01, sim_s5_02, ...

member_li = ["%.2d" % i for i in range(1,26)] # List of S5 member numbers in format '01','02'... Should be present in s5 met data folder
season_li = ['summer', 'late_summer'] # Seasons of interest (must match filenames in s5 met data folder)
st_yr = 1993
end_yr = 2019

sim_s5_dict = {} # key: season, returns df with cols yr, node, expected values from member 01, 02, 03,...
for season in season_li:
    expected_value_li = []
    for member in member_li:

        # Read in predictions for this season and member
        s5fname = 'GBN_prediction_s5_%s-%s_%s_%s.csv' %(st_yr, end_yr, season, member)
        s5fpath = os.path.join(sim_s5_folder, s5fname)
        sim_s5 = pd.read_csv(s5fpath)

        # Extract just the expected value, keyed on (year, node) and named after
        # the member. Keying on the labels means the members are matched up by
        # year and node when combined below, rather than by row position, so a
        # file with a different row order cannot be silently misaligned.
        expected_values = (sim_s5.set_index(['year','node'])['expected_value']
                                 .rename('sim_s5_%s' %member))
        expected_value_li.append(expected_values)

    # Combine the members column-wise (aligned on year and node), then turn the
    # keys back into ordinary columns and add to expected values dict for s5
    sim_s5_e = pd.concat(expected_value_li, axis=1).reset_index()
    sim_s5_dict[season] = sim_s5_e

# Check of output
sim_s5_dict['summer'].head()

Unnamed: 0,year,node,sim_s5_01,sim_s5_02,sim_s5_03,sim_s5_04,sim_s5_05,sim_s5_06,sim_s5_07,sim_s5_08,...,sim_s5_16,sim_s5_17,sim_s5_18,sim_s5_19,sim_s5_20,sim_s5_21,sim_s5_22,sim_s5_23,sim_s5_24,sim_s5_25
0,1993,chla,14.6,14.5,15.6,17.0,15.9,16.9,17.0,16.4,...,16.6,17.2,15.4,17.0,13.9,17.8,16.5,16.5,17.8,15.3
1,1993,colour,23.8,23.5,23.8,22.4,30.4,22.0,22.8,24.1,...,23.8,22.6,25.6,24.7,24.5,19.6,22.8,21.2,25.2,27.1
2,1993,cyano,1.08,1.07,1.28,1.65,1.16,1.65,1.64,1.44,...,1.51,1.71,1.19,1.58,0.945,2.02,1.53,1.56,1.77,1.12
3,1993,TP,29.9,29.9,29.9,29.9,29.9,29.9,29.9,29.9,...,29.9,29.9,29.9,29.9,29.9,29.9,29.9,29.9,29.9,29.9
4,1994,chla,12.6,14.1,14.9,14.9,14.0,13.4,12.9,15.4,...,15.0,14.4,13.2,15.6,13.7,11.2,13.8,15.5,13.5,12.6
