In [26]:
!pip3 install --upgrade pip
!pip3 install --upgrade tensorflow

Collecting tensorflow
  Downloading tensorflow-2.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (588.3 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m588.3/588.3 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:03[0m
Collecting tensorboard<2.12,>=2.11
  Downloading tensorboard-2.11.0-py3-none-any.whl (6.0 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25hCollecting keras<2.12,>=2.11.0
  Downloading keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0mm
Collecting tensorflow-estimator<2.12,>=2.11.0
  Downloading tensorflow_estimator-2.11.0-py2.py3-none-any.whl (439 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
import pandas as pd

import numpy as np

import tensorflow as tf
from tensorflow import keras

import matplotlib.pyplot as plot

from functools import partial

# tf.data autotuning sentinel; passed as num_parallel_reads / num_parallel_calls
# when the input pipeline is built.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Nominal dimensions of one example.
# NOTE(review): none of the n_* constants below is referenced by the cells
# visible in this notebook; presumably they mirror the tensor shapes used at
# training time -- confirm before relying on them.
n_steps = 8 # Number of time steps
n_lat = 10
n_lon = 6
n_bands = 12
n_depths = 3
n_windspeed = 10
n_winddir = 12
n_currents_var = 3
n_TS_var = 2

In [2]:
from scipy import stats
from sklearn import metrics as skm

def get_error_metrics(observed, predicted):
    """Compute linear error statistics between observations and predictions.

    Parameters
    ----------
    observed, predicted : 1-D array-likes of equal length.

    Returns
    -------
    dict with the keys
        'rmse'      root-mean-square error,
        'r2'        coefficient of determination,
        'slope'     slope of the least-squares line predicted ~ observed,
        'intercept' intercept of that line,
        'mae'       mean absolute error,
        'bias'      NaN-ignoring mean of (predicted - observed),
        'n'         number of observations.

    MAPE and the log-space MAE / bias variants are currently disabled and
    therefore not part of the result.
    """
    rmse = np.sqrt(skm.mean_squared_error(observed, predicted))
    r2_score = skm.r2_score(observed, predicted)

    # Only the line itself is reported; r, p-value and stderr are not needed.
    fit = stats.linregress(observed, predicted)

    mae = skm.mean_absolute_error(observed, predicted)
    bias = np.nanmean(predicted - observed)

    return {
        'rmse': rmse,
        'r2': r2_score,
        'slope': fit.slope,
        'intercept': fit.intercept,
        'mae': mae,
        'bias': bias,
        'n': len(observed),
    }

In [48]:
def parse_tfrecord_fn(example_proto):
    """Decode one serialized example into a dict of tensors.

    The gridded fields ('u', 'v', 'w', 'rhos', 'T', 'S', 'wind') are stored as
    serialized tensors (scalar byte strings) and are deserialized to float64.
    The remaining fields are plain scalars and are returned as parsed.

    Only the keys listed in the schema below are returned; any other field
    stored in the record (for example 'DO_vims_predicted') would have to be
    added to the schema to show up in the result.
    """
    bytes_scalar = tf.io.FixedLenFeature([], tf.string)
    float_scalar = tf.io.FixedLenFeature([], tf.float32)
    int_scalar = tf.io.FixedLenFeature([], tf.int64)

    schema = {
        'u': bytes_scalar,
        'v': bytes_scalar,
        'w': bytes_scalar,
        'rhos': bytes_scalar,
        'T': bytes_scalar,
        'S': bytes_scalar,
        'wind': bytes_scalar,
        'total_depth': float_scalar,
        'sample_depth': float_scalar,
        'julian_day': int_scalar,
        'year': int_scalar,
        'DO': float_scalar,
    }

    parsed = tf.io.parse_single_example(example_proto, schema)

    # Turn the byte strings back into the float64 arrays they encode.
    for name in ('rhos', 'u', 'v', 'w', 'T', 'S', 'wind'):
        parsed[name] = tf.io.parse_tensor(parsed[name], tf.float64)

    return parsed

def replacenan(t):
    """Return a copy of ``t`` in which every NaN entry is replaced by zero."""
    nan_positions = tf.math.is_nan(t)
    zeros = tf.zeros_like(t)
    return tf.where(nan_positions, zeros, t)

def compute_mask(t):
    """Return a boolean validity mask for ``t``: True where the value is not NaN."""
    is_missing = tf.math.is_nan(t)
    return tf.math.logical_not(is_missing)

# Define dataset helper functions
def _steps_first(field):
    """Reorder a (lat, lon, k, step) field to (step, lat, lon, k, 1), earliest step first.

    The stored step axis is reversed so that the earliest observation comes
    first, and a trailing singleton axis is appended.
    """
    field = tf.transpose(field, perm=[3, 0, 1, 2])
    field = tf.expand_dims(field, axis=-1)
    return tf.reverse(field, [0])


def prepare_sample(features):
    """Turn one parsed example into the (inputs, targets[, extras]) structure used by the models.

    Parameters
    ----------
    features : dict of tensors as produced by ``parse_tfrecord_fn``.

    Returns
    -------
    (inputs, targets) or, when ``features`` contains 'DO_vims_predicted',
    (inputs, targets, extras), where

    * inputs  -- dict with keys 'reflectance', 'temperature_salinity',
      'currents', 'wind', 'ancillary' and 'jd';
    * targets -- {'do': measured DO};
    * extras  -- VIMS prediction and sample metadata.

    NaNs are replaced by zero in reflectance, temperature/salinity and
    currents; wind is passed through unchanged.
    """
    # Reflectance: lat,lon,bands,step --> step,lat,lon,bands
    rhos = tf.transpose(features['rhos'], perm=[3, 0, 1, 2])
    # Keep the first 11 bands plus the band at index -4 (12 bands in total);
    # per the original note this removes the SWIR and 859 bands.
    rhos = tf.concat((rhos[:, :, :, :11], rhos[:, :, :, -4:-3]), axis=-1)
    rhos = tf.expand_dims(rhos, axis=-1)
    rhos = tf.reverse(rhos, [0])  # earliest observation first along the step axis
    rhos = replacenan(rhos)

    # Temperature and salinity are stacked along the depth axis (axis=-2).
    T = _steps_first(features['T'])
    S = _steps_first(features['S'])
    TS = replacenan(tf.concat([T, S], axis=-2))

    # Current components are stacked the same way.
    u = _steps_first(features['u'])
    v = _steps_first(features['v'])
    w = _steps_first(features['w'])
    uvw = replacenan(tf.concat([u, v, w], axis=-2))

    # n_windspeed,n_winddir,n_steps --> n_steps,n_windspeed,n_winddir, earliest step first
    wind = tf.transpose(features['wind'], perm=[2, 0, 1])
    wind = tf.reverse(wind, [0])

    jd = tf.cast(features['julian_day'], tf.float32)
    # NOTE(review): orb_phase is already sin(2*pi*jd/365.25), so the encodings
    # below are sin(sin(.)) and cos(sin(.)) rather than a plain sin/cos pair.
    # This looks unintended, but it is kept unchanged because models trained
    # with the 'jd' input expect exactly this encoding.
    orb_phase = tf.math.sin(tf.constant(2*np.pi/365.25)*jd)
    jd_encodings = [tf.math.sin(orb_phase), tf.math.cos(orb_phase)]

    anci_data = [features['sample_depth'], features['total_depth'], jd]

    inputs = {'reflectance': rhos, 'temperature_salinity': TS, 'currents': uvw,
              'wind': wind, 'ancillary': anci_data, 'jd': jd_encodings}
    targets = {'do': features['DO']}

    # NOTE(review): parse_tfrecord_fn does not request 'DO_vims_predicted', so
    # this branch is only taken for feature dicts built some other way.
    if 'DO_vims_predicted' in features.keys():
        extras = {'do_vims': 2*features['DO_vims_predicted'],
                  'sample_depth': features['sample_depth'],
                  'total_depth': features['total_depth'],
                  'jd': features['julian_day'],
                  'year': features['year']}
        return (inputs, targets, extras)

    return (inputs, targets)


In [49]:
def get_dataset(filenames, batch_size):
    """Build the batched evaluation pipeline for a list of TFRecord files.

    Parameters
    ----------
    filenames : list of TFRecord paths.
    batch_size : number of examples per batch.

    Returns
    -------
    tf.data.Dataset yielding batches of ``prepare_sample`` output.

    The pipeline is neither shuffled nor switched to non-deterministic
    ordering: downstream cells iterate the dataset once for predictions and
    once for labels and align the two positionally, so both passes must see
    the examples in the same order.
    """
    # Reads from multiple files are interleaved automatically.
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)

    # Serialized example -> dict of tensors -> model inputs / targets.
    dataset = dataset.map(parse_tfrecord_fn, num_parallel_calls=AUTOTUNE)
    dataset = dataset.map(prepare_sample, num_parallel_calls=AUTOTUNE)

    return dataset.batch(batch_size)

# def get_dataset(filenames, batch_size):
    
#     ignore_order = tf.data.Options()
#     ignore_order.experimental_deterministic = False  # disable order, increase speed
    
#     dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)  # automatically interleaves reads from multiple files
    
#     dataset = dataset.with_options(ignore_order)  # uses data as soon as it streams in, rather than in its original order
    
#     # parse_tfrecord
#     dataset = dataset.map(partial(parse_tfrecord_fn), num_parallel_calls=AUTOTUNE)
    
#     # prepare_example
#     dataset = dataset.map(prepare_sample, num_parallel_calls=AUTOTUNE)

#     dataset = dataset.shuffle(batch_size * 10)
    
#     dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    
#     dataset = dataset.batch(batch_size)

#     return dataset

In [50]:
# Testset path
# NOTE(review): absolute, machine-specific path. The order in which glob
# returns the files determines the example order of test_dataset, which later
# cells rely on when they line predictions up with labels and with saved CSVs.
test_fnames = tf.io.gfile.glob('/home/gzheng/Documents/research/AI_hypoxia/data/tfrecords_v1_corr/test/*.tfrecords')

# Load testset
# Batches of 64 examples; the batch size only affects throughput, not results.
test_dataset = get_dataset(test_fnames, 64)

In [6]:
# Get cross val models
# Exactly one of the summary CSVs below is active; the commented-out lines
# select the summaries of other model families.

# df_cross_val_model = pd.read_csv('/home/gzheng/Documents/research/AI_hypoxia/models/summary/val_mae_best_of_each_rhos12_cross_val_model.csv',index_col=0)
# df_cross_val_model = pd.read_csv('/home/gzheng/Documents/research/AI_hypoxia/models/summary/val_mae_best_of_each_cross_val_model_ts_uvw_wind.csv',index_col=0)
df_cross_val_model = pd.read_csv('/home/gzheng/Documents/research/AI_hypoxia/models/summary/val_mae_best_of_each_cross_val_model_rhos12_ts_uvw_wind.csv',index_col=0)

# The column names of the summary are the saved-model directory names that the
# cross-validation evaluation loop loads.
cross_val_model_list = df_cross_val_model.columns.to_list()

In [51]:
# Load model 2
# Exactly one model_version is active; the alternatives are kept commented out.
# NOTE(review): the trailing numbers (.89, .859, ...) are presumably a score of
# each trial -- the notebook does not say which metric; confirm.
# model_version = 't_trial_0' #.89
model_version = 'ts_uvw_wind_trial_3' #.859
# model_version = 'ts_uvw_wind_jd_trial_1' #.848
# model_version = 'rhos12_ts_uvw_wind_jd_trial_4' #.853
# model_version = 'rhos12_ts_uvw_wind_trial_9' #.857
path_best_model = '/home/gzheng/Documents/research/AI_hypoxia/models/corr_perturbation_runs/'+model_version

model_2 = tf.keras.models.load_model(path_best_model)

# Predictions come back in dataset order; later cells compare them
# position-by-position with the measured DO collected from the same dataset.
test_predicted = model_2.predict(test_dataset)

  inputs = self._flatten_to_reference_inputs(inputs)




In [17]:
# Get test measured DO
# Each dataset element is (inputs, targets); the label lives under targets['do'].
DO = [batch[1]['do'] for batch in test_dataset.as_numpy_iterator()]

# Flatten the per-batch label arrays into one 1-D array in dataset order.
all_test_DO = np.concatenate(DO)

In [19]:
# Evaluate every cross-validation model on the test set and collect one row of
# error statistics per model.
dir_cross_val = '/home/gzheng/Documents/research/AI_hypoxia/models/cross_val_runs/'

# Loop-local names (cv_*) are used on purpose: reusing model_version, model_2
# and test_predicted here would overwrite the single-model results that other
# cells read.
cross_val_rows = []
for cv_model_version in cross_val_model_list:
    cv_model = tf.keras.models.load_model(dir_cross_val + cv_model_version)

    cv_predicted = cv_model.predict(test_dataset)

    cv_error_stats = get_error_metrics(all_test_DO, cv_predicted.squeeze())
    cross_val_rows.append(pd.Series(cv_error_stats, name=cv_model_version))

# Build the frame once (DataFrame.append was removed in pandas 2.0 and was
# quadratic anyway). Rows are indexed by model name; columns are sorted to
# keep the alphabetical layout of previously saved result files.
df_error_cross_val = pd.DataFrame(cross_val_rows).sort_index(axis=1)

  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




  inputs = self._flatten_to_reference_inputs(inputs)




In [20]:
from os import path
# Best validation / training MAE of each cross-validation model, one column
# per model in the summary CSVs.
df_best_val_mae = pd.read_csv('/home/gzheng/Documents/research/AI_hypoxia/models/summary/val_mae_best_of_each_cross_val_model_rhos12_ts_uvw_wind.csv')
df_best_train_mae = pd.read_csv('/home/gzheng/Documents/research/AI_hypoxia/models/summary/train_mae_best_of_each_cross_val_model_rhos12_ts_uvw_wind.csv')

# Column-wise minimum gives one value per model; assignment aligns on the model
# names in df_error_cross_val's index. numeric_only=True keeps the unnamed
# index column that read_csv materialises from breaking (or changing the dtype
# of) the reduction when it is not numeric.
df_error_cross_val['val_mae'] = df_best_val_mae.min(numeric_only=True)

df_error_cross_val['train_mae'] = df_best_train_mae.min(numeric_only=True)

In [22]:
df_error_cross_val.to_csv('/home/gzheng/Documents/research/AI_hypoxia/data/test_results/rhos12_ts_uvw_wind_cross_val_testset_error_stats.csv')

In [21]:
df_error_cross_val
# rhos12_ts_uvw_wind

Unnamed: 0,bias,intercept,mae,n,r2,rmse,slope,val_mae,train_mae
2002_asValYear_rhos12_ts_uvw_wind_trial_8,0.033051,1.18952,0.954683,11618.0,0.827852,1.338957,0.833625,0.708455,0.653466
2003_asValYear_rhos12_ts_uvw_wind_trial_9,0.055812,0.658771,0.896281,11618.0,0.844281,1.273464,0.913256,1.096235,0.67352
2004_asValYear_rhos12_ts_uvw_wind_trial_0,0.28228,1.178695,0.893076,11618.0,0.847297,1.261068,0.871038,0.964375,0.72121
2005_asValYear_rhos12_ts_uvw_wind_trial_5,0.044852,1.057591,0.932429,11618.0,0.828847,1.335079,0.854303,1.036104,0.62791
2006_asValYear_rhos12_ts_uvw_wind_trial_9,0.000383,0.911453,0.881872,11618.0,0.847416,1.260577,0.868929,0.965736,0.66747
2007_asValYear_rhos12_ts_uvw_wind_trial_6,0.32491,1.583579,0.938727,11618.0,0.826395,1.344611,0.818922,0.907347,0.669505
2008_asValYear_rhos12_ts_uvw_wind_trial_0,0.110699,0.958293,0.887341,11618.0,0.842599,1.28032,0.878061,0.940869,0.641882
2009_asValYear_rhos12_ts_uvw_wind_trial_0,-0.073956,0.626249,0.897965,11618.0,0.847376,1.260741,0.899265,0.880369,0.707935
2010_asValYear_rhos12_ts_uvw_wind_trial_6,0.108611,1.071284,0.894583,11618.0,0.838414,1.297231,0.861506,0.863558,0.667485
2011_asValYear_rhos12_ts_uvw_wind_trial_3,0.236948,1.420013,0.89769,11618.0,0.842501,1.280718,0.829799,1.006137,0.643637


In [12]:
df_error_cross_val
# ts_uvw_wind

Unnamed: 0,bias,intercept,mae,n,r2,rmse,slope
2002_asValYear_ts_uvw_wind_trial_3,-0.141493,0.928555,0.906499,11618.0,0.84472,1.271664,0.846058
2003_asValYear_ts_uvw_wind_trial_6,0.117529,0.909536,0.917957,11618.0,0.830662,1.327983,0.886058
2004_asValYear_ts_uvw_wind_trial_2,0.142377,1.174169,0.893231,11618.0,0.842336,1.281389,0.851562
2005_asValYear_ts_uvw_wind_trial_0,0.154995,1.233954,0.886275,11618.0,0.845803,1.267224,0.844776
2006_asValYear_ts_uvw_wind_trial_2,0.004629,0.889014,0.896159,11618.0,0.844688,1.271797,0.872769
2007_asValYear_ts_uvw_wind_trial_8,0.281451,1.169718,0.953238,11618.0,0.820646,1.366692,0.87221
2008_asValYear_ts_uvw_wind_trial_7,0.069297,1.036682,0.900113,11618.0,0.845224,1.269602,0.860828
2009_asValYear_ts_uvw_wind_trial_8,0.055527,1.19647,0.908952,11618.0,0.842271,1.281655,0.835859
2010_asValYear_ts_uvw_wind_trial_9,-0.027355,0.803108,0.913325,11618.0,0.838458,1.297053,0.880526
2011_asValYear_ts_uvw_wind_trial_5,0.23526,1.111049,0.867763,11618.0,0.852933,1.237578,0.874005


In [22]:
df_error_cross_val
# rhos12_ts_uvw_wind_jd

Unnamed: 0,bias,intercept,mae,n,r2,rmse,slope,val_mae,train_mae
2002_asValYear_rhos12_ts_uvw_wind_jd_trial_5,0.005418,0.97497,0.899221,11618.0,0.848226,1.257228,0.860516,0.671613,0.652919
2003_asValYear_rhos12_ts_uvw_wind_jd_trial_7,0.040033,0.822103,0.943708,11618.0,0.823298,1.356549,0.887488,1.086936,0.629329
2004_asValYear_rhos12_ts_uvw_wind_jd_trial_5,0.048779,1.019937,0.869394,11618.0,0.858992,1.211816,0.860285,0.96752,0.768175
2005_asValYear_rhos12_ts_uvw_wind_jd_trial_3,0.154083,1.004743,0.861306,11618.0,0.851345,1.244244,0.87762,1.039139,0.699949
2006_asValYear_rhos12_ts_uvw_wind_jd_trial_9,0.095648,1.175328,0.904295,11618.0,0.841807,1.283538,0.844672,0.937669,0.706491
2007_asValYear_rhos12_ts_uvw_wind_jd_trial_8,0.196527,1.231831,0.927051,11618.0,0.828414,1.336766,0.851057,0.888706,0.631402
2008_asValYear_rhos12_ts_uvw_wind_jd_trial_1,0.15362,1.034728,0.873267,11618.0,0.850477,1.247871,0.87324,0.915314,0.74476
2009_asValYear_rhos12_ts_uvw_wind_jd_trial_1,-0.031988,1.064334,0.946515,11618.0,0.831277,1.325569,0.842278,0.819755,0.687493
2010_asValYear_rhos12_ts_uvw_wind_jd_trial_4,0.063212,0.968495,0.87358,11618.0,0.848904,1.254415,0.869762,0.841037,0.678271
2011_asValYear_rhos12_ts_uvw_wind_jd_trial_7,0.137278,1.123033,0.908283,11618.0,0.844426,1.272871,0.858185,0.949779,0.699747


In [109]:
df_error_cross_val
# ts_uvw_wind_jd

Unnamed: 0,bias,intercept,mae,n,r2,rmse,slope,val_mae
2002_asValYear_ts_uvw_wind_jd_trial_0,0.095515,1.464517,0.913927,11618.0,0.842529,1.280607,0.803049,0.734883
2003_asValYear_ts_uvw_wind_jd_trial_6,-0.002319,0.831424,0.899989,11618.0,0.846094,1.266029,0.880054,1.121666
2004_asValYear_ts_uvw_wind_jd_trial_8,0.230334,1.414301,0.905709,11618.0,0.837706,1.300067,0.829669,0.989505
2005_asValYear_ts_uvw_wind_jd_trial_9,-0.025656,0.769715,0.886829,11618.0,0.850036,1.24971,0.885574,1.017197
2006_asValYear_ts_uvw_wind_jd_trial_4,-0.068727,0.833976,0.87617,11618.0,0.853429,1.235491,0.870133,0.948227
2007_asValYear_ts_uvw_wind_jd_trial_7,0.179496,1.075341,0.841707,11618.0,0.857569,1.217914,0.87112,0.845593
2008_asValYear_ts_uvw_wind_jd_trial_7,0.073037,0.953194,0.862193,11618.0,0.856407,1.222873,0.873377,0.923624
2009_asValYear_ts_uvw_wind_jd_trial_6,-0.078381,1.002054,0.900994,11618.0,0.846702,1.263525,0.844564,0.83836
2010_asValYear_ts_uvw_wind_jd_trial_9,-0.072452,0.785719,0.88757,11618.0,0.851576,1.243275,0.87654,0.845291
2011_asValYear_ts_uvw_wind_jd_trial_7,0.27881,1.354977,0.911178,11618.0,0.833743,1.315845,0.845178,0.96307


In [178]:
# Measured DO labels of batch 100. The label key emitted by prepare_sample is
# 'do' (lower case). skip/take fetches just that batch instead of
# materialising the whole dataset in a list.
next(iter(test_dataset.skip(100).take(1).as_numpy_iterator()))[1]['do']

array([0.69, 8.9 , 8.9 , 8.8 , 8.7 , 8.4 , 7.8 , 7.5 , 7.2 , 6.8 , 5.3 ,
       3.4 , 3.3 , 3.1 , 2.7 , 2.5 , 2.4 , 2.3 , 2.1 , 2.  , 1.8 , 1.5 ,
       1.3 , 1.03, 0.72, 0.58, 0.52, 0.54, 9.  , 9.  , 8.8 , 8.6 , 8.  ,
       7.6 , 7.3 , 5.3 , 4.5 , 4.1 , 3.8 , 3.6 , 3.6 , 3.  , 2.8 , 2.6 ,
       2.5 , 2.3 , 2.2 , 2.  , 2.  , 1.9 , 1.7 , 1.6 , 1.6 , 9.4 , 9.2 ,
       9.  , 8.5 , 8.2 , 7.9 , 7.7 , 6.9 , 6.2 , 6.  , 5.3 ],
      dtype=float32)

In [40]:
# Base testset data with VIMS DO
df_base = pd.read_csv('/home/gzheng/Documents/research/AI_hypoxia/data/test_results/train_3_best_model_091_no_jd_trial_4_testset_result.csv',index_col=0)

In [87]:
# Attach the model predictions to a copy of the base test-set table.
# NOTE(review): the assignment is positional, so df_base rows must be in the
# same order as test_dataset; test_predicted and model_version are whatever the
# most recently executed cell left in them -- make sure both refer to the same model.
df = df_base.copy()
df['HypoxAI_predicted_DO'] = test_predicted.squeeze()

# Output file is named after the model that produced the predictions.
df.to_csv('/home/gzheng/Documents/research/AI_hypoxia/data/test_results/'+model_version+'_testset_result.csv')

In [116]:
# Test-set error statistics of the four models, one Series per model, named
# after the model version.
# NOTE(review): the numbers are typed in by hand, apparently transcribed from
# the outputs of the individual get_error_metrics cells; they are not
# recomputed here, so they go stale if those cells are re-run with other models.
sr_1 = pd.Series({'rmse': 1.2375429,
 'r2': 0.8529415020481225,
 'slope': 0.8570255827597884,
 'intercept': 1.0086237224305776,
 'mae': 0.87935656,
 'bias': 0.014809219,
 'n': 11618}, name='rhos12_ts_uvw_wind_trial_9')

sr_2 = pd.Series({'rmse': 1.3085444,
 'r2': 0.8355830567464066,
 'slope': 0.8519348671717808,
 'intercept': 0.9737272663093597,
 'mae': 0.95055085,
 'bias': -0.0554724,
 'n': 11618}, name='rhos12_ts_uvw_wind_jd_trial_4')

sr_3 = pd.Series({'rmse': 1.2629104,
 'r2': 0.8468508136412493,
 'slope': 0.8458690450461507,
 'intercept': 1.0685750580202527,
 'mae': 0.91338134,
 'bias': -0.0027878364,
 'n': 11618}, name='ts_uvw_wind_trial_3')

sr_4 = pd.Series({'rmse': 1.3297338,
 'r2': 0.8302150969544786,
 'slope': 0.8645486715242073,
 'intercept': 0.9283595205184128,
 'mae': 0.9485182,
 'bias': -0.013161618,
 'n': 11618}, name='ts_uvw_wind_jd_trial_1')

# Side by side: one column per model, one row per metric.
df_err_2018 = pd.concat([sr_1,sr_2,sr_3,sr_4],axis=1)

In [118]:
df_err_2018.to_csv('/home/gzheng/Documents/research/AI_hypoxia/data/test_results/2018asValSet_testset_error_stats.csv')

In [117]:
df_err_2018

Unnamed: 0,rhos12_ts_uvw_wind_trial_9,rhos12_ts_uvw_wind_jd_trial_4,ts_uvw_wind_trial_3,ts_uvw_wind_jd_trial_1
rmse,1.237543,1.308544,1.26291,1.329734
r2,0.852942,0.835583,0.846851,0.830215
slope,0.857026,0.851935,0.845869,0.864549
intercept,1.008624,0.973727,1.068575,0.92836
mae,0.879357,0.950551,0.913381,0.948518
bias,0.014809,-0.055472,-0.002788,-0.013162
n,11618.0,11618.0,11618.0,11618.0


In [81]:
# best model, rhos12_ts_uvw_wind_trial_9, 2018 as val set
# AI error

get_error_metrics(all_test_DO, test_predicted.squeeze())

{'rmse': 1.2375429,
 'r2': 0.8529415020481225,
 'slope': 0.8570255827597884,
 'intercept': 1.0086237224305776,
 'mae': 0.87935656,
 'bias': 0.014809219,
 'n': 11618}

In [71]:
# best model, rhos12_ts_uvw_wind_jd_trial_4, 2018 as val set
# AI error

get_error_metrics(all_test_DO, test_predicted.squeeze())

{'rmse': 1.3085444,
 'r2': 0.8355830567464066,
 'slope': 0.8519348671717808,
 'intercept': 0.9737272663093597,
 'mae': 0.95055085,
 'bias': -0.0554724,
 'n': 11618}

In [15]:
# best model, ts_uvw_wind_trial_3, 2018 as val set
# AI error

get_error_metrics(all_test_DO, test_predicted.squeeze())

{'rmse': 1.2629104,
 'r2': 0.8468508121344583,
 'slope': 0.8458690487143523,
 'intercept': 1.0685750325226104,
 'mae': 0.91338134,
 'bias': -0.0027878615,
 'n': 11618}

In [48]:
# best model, ts_uvw_wind_jd_trial_1, 2018 as val set
# AI error

get_error_metrics(all_test_DO, test_predicted.squeeze())

{'rmse': 1.3297338,
 'r2': 0.8302150969544786,
 'slope': 0.8645486715242073,
 'intercept': 0.9283595205184128,
 'mae': 0.9485182,
 'bias': -0.013161618,
 'n': 11618}

In [398]:
# '091 no jd trial 4'
# AI error
# The measured-DO array is named all_test_DO in this notebook (all_DO is not
# defined anywhere in it); test_predicted[:,0] selects the single output column.

get_error_metrics(all_test_DO, test_predicted[:,0])

{'rmse': 1.2554576,
 'r2': 0.8486530202154919,
 'slope': 0.8680073222527016,
 'mae': 0.90504646,
 'bias': 0.11860645,
 'n': 11618}

In [54]:
all_vims_DO = df_base['VIMS_predicted_DO']

In [55]:
# True where a VIMS prediction exists (is not NaN).
# NOTE(review): idx is applied positionally to all_test_DO, so the rows of
# df_base are assumed to be in the same order as test_dataset -- confirm.
idx = ~np.isnan(all_vims_DO)
x_DO = all_test_DO[idx]  # measured DO for those samples
y_DO = all_vims_DO[idx]  # VIMS-predicted DO for those samples

# VIMS error
get_error_metrics(x_DO, y_DO)

{'rmse': 1.5384878461635918,
 'r2': 0.7626039543244644,
 'slope': 0.8918980127046793,
 'intercept': 0.9256479090146135,
 'mae': 1.0981469929753966,
 'bias': 0.15506932932528203,
 'n': 10462}

In [57]:
# cases with non-nan VIMS prediction
# best model, ts_uvw_wind_trial_3, 2018 as val set
# AI error

get_error_metrics(all_test_DO[idx], test_predicted.squeeze()[idx])

{'rmse': 1.2699687,
 'r2': 0.8382398951405761,
 'slope': 0.8368629288959663,
 'intercept': 1.145853075814574,
 'mae': 0.9196301,
 'bias': -0.01702933,
 'n': 10462}

In [58]:
# cases with nan VIMS prediction
# best model, ts_uvw_wind_trial_3, 2018 as val set
# AI error

get_error_metrics(all_test_DO[~idx], test_predicted.squeeze()[~idx])

{'rmse': 1.1971393,
 'r2': 0.8761658924764912,
 'slope': 0.8978642732576325,
 'intercept': 0.6721868840570027,
 'mae': 0.8568294,
 'bias': 0.12609999,
 'n': 1156}

In [23]:
# VIMS error

get_error_metrics(x_DO, y_DO)

{'rmse': 1.5384878,
 'r2': 0.7626039549317537,
 'slope': 0.8918980126623903,
 'intercept': 0.9256486443077598,
 'mae': 1.098147,
 'bias': 0.15506934,
 'n': 10462}

In [30]:
tf.io.gfile.glob('/home/gzheng/Documents/research/AI_hypoxia/data/tfrecords_v1_corr/*'+str(year)+'*.tfrecords')


['/home/gzheng/Documents/research/AI_hypoxia/data/tfrecords_v1_corr/training_examples_2002.tfrecords']

In [36]:
# Number of examples by year
# NOTE: range(2002, 2018) covers 2002-2017 only; extend the range to count later years.
for year in range(2002,2018):
    ds_fnames = tf.io.gfile.glob('/home/gzheng/Documents/research/AI_hypoxia/data/tfrecords_v1_corr/*'+str(year)+'*.tfrecords')

    # Count the serialized records directly. Every record yields exactly one
    # example, so parsing and preprocessing each one just to take len() is
    # unnecessary work.
    n_examples = sum(1 for _ in tf.data.TFRecordDataset(ds_fnames))

    print(year, n_examples)

2002 1578
2003 8657
2004 10016
2005 8415
2006 10393
2007 10573
2008 10047
2009 9110
2010 8125
2011 8834
2012 8844
2013 9303
2014 8949
2015 9307
2016 9300
2017 9185


2019 8839
2020 2779
