In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.feature_selection import VarianceThreshold
import random
import pickle
from sklearn.preprocessing import StandardScaler
import os
import tensorflow as tf
from tqdm.notebook import tqdm
from scipy.stats import multivariate_normal as mvn
import matplotlib.pyplot as plt
from scipy.linalg import block_diag

2023-06-12 21:31:55.752948: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-12 21:31:55.855961: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-12 21:31:55.861717: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :
2023-06-12 21:31:55.861734: I tensorflow/compiler/xla/stream_executor/cuda/cudart

In [2]:
def get_targets_with_weights(batch_data, initial_ensembles, size_ens, ann):
    """Evaluate every ensemble member's flattened network on ``batch_data``.

    Each row of ``initial_ensembles`` is read, left to right, as
    ``[hidden kernel | hidden bias | output kernel | output bias]``. The block
    sizes come from ``ann.get_weights()`` and the width of ``ann.layers[1]``;
    ``ann`` itself is never evaluated. No activation function is applied, so
    each member is a composition of two affine maps with a single output.

    Parameters
    ----------
    batch_data : 2-D array, one row per sample.
    initial_ensembles : 2-D array, one row of flattened parameters per member.
    size_ens : number of ensemble members (rows of ``initial_ensembles``).
    ann : object exposing ``get_weights()`` and ``layers[1].output.shape``.

    Returns
    -------
    (predictions, stack) where ``predictions`` has shape
    ``(size_ens, batch_data.shape[0])`` and ``stack`` is ``predictions``
    horizontally stacked in front of ``initial_ensembles``.
    """
    n_outputs = 1
    layer_params = ann.get_weights()
    width = ann.layers[1].output.shape[-1]
    n_features = batch_data.shape[1]

    # Column offsets at which each parameter block ends inside a member row.
    end_w1 = layer_params[0].size
    end_b1 = end_w1 + width
    end_w2 = end_b1 + layer_params[2].size
    end_b2 = end_w2 + n_outputs

    w1 = initial_ensembles[:, :end_w1].reshape(size_ens, n_features, width)
    b1 = initial_ensembles[:, end_w1:end_b1].reshape(size_ens, 1, width)
    w2 = initial_ensembles[:, end_b1:end_w2].reshape(size_ens, width, n_outputs)
    b2 = initial_ensembles[:, end_w2:end_b2].reshape(size_ens, 1, n_outputs)

    # (samples, features) x (members, features, width) -> (members, samples, width)
    hidden = np.einsum('ij,kjl->kil', batch_data, w1) + b1
    # (members, samples, width) x (members, width, 1) -> (members, samples, 1)
    predictions = (np.einsum('ijk,ikl->ijl', hidden, w2) + b2)[:, :, 0]

    return predictions, np.hstack((predictions, initial_ensembles))

In [3]:
def ann(hidden = 32, input_shape = 256, output_shape = 1):
    """Build a small Keras model: Input -> Dense(hidden) -> Dense(output_shape, relu).

    Parameters
    ----------
    hidden : number of units in the hidden Dense layer (no activation).
    input_shape : number of input features (an int), or a shape tuple.
    output_shape : number of units in the output Dense layer.

    Returns
    -------
    An uncompiled ``tf.keras`` functional model.

    NOTE(review): the NumPy forward passes in this notebook
    (``get_targets_with_weights`` and its weighted variant) apply no
    activation, so the output ``relu`` here is not mirrored by them. As far as
    the visible cells show, the model is only used for its parameter count and
    layer shapes.
    """
    # `(input_shape)` is just a parenthesised int, not a tuple. Keras documents
    # `shape` as a tuple and newer releases reject a bare int, so normalise it.
    if np.ndim(input_shape) == 0:
        shape = (int(input_shape),)
    else:
        shape = tuple(input_shape)

    input_layer = tf.keras.layers.Input(shape = shape)
    hidden_output = tf.keras.layers.Dense(hidden)(input_layer)
    pred_output = tf.keras.layers.Dense(output_shape, activation = "relu")(hidden_output)
    return tf.keras.models.Model(input_layer, pred_output)

In [4]:
def generate_initial_ensembles(num_weights, lambda1, size_ens):
    """Draw ``size_ens`` parameter vectors from N(0, lambda1 * I).

    Parameters
    ----------
    num_weights : dimension of each parameter vector.
    lambda1 : common variance placed on the diagonal of the covariance.
    size_ens : number of vectors (ensemble members) to draw.

    Returns
    -------
    Array of shape ``(size_ens, num_weights)``.
    """
    mean_vec = np.zeros((num_weights,))
    cov_matrix = lambda1*np.identity(num_weights)
    draws = mvn(mean_vec, cov_matrix).rvs(size_ens)
    # SciPy squeezes singleton axes (size_ens == 1 or num_weights == 1 gives a
    # 1-D or scalar result); callers slice with `[:, a:b]`, so always hand back
    # a 2-D array.
    return np.asarray(draws).reshape(size_ens, num_weights)

In [5]:
def expit(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise.

    (The previous docstring called this a softmax; it is the sigmoid.)

    The exponential is only ever taken of a non-positive number, so large
    negative inputs no longer overflow ``np.exp`` the way the direct formula
    does.

    Parameters
    ----------
    x : scalar or array-like of real numbers.

    Returns
    -------
    NumPy scalar for scalar input, otherwise an array with the shape of ``x``.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # always in [0, 1]
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x). Same function, both
    # branches written in terms of the bounded `decay`.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # turns a 0-d array back into a scalar, leaves n-d arrays alone

In [6]:
def get_initial_X_t(data1, data2, size_ens, var_weights = 1.0):
    """Draw the initial ensemble and assemble the initial state array.

    Four independent parameter ensembles are drawn for a 32-input, 16-hidden,
    1-output template network: two are evaluated on ``data1`` and two on
    ``data2``. A further ensemble of four values per member is drawn and
    appended as one extra row along axis 1 of the state array.

    Parameters
    ----------
    data1, data2 : 2-D feature arrays fed to the first two / last two networks.
    size_ens : number of ensemble members.
    var_weights : variance passed to ``generate_initial_ensembles`` for every draw.

    Returns
    -------
    X_t : per-member outputs stacked on their parameters, one slice per
        network along the last axis, plus the extra row of four drawn values.
    initial_ensembles : the four parameter ensembles stacked side by side.
    initial_ensembles_for_weights : the ``(size_ens, 4)`` extra draws.
    """
    template = ann(hidden = 16, input_shape = 32, output_shape = 1)

    member_params = []
    stacks = []
    # The random draws happen in this exact order: data1, data1, data2, data2.
    for features in (data1, data1, data2, data2):
        draws = generate_initial_ensembles(template.count_params(), var_weights, size_ens)
        _, stacked = get_targets_with_weights(features, draws, size_ens = size_ens, ann = template)
        member_params.append(draws)
        stacks.append(stacked)

    # One slice per network along a new trailing axis.
    X_t = np.stack(stacks, axis = -1)

    mixing = generate_initial_ensembles(4, var_weights, size_ens)
    # Append the four drawn values as one extra row along axis 1.
    X_t = np.concatenate((X_t, mixing[:, np.newaxis, :]), axis = 1)

    return X_t, np.hstack(member_params), mixing

In [7]:
def get_weighted_targets_with_weights(batch_data, initial_ensembles, size_ens, ann, weights):
    """Evaluate each member's network on ``batch_data`` and scale by ``weights``.

    The unweighted forward pass is identical to ``get_targets_with_weights``,
    so it is delegated there instead of being duplicated line for line.

    Parameters
    ----------
    batch_data : 2-D array, one row per sample.
    initial_ensembles : 2-D array, one row of flattened parameters per member.
    size_ens : number of ensemble members (rows of ``initial_ensembles``).
    ann : template model, used only for layer sizes.
    weights : multiplier broadcast against the ``(size_ens, n_samples)``
        outputs (callers in this notebook pass a ``(size_ens, 1)`` column).

    Returns
    -------
    (weighted_outputs, stack) where ``stack`` is ``weighted_outputs``
    horizontally stacked in front of ``initial_ensembles``.
    """
    raw_outputs, _ = get_targets_with_weights(batch_data, initial_ensembles, size_ens, ann)
    weighted_outputs = raw_outputs*weights
    return weighted_outputs, np.hstack((weighted_outputs, initial_ensembles))

In [8]:
alogp_bottleneck = np.load("..//Data/small_mol_phase_3_features_for_both.npy")

In [9]:
alogp_bottleneck.shape

(959, 64)

In [10]:
# y_valid

In [11]:
y_valid = pd.read_csv("..//Data/smiles_with_rdkit_with_small_phase_3_outputs.csv")

In [12]:
np.sqrt(((y_valid.iloc[:,1:] - y_valid.iloc[:,1:].mean())**2).mean(0))

AlogP                  2.690962
Polar Surface Area    54.234785
dtype: float64

In [13]:
y_valid = np.load('..//Data/smiles_0.7_rdkit_0.3_signal_plus_noise.npy')

In [14]:
y_valid.shape

(959, 2)

In [15]:
y_valid_actual = np.load('..//Data/smiles_0.7_rdkit_0.3_signal.npy')

In [16]:
# y_valid

In [17]:
# Load the fitted target scaler. The file handle is closed as soon as the
# object has been read.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('..//Data//target_scaler.pkl', 'rb') as scaler_file:
    std_targets = pickle.load(scaler_file)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [18]:
# y_valid

In [19]:
y_valid.shape

(959, 2)

In [20]:
y_valid_actual.shape

(959, 2)

In [21]:
y_train = np.hstack((y_valid, y_valid_actual))

In [22]:
y_train.shape

(959, 4)

In [23]:
y_train_full = pd.read_csv("..//Data//y_train.csv")
# y_train_full = std_targets.transform(y_train_full)

In [24]:
y_train_full_transform = std_targets.transform(y_train_full)

In [25]:
R_t = np.cov(y_train_full_transform.T)

In [26]:
R_t

array([[ 1.00000059, -0.30301738],
       [-0.30301738,  1.00000059]])

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
# Hold out 25% of the rows for validation, shuffled with a fixed seed.
# NOTE: `y_train` enters as the 4-column stack built above (the *_signal_plus_noise
# array next to the *_signal array) and both `y_train` and `y_valid` are rebound
# here, so re-running this cell on its own splits the already-split arrays.
x_train, x_valid, y_train, y_valid = train_test_split(alogp_bottleneck, y_train, test_size = 0.25, shuffle = True, 
                                                     random_state = 42)

In [29]:
x_train.shape

(719, 64)

In [30]:
y_train, y_train_actual = y_train[:,:2], y_train[:,2:]

In [31]:
y_valid, y_valid_actual = y_valid[:,:2], y_valid[:,2:]

In [32]:
# R_t = np.cov(y_train.T)

In [33]:
# R_t

In [34]:
# Entries of the 2x2 covariance of the transformed full training targets.
# NOTE: `R_t` is rebound to the large block matrix a few cells below, so this
# cell only reads the intended values when it runs before that rebinding.
var1 = R_t[0,0]
var2 = R_t[1,1]
cov = R_t[1,0]

In [35]:
# Four n x n blocks (n = number of training rows), each a scaled identity:
# the two target variances on the diagonal blocks, their covariance off-diagonal.
ul = var1*np.identity(x_train.shape[0])
lr = var2*np.identity(x_train.shape[0])
ur = cov*np.identity(x_train.shape[0])
ll = ur.T

In [36]:
# Top and bottom block rows of the 2n x 2n matrix.
first_row = np.hstack((ul, ur))
second_row = np.hstack((ll, lr))

In [37]:
first_row.shape

(719, 1438)

In [38]:
second_row.shape

(719, 1438)

In [39]:
# Assemble the full 2n x 2n matrix. This overwrites the 2x2 `R_t` that the
# `var1` / `var2` / `cov` cell read from.
R_t = np.vstack((first_row, second_row))

In [40]:
R_t.shape

(1438, 1438)

In [41]:
smiles_feats_train = x_train[:, :32]

In [42]:
rdkit_feats_train = x_train[:, 32:]

In [43]:
smiles_feats_valid = x_valid[:, :32]
rdkit_feats_valid = x_valid[:, 32:]

In [44]:
def forward_operation(data1, data2, combined_ensembles , size_ens ):
    """Run the four member networks and mix their outputs with softmax shares.

    Each row of ``combined_ensembles`` holds four equally sized parameter
    blocks followed by the mixing logits. Blocks 1 and 2 are evaluated on
    ``data1`` and scaled by ``model_1`` (the sum of the first two softmax
    weights); blocks 3 and 4 are evaluated on ``data2`` and scaled by
    ``model_2`` (the sum of the remaining softmax weights).

    Parameters
    ----------
    data1, data2 : 2-D feature arrays for the first two / last two networks.
    combined_ensembles : 2-D array, one flattened parameter row per member.
    size_ens : number of ensemble members (rows of ``combined_ensembles``).

    Returns
    -------
    X_t : weighted outputs stacked on their parameter blocks (one slice per
        network on the last axis) with the mixing logits appended as one
        extra row along axis 1.
    initial_ensembles : the parameter blocks and logits re-stacked side by side.
    weighted_alogp : network 1 output + network 3 output.
    weighted_psa : network 2 output + network 4 output.
    model_1, model_2 : the ``(size_ens, 1)`` mixing shares.
    """
    # The Keras model is only a template for layer sizes. Building a fresh one
    # on every call is slow and keeps allocating TensorFlow objects inside the
    # training loop, so build it once and keep it on the function.
    # NOTE: re-run this cell after changing `ann` to drop the cached template.
    samp_ann = getattr(forward_operation, "_template_ann", None)
    if samp_ann is None:
        samp_ann = ann(hidden = 16, input_shape = 32, output_shape = 1)
        forward_operation._template_ann = samp_ann
    params = samp_ann.count_params()

    member_blocks = [combined_ensembles[:, k*params:(k + 1)*params] for k in range(4)]
    initial_ensembles_for_weights = combined_ensembles[:, (4*params):]

    softmax_weights = tf.math.softmax(initial_ensembles_for_weights).numpy()
    model_1 = softmax_weights[:,:2].sum(1).reshape(-1,1)
    model_2 = softmax_weights[:,2:].sum(1).reshape(-1,1)

    # (features, share) used by each of the four networks, in block order.
    plan = ((data1, model_1), (data1, model_1), (data2, model_2), (data2, model_2))
    outputs = []
    stacks = []
    for (features, share), block in zip(plan, member_blocks):
        out, stacked = get_weighted_targets_with_weights(features, block, size_ens = size_ens,
                                                         ann = samp_ann, weights = share)
        outputs.append(out)
        stacks.append(stacked)

    X_t = np.stack(stacks, axis = -1)

    initial_ensembles = np.hstack((*member_blocks, initial_ensembles_for_weights))

    # Append the mixing logits as one extra row along axis 1.
    X_t = np.concatenate((X_t, np.expand_dims(initial_ensembles_for_weights, 1)), axis = 1)

    weighted_alogp = outputs[0] + outputs[2]
    weighted_psa = outputs[1] + outputs[3]

    return X_t, initial_ensembles, weighted_alogp, weighted_psa, model_1, model_2

In [45]:
samp_ann =  ann(hidden = 16, input_shape = 32, output_shape = 1)

2023-06-12 21:31:58.690635: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :
2023-06-12 21:31:58.690660: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-06-12 21:31:58.690674: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c2518.crane.hcc.unl.edu): /proc/driver/nvidia/version does not exist
2023-06-12 21:31:58.690866: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [46]:
total_weights = samp_ann.count_params()*4 + 4

In [47]:
reduction = 10

In [48]:
size_ens = total_weights//reduction

In [49]:
size_ens

218

In [50]:
# Draw the starting ensemble: the four networks' parameters plus the four
# extra values per member that `forward_operation` later treats as mixing logits.
X_t, initial_ensembles, initial_ensembles_for_weights = get_initial_X_t(smiles_feats_train, rdkit_feats_train, size_ens = size_ens)

In [51]:
# Append the four extra columns so each row has `total_weights` entries.
# NOTE: not idempotent. Re-running this cell appends another four columns.
initial_ensembles = np.hstack((initial_ensembles, initial_ensembles_for_weights))

In [52]:
# 4 x 2 selector after the transpose: rows 0 and 2 (networks 1 and 3) feed
# column 0, rows 1 and 3 (networks 2 and 4) feed column 1.
G_t = [[1, 0, 1, 0], [0, 1, 0, 1]]
G_t = np.array(G_t).T

In [53]:
G_t

array([[1, 0],
       [0, 1],
       [1, 0],
       [0, 1]])

In [54]:
def get_predictions(data1, data2, initial_ensembles):
    """Return every member's mixed prediction for both targets.

    Parameters
    ----------
    data1, data2 : 2-D feature arrays passed on to ``forward_operation``.
    initial_ensembles : 2-D array, one flattened parameter row per member.

    Returns
    -------
    preds : array of shape ``(n_members, n_samples, 2)``; index 0 on the last
        axis is ``weighted_alogp``, index 1 is ``weighted_psa``.
    w1, w2 : the two mixing shares returned by ``forward_operation``.
    """
    # Use the ensemble's own row count instead of the notebook-level global
    # `size_ens`, so the function does not depend on hidden kernel state.
    n_members = initial_ensembles.shape[0]
    _, _, weighted_alogp, weighted_psa, w1, w2 = forward_operation(data1, data2, initial_ensembles,
                                                                   size_ens = n_members)
    preds = np.stack((weighted_alogp, weighted_psa), axis = -1)
    return preds, w1, w2

In [55]:
def calculate_mu_bar_G_bar(data1, data2, initial_ensembles):
    """Ensemble mean of the parameters and of the predicted observations.

    The predicted observation vector of a member is laid out target by target:
    first every sample's value for column 0 of ``G_t``, then every sample's
    value for column 1.

    Parameters
    ----------
    data1, data2 : 2-D feature arrays passed on to ``forward_operation``.
    initial_ensembles : 2-D array, one flattened parameter row per member.

    Returns
    -------
    mu_bar : column vector, mean parameter row.
    G_bar : column vector, mean predicted observation vector.
    G_u : ``(n_members, G_t.shape[1] * n_samples)`` predicted observations.
    """
    n_obs = data1.shape[0]
    n_members = initial_ensembles.shape[0]
    mu_bar = initial_ensembles.mean(0)

    X_t, _, _, _, _, _ = forward_operation(data1, data2, initial_ensembles, size_ens = n_members)

    # The first `n_obs` rows along axis 1 of X_t are the network outputs; the
    # rest are parameters. The original built a dense kron(G_t.T, [I | 0])
    # selector and multiplied by it. That product only keeps those output rows
    # and combines the four networks through G_t, which this does directly.
    member_outputs = X_t[:, :n_obs, :]
    G_u = np.einsum('eim,mk->eki', member_outputs, G_t).reshape(n_members, -1)

    G_bar = G_u.mean(0)
    return mu_bar.reshape(-1,1), G_bar.reshape(-1,1), G_u

In [56]:
def calculate_C_u(initial_ensembles, mu_bar, G_bar, G_u):
    """Cross-covariance between parameters and predicted observations.

    Computes ``(1 / J) * sum_j (u_j - mu_bar)(G_j - G_bar)^T`` over the ``J``
    ensemble members as a single matrix product instead of a Python loop of
    outer products.

    Parameters
    ----------
    initial_ensembles : ``(J, n_params)`` parameter rows.
    mu_bar : mean parameter vector (any shape with ``n_params`` entries).
    G_bar : mean predicted observation vector (``n_obs`` entries).
    G_u : ``(J, n_obs)`` predicted observations.

    Returns
    -------
    (C, G_u_minus_G_bar) where ``C`` has shape ``(n_params, n_obs)`` and the
    second item is the centred ``G_u``.
    """
    u_j_minus_u_bar = initial_ensembles - mu_bar.reshape(1,-1)
    G_u_minus_G_bar = G_u -  G_bar.reshape(1,-1)
    # Normalised by the member count J (not J - 1), as in the original loop.
    n_members = initial_ensembles.shape[0]
    c = u_j_minus_u_bar.T @ G_u_minus_G_bar
    return c/n_members, G_u_minus_G_bar

In [57]:
def calculate_D_u( G_bar, G_u):
    """Covariance of the predicted observations across the ensemble.

    Computes ``(1 / J) * sum_j (G_j - G_bar)(G_j - G_bar)^T`` over the ``J``
    rows of ``G_u`` as a single matrix product instead of a Python loop of
    outer products.

    Parameters
    ----------
    G_bar : mean predicted observation vector (``n_obs`` entries).
    G_u : ``(J, n_obs)`` predicted observations.

    Returns
    -------
    ``(n_obs, n_obs)`` symmetric matrix.
    """
    G_u_minus_G_bar = G_u -  G_bar.reshape(1,-1)
    # Normalised by the member count J (not J - 1), as in the original loop.
    n_members = G_u.shape[0]
    d = G_u_minus_G_bar.T @ G_u_minus_G_bar
    return d/n_members

In [58]:
def get_updated_ensemble(data1, data2, initial_ensembles, size_ens = size_ens, inflation_factor = 1.02):
    """Apply one ensemble update step and return the new parameter ensemble.

    Computes ``u_j + C (D + inflation_factor * R_t)^{-1} (y + eps_j - G_j)``
    for every member ``j``, where ``eps_j`` is drawn from N(0, R_t).

    NOTE: the observations ``y`` and the covariance ``R_t`` are read from the
    notebook globals ``y_train`` and ``R_t``; they are not derived from
    ``data1`` / ``data2``.

    Parameters
    ----------
    data1, data2 : 2-D feature arrays passed on to ``forward_operation``.
    initial_ensembles : ``(size_ens, n_params)`` current ensemble.
    size_ens : number of perturbation vectors to draw (one per member).
    inflation_factor : scalar multiplied onto ``R_t`` inside the inverted term.

    Returns
    -------
    Updated ensemble with the same shape as ``initial_ensembles``.
    """
    mu_bar, G_bar, G_u = calculate_mu_bar_G_bar(data1, data2, initial_ensembles)
    C, _ = calculate_C_u(initial_ensembles, mu_bar, G_bar, G_u)
    D = calculate_D_u( G_bar, G_u)

    D_plus_cov = D + (R_t *inflation_factor)

    # One perturbation of the observations per ensemble member.
    noise_mvn = mvn(np.zeros((R_t.shape[0], )), R_t)
    fudging = noise_mvn.rvs(size_ens)
    # `y_train.T.flatten()` lists all of target 0 and then all of target 1,
    # matching the column layout of G_u.
    innovations = (y_train.T.flatten().reshape(1,-1) + fudging) - G_u

    # Solve against the innovations instead of forming the explicit inverse
    # and the full gain matrix: same quantity, fewer right-hand sides and
    # better conditioned.
    correction = C@np.linalg.solve(D_plus_cov, innovations.T)
    return initial_ensembles + correction.T

In [59]:
target_dim = 2

In [60]:
lambda_D = 1

In [61]:
def inverse_transform(data, idx):
    """Map one slice of a 3-D array back to the original target scale.

    Takes ``data[idx, :, :]`` and passes it through the notebook-level
    ``std_targets.inverse_transform``.
    """
    return std_targets.inverse_transform(data[idx, :, :])

In [62]:
from joblib import Parallel, delayed

In [None]:
# Per-iteration mean / standard deviation (across members) of the two mixing shares.
w1_catch = []
w2_catch = []
w1_sd_catch = []
w2_sd_catch = []

# The reference targets do not change between iterations, so map them back to
# the original scale once instead of on every pass.
y_train_cur = std_targets.inverse_transform(y_train_actual)
y_valid_cur = std_targets.inverse_transform(y_valid_actual)

for i in range(0,10000):

    initial_ensembles = get_updated_ensemble(smiles_feats_train, rdkit_feats_train, initial_ensembles)
    G_u_train, w1, w2 = get_predictions(smiles_feats_train, rdkit_feats_train, initial_ensembles)

    w1_catch.append(w1.mean())
    w1_sd_catch.append(w1.std())

    w2_catch.append(w2.mean())
    w2_sd_catch.append(w2.std())

    print([w1.mean(), w1.std()])

    # ---- training metrics -------------------------------------------------
    # Back-transform each member's predictions. A plain comprehension replaces
    # the joblib fan-out, which serialised the whole prediction array for
    # every one of these very small tasks.
    G_u_train = np.array([inverse_transform(G_u_train, member) for member in range(G_u_train.shape[0])])

    # 95% interval across members, computed once for both bounds.
    li_train, ui_train = np.percentile(G_u_train, axis = 0, q = (2.5, 97.5))

    width_train = ui_train - li_train
    avg_width_train = width_train.mean(0)

    ind_train = (y_train_cur >= li_train) & (y_train_cur <= ui_train)
    coverage_train= ind_train.mean(0)

    averaged_targets_train = G_u_train.mean(0)
    rmse_train = np.sqrt(((y_train_cur -averaged_targets_train)**2).mean(0))

    # ---- validation metrics -----------------------------------------------
    G_u_test, _, _ = get_predictions(smiles_feats_valid, rdkit_feats_valid, initial_ensembles)
    G_u_test = np.array([inverse_transform(G_u_test, member) for member in range(G_u_test.shape[0])])

    li_test, ui_test = np.percentile(G_u_test, axis = 0, q = (2.5, 97.5))

    width_test = ui_test - li_test
    avg_width_test = width_test.mean(0)

    ind_test = (y_valid_cur >= li_test) & (y_valid_cur <= ui_test)
    coverage_test= ind_test.mean(0)

    averaged_targets_test = G_u_test.mean(0)
    rmse_test = np.sqrt(((y_valid_cur -averaged_targets_test)**2).mean(0))

    # Optional early stop, disabled in the original run:
    # if coverage_train.mean() < 0.95:
    #     break
    print(rmse_train, coverage_train, avg_width_train)
    print(rmse_test, coverage_test, avg_width_test)

[0.5557693296043877, 0.18819909589029235]
[ 2.95620194 77.74091529] [1. 1.] [  88.59076873 2049.52155638]
[ 2.24238792 72.32664916] [1. 1.] [  88.44222162 2028.68107386]
[0.607140568450584, 0.15223947755334757]
[ 2.50629915 40.14037782] [1. 1.] [  57.33286934 1228.54456706]
[ 2.09565917 37.92861338] [1. 1.] [  56.70959498 1217.37281817]
[0.6132935922716222, 0.12948962889659085]
[ 1.66929276 39.71466258] [1. 1.] [ 41.64673612 913.12921657]
[ 1.3693206  35.31480238] [1. 1.] [ 41.52224419 904.90605339]
[0.629065496269045, 0.11046336564520817]
[ 1.73803029 35.72476084] [1. 1.] [ 32.6008677 674.0642341]
[ 1.53590875 31.45236286] [1. 1.] [ 32.34683112 664.80818299]
[0.6669729706470664, 0.09425972874675698]
[ 1.59630734 34.89358734] [1. 1.] [ 21.37879881 500.53254476]
[ 1.5019889  31.71997876] [1. 1.] [ 21.1921768  491.15782294]
[0.6616142056502576, 0.08691381558089438]
[ 2.3984956  36.62396722] [1. 1.] [ 17.21982491 408.44031581]
[ 2.29505946 33.85781055] [1. 1.] [ 17.19546278 402.68353886]


In [None]:
G_u_test.shape

In [None]:
import random

In [None]:
# Pick 8 distinct validation rows to plot. No seed is set in the visible cells,
# so a different subset is drawn on each run.
random_idx = random.sample(range(y_valid_cur.shape[0]), k = 8)

In [None]:
# One row of panels per sampled validation point, one column per target:
# histogram of the members' predictions, the reference value in green and the
# 2.5 / 97.5 percentiles in red.
fig, axs = plt.subplots(8, 2,figsize=(15, 15))
for row, sample in enumerate(random_idx):
    truth = y_valid_cur[sample, :]
    draws = G_u_test[:, sample, :]
    lower, upper = np.percentile(draws, axis = 0, q = (2.5, 97.5))

    for col in range(2):
        panel = axs[row, col]
        panel.hist(draws[:, col])
        panel.axvline(truth[col], color='green', linewidth=2)
        for bound in (lower[col], upper[col]):
            panel.axvline(bound, color='red', linewidth=2)

plt.savefig('prediction_intervals.png', bbox_inches='tight')

In [None]:
# Reference value against ensemble-mean prediction for each target, with the
# identity line for comparison. One figure per target.
for col in (0, 1):
    plt.scatter(y_valid_cur[:, col], averaged_targets_test[:, col])
    plt.axline((0,0), slope = 1, c= "black")
    plt.show()

In [None]:
# Mean of the first mixing share at each iteration, with its standard deviation
# across ensemble members as the error bar.
plt.errorbar(range(0, len(w1_catch)), w1_catch, w1_sd_catch)