#### Load necessary packages

In [88]:
import os
import sys
sys.path.insert(0,'../mocsy')

In [10]:
import mocsy
from mocsy import mvars
import numpy as np

In [87]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

#### Calculate fCO2 training data with mocsy routine 

In [41]:
def calc_fCO2(alk, dic, tem, sal, sil, phos):
    # input units
    # alk in mol / kg
    # dic in mol / kg
    # tem in °C
    # sal in PSU
    # sil in mol / kg
    # phos in mol / kg
    n = len(alk)
    return mvars(alk=alk,
                     dic=dic,
                     temp=tem,
                     sal=sal,
                     sil=sil,
                     phos=phos,
                     patm=tuple(1 for _ in range(n)),
                     depth=tuple(5 for _ in range(n)),
                     lat=tuple(np.nan for _ in range(n)),
                     optcon='mol/kg',
                     optt='Tpot',
                     optp='db',
                     optk1k2='l',
                     optb='u74',
                     optkf='pf',
                     opts='Sprc')[2]

In [59]:
np.random.seed(0)
samples = []
sample_size = 6000_000
for i in range(sample_size):
    # alk between 1700e-6 and 2700e-6 mol kg-1
    alk = np.random.uniform(low=1700e-6, high=2700e-6)
    # dic between 1700e-6 mol kg-1 and alk
    dic = np.random.uniform(low=1700e-6, high=alk)
    # tem between 2 and 35 °C
    tem = np.random.uniform(low=2, high=35)
    # sal between 19 and 43 PSU
    sal = np.random.uniform(low=19, high=43)
    # sil between 0 and 134 mumol kg-1
    sil = np.random.uniform(low=0, high=134e-6)
    # phos between 0 and 4 mumol kg-1
    phos = np.random.uniform(low=0, high=4e-6)
    sample = (alk, dic, tem, sal, sil, phos)
    samples.append(sample)

In [75]:
sample_alk, sample_dic, sample_tem, sample_sal, sample_sil, sample_phos =\
zip(*samples)
sample_fco2 = calc_fCO2(sample_alk, sample_dic, sample_tem, sample_sal, sample_sil, sample_phos)

In [77]:
print(np.median(sample_fco2))
print(calc_fCO2(tuple([2200e-6]), tuple([1950e-6]), tuple([18.5]), tuple([31]), tuple([67e-6]), tuple([2e-6])))
print(sample_fco2.shape)
print(type(sample_fco2))

453.50214571167703
[328.11596538]
(6000000,)
<class 'numpy.ndarray'>


In [78]:
ntrain = 5000_000

train_fco2, valid_fco2 = np.split(sample_fco2, [ntrain])
train_alk, valid_alk = np.split(np.array(sample_alk), [ntrain])
train_dic, valid_dic = np.split(np.array(sample_dic), [ntrain])
train_tem, valid_tem = np.split(np.array(sample_tem), [ntrain])
train_sal, valid_sal = np.split(np.array(sample_sal), [ntrain])
train_sil, valid_sil = np.split(np.array(sample_sil), [ntrain])
train_phos, valid_phos = np.split(np.array(sample_phos), [ntrain])

In [71]:
sample_alk_mean = (1700e-6 + 2700e-6) / 2
sample_alk_std = (2700e-6 - 1700e-6) / np.sqrt(12)

sample_dic_mean = 1700e-6 + (2700e-6 - 1700e-6) / 4
sample_dic_std = (2700e-6 - 1700e-6) * np.sqrt(7 / 144)

sample_tem_mean = (2 + 35) / 2
sample_tem_std = (35 - 2) / np.sqrt(12)

sample_sal_mean = (19 + 43) / 2
sample_sal_std = (43 - 19) / np.sqrt(12)

sample_sil_mean = (134e-6 + 0e-6) / 2
sample_sil_std = (134e-6 - 0e-6) / np.sqrt(12)

sample_phos_mean = (4e-6 + 0e-6) / 2
sample_phos_std = (4e-6 - 0e-6) / np.sqrt(12)

print("Some statistics:")
print("Mean for alk samples: {:.6e}, expected: {:.6e}".format(np.mean(train_alk), sample_alk_mean))
print("Std for alk samples: {:.6e}, expected: {:.6e}".format(np.std(train_alk), sample_alk_std))
print("-----")
print("Mean for dic samples: {:.6e}, expected: {:.6e}".format(np.mean(train_dic), sample_dic_mean))
print("Std for dic samples: {:.6e}, expected: {:.6e}".format(np.std(train_dic), sample_dic_std))
print("-----")
print("Mean for tem samples: {:.6e}, expected: {:.6e}".format(np.mean(train_tem), sample_tem_mean))
print("Std for tem samples: {:.6e}, expected: {:.6e}".format(np.std(train_tem), sample_tem_std))
print("-----")
print("Mean for sal samples: {:.6e}, expected: {:.6e}".format(np.mean(sample_sal), sample_sal_mean))
print("Std for sal samples: {:.6e}, expected: {:.6e}".format(np.std(sample_sal), sample_sal_std))
print("-----")
print("Mean for sil samples: {:.6e}, expected: {:.6e}".format(np.mean(train_sil), sample_sil_mean))
print("Std for sil samples: {:.6e}, expected: {:.6e}".format(np.std(train_sil), sample_sil_std))
print("-----")
print("Mean for phos samples: {:.6e}, expected: {:.6e}".format(np.mean(train_phos), sample_phos_mean))
print("Std for phos samples: {:.6e}, expected: {:.6e}".format(np.std(train_phos), sample_phos_std))

Some statistics:
Mean for alk samples: 2.200027e-03, expected: 2.200000e-03
Std for alk samples: 2.886953e-04, expected: 2.886751e-04
-----
Mean for dic samples: 1.950093e-03, expected: 1.950000e-03
Std for dic samples: 2.205415e-04, expected: 2.204793e-04
-----
Mean for tem samples: 1.849863e+01, expected: 1.850000e+01
Std for tem samples: 9.530802e+00, expected: 9.526279e+00
-----
Mean for sal samples: 3.099651e+01, expected: 3.100000e+01
Std for sal samples: 6.927757e+00, expected: 6.928203e+00
-----
Mean for sil samples: 6.697189e-05, expected: 6.700000e-05
Std for sil samples: 3.870030e-05, expected: 3.868247e-05
-----
Mean for phos samples: 1.999234e-06, expected: 2.000000e-06
Std for phos samples: 1.154432e-06, expected: 1.154701e-06


#### Normalize samples and train neural network with pytorch

In [81]:
sample_means = np.array([sample_alk_mean, sample_dic_mean, sample_tem_mean,
                         sample_sal_mean, sample_sil_mean, sample_phos_mean])
sample_stds = np.array([sample_alk_std, sample_dic_std, sample_tem_std,
                         sample_sal_std, sample_sil_std, sample_phos_std])

In [99]:
train_features = np.concatenate([train_alk[:, np.newaxis], train_dic[:, np.newaxis], train_tem[:, np.newaxis],
                               train_sal[:, np.newaxis], train_sil[:, np.newaxis], train_phos[:, np.newaxis]], axis=1)

valid_features = np.concatenate([valid_alk[:, np.newaxis], valid_dic[:, np.newaxis], valid_tem[:, np.newaxis],
                               valid_sal[:, np.newaxis], valid_sil[:, np.newaxis], valid_phos[:, np.newaxis]], axis=1)

print(train_features.shape)
train_features_normalized = (train_features - sample_means) / sample_stds
valid_features_normalized = (valid_features - sample_means) / sample_stds

(5000000, 6)


In [100]:
print("Verify that data is normalized.")
print(np.mean(train_features_normalized, axis=0))
print(np.std(train_features_normalized, axis=0))

Verify that data is normalized.
[ 9.47580989e-05  4.23186847e-04 -1.44276512e-04 -4.32910793e-04
 -7.26599596e-04 -6.63078680e-04]
[1.00006983 1.00028241 1.00047477 0.99986704 1.00046103 0.99976755]


In [97]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.device = torch.device("cuda")
        self.linear1 = nn.Linear(input_size, hidden_size, device=self.device)
        self.linear2 = nn.Linear(hidden_size, hidden_size, device=self.device)
        self.linear3 = nn.Linear(hidden_size, hidden_size, device=self.device)
        self.linear4 = nn.Linear(hidden_size, output_size, device=self.device)

    def forward(self, x):
        x = x.to(self.device)
        x = F.elu(self.linear1(x))
        x = F.elu(self.linear2(x))
        x = F.elu(self.linear3(x))
        x = F.elu(self.linear4(x))
        return x

    def save(self, file_name='model.pth'):
        model_folder_path = './model'
        if not os.path.exists(model_folder_path):
            os.makedirs(model_folder_path)

        file_name = os.path.join(model_folder_path, file_name)
        torch.save(self.state_dict(), file_name)

In [98]:
model = MLP(6, 64, 1)
print(model)
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Number of trainable parameters in the model:", pytorch_total_params)

MLP(
  (linear1): Linear(in_features=6, out_features=64, bias=True)
  (linear2): Linear(in_features=64, out_features=64, bias=True)
  (linear3): Linear(in_features=64, out_features=64, bias=True)
  (linear4): Linear(in_features=64, out_features=1, bias=True)
)
Number of trainable parameters in the model: 8833


In [101]:
batch_size = 1000
learning_rate = 1e-3
n_epochs = 100

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_function = nn.MSELoss()

train_loss = []
valid_loss = []

for epoch in range(n_epochs):
    batch_indices = np.random.randint(ntrain, size=batch_size)
    
    batch_features = np.take(train_features_normalized, batch_indices, axis=0)
    batch_features_torch = torch.from_numpy(batch_features)

    batch_labels = np.take(train_fco2, batch_indices)
    batch_labels_torch = torch.from_numpy(batch_labels)

    valid_features_torch = torch.from_numpy(valid_features_normalized)
    valid_labels_torch = torch.from_numpy(valid_fco2)

In [None]:
def run_epoch(features, labels, lr, optim, loss, optimal=False):

	losses = list()
	rewards = list()

	for x in range(_maxsteps):

		nsteps += 1

		if done:
			break

		if render:
			env.render()

		curr_state = observation

		# training time
		if not optimal:

			# Epsilon-greedy policy
			randnum = np.random.rand(1)
			if randnum < epsilon:
				action = env.action_space.sample()
			else:
				pass
				# TODO - feed the state into the Linear Q function and get the (best) action according to the Q-Learning formula
				action_vals = Q(torch.from_numpy(observation.astype("float32")))#.detach().cpu().numpy()
				action = np.argmax(action_vals.detach().cpu().numpy())

				observation, reward, done, _, info = env.step(action)

				# TODO - compute the MSE (i.e., the loss function between targets and predictions) according to the Q-Learning formula
				optim.zero_grad()
				estimate = torch.max(action_vals)
				Q_next_state = Q(torch.from_numpy(observation.astype("float32")))#.detach().cpu().numpy()
				target = reward + discount * torch.max(Q_next_state)
				l = loss(target, estimate)
				# the targets should be: reward + discount * np.max(Q(next_state))
				# the estimate should be: current estimate of Q(, action)
				#loss = crit(targets, estimate)
				l.backward()
				optim.step()
				losses.append(l.detach().cpu().numpy())
				rewards.append(reward)

		# evaluation time
		else:
			# TODO - feed the state into the Linear Q function and get the (best) action according to the Q-Learning formula
			action_vals = Q(torch.from_numpy(observation.astype("float32")))#.detach().cpu().numpy()
			action = np.argmax(action_vals.detach().cpu().numpy())
			observation, reward, done, _, info = env.step(action)

	return rewards, losses