In [1]:
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose

In [2]:
# Load the raw readings; the "date" column is parsed as datetimes and used as the index.
df = (
    pd.read_csv("R_data/NO2_final.csv", parse_dates=["date"])
    .set_index("date")
)

In [3]:
# Preview the first five rows of the raw frame.
df.head(n=5)

Unnamed: 0_level_0,nox,no2,site,code,latitude,longitude,site_type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1996-08-08 00:00:00,43.93,43.93,"- National Physical Laboratory, Teddington",TD0,51.424304,-0.345715,Suburban
1996-08-08 01:00:00,55.39,53.48,"- National Physical Laboratory, Teddington",TD0,51.424304,-0.345715,Suburban
1996-08-08 02:00:00,45.84,45.84,"- National Physical Laboratory, Teddington",TD0,51.424304,-0.345715,Suburban
1996-08-08 03:00:00,36.29,34.38,"- National Physical Laboratory, Teddington",TD0,51.424304,-0.345715,Suburban
1996-08-08 04:00:00,34.38,32.47,"- National Physical Laboratory, Teddington",TD0,51.424304,-0.345715,Suburban


In [5]:
# Daily mean of the numeric measurements for each monitoring site ("code").
# The numeric columns are selected explicitly so the aggregation does not depend on
# pandas silently discarding the text columns ("site", "site_type") before averaging;
# newer pandas versions raise instead of dropping them.
daily_df = (
    df.groupby("code")[["nox", "no2", "latitude", "longitude"]]
    .resample("D")
    .mean()
    .reset_index()
)
# Vectorised date formatting instead of calling a Python lambda once per row.
daily_df["date"] = daily_df["date"].dt.strftime("%Y-%m-%d")
# Re-bind rather than mutate in place so re-running the cell is always safe.
daily_df = daily_df.set_index("date")

KeyboardInterrupt: 

In [None]:
# Preview the first five rows of the daily aggregate.
daily_df.head(n=5)

In [6]:
# Monthly mean of the numeric measurements for each monitoring site ("code").
# The numeric columns are selected explicitly so the aggregation does not depend on
# pandas silently discarding the text columns ("site", "site_type") before averaging.
# `convention="start"` was removed: pandas documents it as applying to PeriodIndex
# only, and "start" is its default, so it had no effect on this DatetimeIndex.
monthly_df = (
    df.groupby("code")[["nox", "no2", "latitude", "longitude"]]
    .resample("M")
    .mean()
    .reset_index()
)
# Vectorised date formatting instead of calling a Python lambda once per row.
monthly_df["date"] = monthly_df["date"].dt.strftime("%Y-%m")
# Re-bind rather than mutate in place so re-running the cell is always safe.
monthly_df = monthly_df.set_index("date")

In [7]:
# Preview the first five rows of the monthly aggregate.
monthly_df.head(n=5)

Unnamed: 0_level_0,code,nox,no2,latitude,longitude
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1997-03,A30,247.941875,60.56043,51.373553,-0.29197
1997-04,A30,263.677406,71.724523,51.373553,-0.29197
1997-05,A30,251.253255,69.521366,51.373553,-0.29197
1997-06,A30,240.40677,61.292412,51.373553,-0.29197
1997-07,A30,257.308118,54.153329,51.373553,-0.29197


In [8]:
# Yearly mean of the numeric measurements for each monitoring site ("code").
# The numeric columns are selected explicitly so the aggregation does not depend on
# pandas silently discarding the text columns ("site", "site_type") before averaging.
yearly_df = (
    df.groupby("code")[["nox", "no2", "latitude", "longitude"]]
    .resample("Y")
    .mean()
    .reset_index()
)
# Vectorised date formatting instead of calling a Python lambda once per row.
yearly_df["date"] = yearly_df["date"].dt.strftime("%Y")
# Re-bind rather than mutate in place so re-running the cell is always safe.
yearly_df = yearly_df.set_index("date")

In [9]:
# Preview the first five rows of the yearly aggregate.
yearly_df.head(n=5)

Unnamed: 0_level_0,code,nox,no2,latitude,longitude
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1997,A30,314.269389,65.982131,51.373553,-0.29197
1998,A30,291.840156,56.801657,51.373553,-0.29197
1999,A30,256.120856,58.470782,51.373553,-0.29197
2000,A30,212.220329,54.903419,51.373553,-0.29197
2001,A30,184.025497,53.48605,51.373553,-0.29197


In [4]:
import gpflow
from gpflow.utilities import print_summary
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler  
import numpy as np

In [120]:
# Monthly per-site means used as the input table for the Gaussian-process model.
# The numeric columns are selected explicitly so the aggregation does not depend on
# pandas silently discarding the text columns ("site", "site_type") before averaging.
# `convention="start"` was removed: pandas documents it as applying to PeriodIndex
# only, so it had no effect on this DatetimeIndex.
# "date" is deliberately left as a datetime column (not formatted to a string and not
# moved into the index) because the following cells read its .year / .month.
gp_df = (
    df.groupby("code")[["nox", "no2", "latitude", "longitude"]]
    .resample("M")
    .mean()
    .reset_index()
)

In [121]:
# Keep only observations from 2020 onwards.
# The explicit .copy() makes gp_df an independent frame, so adding columns to it in a
# later cell does not raise SettingWithCopyWarning on a filtered slice.
gp_df = gp_df.loc[gp_df["date"] >= "2020"].copy()

In [122]:
# Calendar constants for the month index: the first year present in the filtered
# data and the number of months per year.
start_year = gp_df["date"].min().year
n_months = 12
print(f"{n_months} {start_year}")

12 2020


In [123]:
# Month index t: months elapsed since the start of `start_year`, with January of that
# year equal to 1. The calendar month (1-12) is added directly. The previous
# `month % n_months` turned December into 0, which placed every December twelve
# months too early (December of the first year got t = 0, ahead of January's t = 1).
# Values for January-November are unchanged.
# .assign returns a new frame, so this cell is idempotent and never writes into a slice.
gp_df = gp_df.assign(
    t=(gp_df["date"].dt.year - start_year) * n_months + gp_df["date"].dt.month
)
gp_df

Unnamed: 0,code,date,nox,no2,latitude,longitude,t
443,BG1,2020-01-31,45.319394,23.566781,51.563752,0.177891,1
444,BG1,2020-02-29,26.180653,16.929943,51.563752,0.177891,2
445,BG1,2020-03-31,25.280873,18.318843,51.563752,0.177891,3
446,BG1,2020-04-30,18.320308,15.154924,51.563752,0.177891,4
447,BG1,2020-05-31,14.108337,12.496027,51.563752,0.177891,5
...,...,...,...,...,...,...,...
26252,WMD,2021-06-30,40.656934,27.068800,51.492248,-0.147115,18
26253,WMD,2021-07-31,38.901821,26.037352,51.492248,-0.147115,19
26254,WMD,2021-08-31,39.403907,24.043739,51.492248,-0.147115,20
26255,WMD,2021-09-30,50.550462,32.154287,51.492248,-0.147115,21


In [124]:
# Build the model matrices X = (latitude, longitude, t) and Y = no2.
#
# Only complete rows are used: a monthly resample produces NaN for months without any
# readings, and StandardScaler passes NaN through unchanged. Non-finite inputs are the
# likely cause of the "Input matrix is not invertible" failure when fitting the GP --
# TODO confirm by checking gp_df.isna().sum().
gp_complete = gp_df.dropna(subset=["latitude", "longitude", "t", "no2"])

# Features and target each get their own scaler. Re-fitting one scaler on Y (as the
# previous code did) overwrote the feature statistics, so the features could no longer
# be transformed / inverse-transformed consistently afterwards.
feature_scaler = StandardScaler()
target_scaler = StandardScaler()

# Seeded generator so the coordinate jitter (and everything downstream) is reproducible.
_jitter_rng = np.random.RandomState(0)

# copy=True guarantees the in-place edits below never write back into gp_df.
X = gp_complete[["latitude", "longitude", "t"]].to_numpy(dtype=float, copy=True)
# Small Gaussian jitter (sd 0.01) on the coordinates, then standardise them.
# NOTE(review): the time column t is left unscaled, as in the original -- confirm intended.
X[:, 0:2] = X[:, 0:2] + _jitter_rng.normal(0, 1, size=X[:, 0:2].shape) * .01
X[:, 0:2] = feature_scaler.fit_transform(X[:, 0:2])

Y = target_scaler.fit_transform(gp_complete[["no2"]].to_numpy(dtype=float, copy=True))

In [125]:
# Record the design-matrix dimensions (N rows, M input columns) and print both arrays.
N, M = np.shape(X)
print(X, Y)

[[ 9.46854817e-01  2.00111765e+00  1.00000000e+00]
 [ 9.39099967e-01  2.07251347e+00  2.00000000e+00]
 [ 1.15442241e+00  2.13639560e+00  3.00000000e+00]
 ...
 [-1.36460250e-01 -4.26506984e-01  2.00000000e+01]
 [ 5.53557063e-02 -3.62226983e-01  2.10000000e+01]
 [ 2.18162079e-02 -5.21876371e-01  2.20000000e+01]] [[-0.47281583]
 [-1.06155202]
 [-0.9383464 ]
 ...
 [-0.43050621]
 [ 0.28895884]
 [ 0.03870942]]


In [126]:
# Hold out 25% of the rows for validation. A fixed random_state makes the split (and
# therefore every downstream fit and metric) reproducible across kernel restarts.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y, test_size=0.25, random_state=0
)
print(X_train.shape, Y_train.shape, X_val.shape, Y_val.shape)

(1582, 3) (1582, 1) (528, 3) (528, 1)


In [127]:
# Earlier kernel candidates, kept for reference:
#   k = gpflow.kernels.Matern52(3, lengthscales=[1.0, 1.0, 1.0])
#   k = gpflow.kernels.White() + gpflow.kernels.Periodic(gpflow.kernels.IsotropicStationary(), period=12)
#   k = gpflow.kernels.Matern52(active_dims=[2]) + gpflow.kernels.Matern52(active_dims=[0, 1])
#   k = gpflow.kernels.White(1) +\
#       gpflow.kernels.RBF(2, active_dims=[0,1], lengthscales=1.0) +\
#       gpflow.kernels.RBF(1, active_dims=[2], lengthscales=1.0) +\
#       gpflow.kernels.Periodic(gpflow.kernels.RBF(1, active_dims=[2], lengthscales=1.0), period=12.0)
#   k = gpflow.kernels.White()
#
# Current choice: an additive kernel made of a white-noise term plus one Matern-5/2
# term per input column (column 0, column 1 and column 2 of X).
# NOTE(review): the GPR model also carries its own likelihood variance, so the White
# term may be redundant with it -- confirm.
k = (
    gpflow.kernels.White()
    + gpflow.kernels.Matern52(active_dims=[0])
    + gpflow.kernels.Matern52(active_dims=[1])
    + gpflow.kernels.Matern52(active_dims=[2])
)

In [128]:
# Tabulate the kernel's parameters (transform, trainable flag, shape, initial value).
gpflow.utilities.print_summary(k)

╒═════════════════════════════╤═══════════╤═════════════╤═════════╤═════════════╤═════════╤═════════╤═════════╕
│ name                        │ class     │ transform   │ prior   │ trainable   │ shape   │ dtype   │   value │
╞═════════════════════════════╪═══════════╪═════════════╪═════════╪═════════════╪═════════╪═════════╪═════════╡
│ Sum.kernels[0].variance     │ Parameter │ Softplus    │         │ True        │ ()      │ float64 │       1 │
├─────────────────────────────┼───────────┼─────────────┼─────────┼─────────────┼─────────┼─────────┼─────────┤
│ Sum.kernels[1].variance     │ Parameter │ Softplus    │         │ True        │ ()      │ float64 │       1 │
├─────────────────────────────┼───────────┼─────────────┼─────────┼─────────────┼─────────┼─────────┼─────────┤
│ Sum.kernels[1].lengthscales │ Parameter │ Softplus    │         │ True        │ ()      │ float64 │       1 │
├─────────────────────────────┼───────────┼─────────────┼─────────┼─────────────┼─────────┼─────────┼───

In [129]:
# Linear mean function candidate.
# NOTE(review): the model cell below passes mean_function=None, so `meanf` is not
# used by the visible code -- confirm whether it should be wired in or removed.
meanf = gpflow.mean_functions.Linear()

In [130]:
# Exact GP regression on the training split with kernel `k`.
# No explicit mean function is supplied (gpflow.mean_functions.Zero() was the
# alternative considered).
m = gpflow.models.GPR(
    (X_train, Y_train),
    kernel=k,
    mean_function=None,
)

In [131]:
# Start the optimiser from a small observation-noise variance (0.01).
# The cell displays the return value of assign(), which is the underlying
# tf.Variable; presumably the number shown is the unconstrained (pre-transform)
# value rather than 0.01 itself -- TODO confirm.
m.likelihood.variance.assign(0.01)
# Disabled: per-dimension lengthscale initialisation left over from a single-kernel setup.
#m.kernel.lengthscales.assign(np.array([0.3, 0.3, 0.3]))

<tf.Variable 'UnreadVariable' shape=() dtype=float64, numpy=-4.600266525158521>

In [132]:
# GPflow's wrapper around scipy.optimize.minimize, used below to fit the model.
opt = gpflow.optimizers.Scipy()

In [133]:
# Fail fast with a readable message if the training data contains NaN/inf.
# Non-finite inputs make the kernel matrix non-finite, which TensorFlow reports only
# as the opaque "Input matrix is not invertible" error from the Cholesky solve; this
# is the likely cause of that failure here -- TODO confirm.
for _name, _arr in (("X_train", X_train), ("Y_train", Y_train)):
    _n_bad = int((~np.isfinite(_arr)).sum())
    if _n_bad:
        raise ValueError(
            f"{_name} contains {_n_bad} non-finite value(s); "
            "drop or impute them before fitting the GP."
        )

# Minimise the model's training loss over all trainable variables, capped at
# 100 iterations, then show the fitted parameters.
opt_logs = opt.minimize(m.training_loss, m.trainable_variables, options=dict(maxiter=100))
print_summary(m)

InvalidArgumentError:  Input matrix is not invertible.
	 [[node triangular_solve/MatrixTriangularSolve
 (defined at /Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/logdensities.py:97)
]] [Op:__inference__tf_eval_12294]

Errors may have originated from an input operation.
Input Source operations connected to node triangular_solve/MatrixTriangularSolve:
In[0] Cholesky (defined at /Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/models/gpr.py:87)	
In[1] sub (defined at /Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/logdensities.py:96)

Operation defined at: (most recent call last)
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/runpy.py", line 193, in _run_module_as_main
>>>     "__main__", mod_spec)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/runpy.py", line 85, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/traitlets/config/application.py", line 846, in launch_instance
>>>     app.start()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 677, in start
>>>     self.io_loop.start()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
>>>     self._run_once()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
>>>     handle._run()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/asyncio/events.py", line 88, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue
>>>     await self.process_one()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 446, in process_one
>>>     await dispatch(*args)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell
>>>     await result
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 648, in execute_request
>>>     reply_content = await reply_content
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 353, in do_execute
>>>     res = shell.run_cell(code, store_history=store_history, silent=silent)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
>>>     return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2902, in run_cell
>>>     raw_cell, store_history, silent, shell_futures)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2947, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3173, in run_cell_async
>>>     interactivity=interactivity, compiler=compiler, result=result)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3364, in run_ast_nodes
>>>     if (await self.run_code(code, result,  async_=asy)):
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "/var/folders/6c/3n2fd4rj4l3gj84562cjz0kr0000gn/T/ipykernel_16524/1513312791.py", line 1, in <module>
>>>     opt_logs = opt.minimize(m.training_loss, m.trainable_variables, options=dict(maxiter=100))
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/optimizers/scipy.py", line 91, in minimize
>>>     func, initial_params, jac=True, method=method, **scipy_kwargs
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/_minimize.py", line 624, in minimize
>>>     callback=callback, **options)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/lbfgsb.py", line 308, in _minimize_lbfgsb
>>>     finite_diff_rel_step=finite_diff_rel_step)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 262, in _prepare_scalar_function
>>>     finite_diff_rel_step, bounds, epsilon=epsilon)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/_differentiable_functions.py", line 140, in __init__
>>>     self._update_fun()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/_differentiable_functions.py", line 233, in _update_fun
>>>     self._update_fun_impl()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/_differentiable_functions.py", line 137, in update_fun
>>>     self.f = fun_wrapped(self.x)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/_differentiable_functions.py", line 134, in fun_wrapped
>>>     return fun(np.copy(x), *args)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 74, in __call__
>>>     self._compute_if_needed(x, *args)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 68, in _compute_if_needed
>>>     fg = self.fun(x, *args)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/optimizers/scipy.py", line 113, in _eval
>>>     loss, grad = _tf_eval(tf.convert_to_tensor(x))
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/optimizers/scipy.py", line 106, in _tf_eval
>>>     loss, grads = _compute_loss_and_gradients(closure, variables)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/optimizers/scipy.py", line 172, in _compute_loss_and_gradients
>>>     loss = loss_closure()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/models/training_mixins.py", line 63, in training_loss
>>>     return self._training_loss()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/models/model.py", line 56, in _training_loss
>>>     return -(self.maximum_log_likelihood_objective(*args, **kwargs) + self.log_prior_density())
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/models/gpr.py", line 67, in maximum_log_likelihood_objective
>>>     return self.log_marginal_likelihood()
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/models/gpr.py", line 91, in log_marginal_likelihood
>>>     log_prob = multivariate_normal(Y, m, L)
>>> 
>>>   File "/Users/alexherrera/opt/anaconda3/envs/london-aq/lib/python3.7/site-packages/gpflow/logdensities.py", line 97, in multivariate_normal
>>>     alpha = tf.linalg.triangular_solve(L, d, lower=True)
>>> 