New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
online covariance - example #43
Comments
Although I am not @brookehus, perhaps I can be of help as well. Sorry for the lack of documentation at this point; it will follow shortly. Once you have set up the package, you can use the estimator as follows:
from sktime.covariance.online_covariance import OnlineCovariance
cov_estimator = OnlineCovariance(lagtime=10, compute_c00=True, compute_c0t=True,
compute_ctt=True, remove_data_mean=False, reversible=False, bessels_correction=True)
# data is numpy array of (T, N) where T is number timesteps and N the dimension
cov_estimator.fit(data)
# or do it in online fashion with (B, N), B < T fragments
for chunk in data:
cov_estimator.partial_fit(chunk)
# fetch results
model = cov_estimator.fetch_model()
# model has estimated properties
print(model.cov_00, model.cov_0t, model.cov_tt, model.mean_0, model.mean_t,
model.bessels_correction) |
Hi @clonker !! Thank you for your help! I have tried the following - import yfinance as yf
data = yf.download("SPY GOOGL", start="2014-01-01", end="2019-04-30")
data
return_target=data['Close'].pct_change().dropna()
rt=return_target.to_numpy()
from sktime.covariance.online_covariance import OnlineCovariance
cov_estimator = OnlineCovariance(lagtime=10, compute_c00=True, compute_c0t=True,
compute_ctt=True, remove_data_mean=False, reversible=False, bessels_correction=True)
# data is numpy array of (T, N) where T is number timesteps and N the dimension
r=cov_estimator.fit(rt)
for chunk in rt:
cov_estimator.partial_fit(chunk)
# fetch results
model = cov_estimator.fetch_model()
# model has estimated properties
print(model.cov_00, model.cov_0t, model.cov_tt, model.mean_0, model.mean_t,
model.bessels_correction) |
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-5-f9f9a240ad2e> in <module>
1 for chunk in rt:
----> 2 cov_estimator.partial_fit(chunk)
3 # fetch results
4 model = cov_estimator.fetch_model()
5 # model has estimated properties
~/scikit-time/sktime/base.py in __call__(self, *args, **kwargs)
191 # here we invoke the immutable setting context manager.
192 with self:
--> 193 return self.fit_method(*args, **kwargs)
194
195
~/scikit-time/sktime/covariance/online_covariance.py in partial_fit(self, data, weights, column_selection)
196 # TODO: types, shapes checking!
197 try:
--> 198 self._rc.add(x, y, column_selection=column_selection, weights=weights)
199 except MemoryError:
200 raise MemoryError('Covariance matrix does not fit into memory. '
~/scikit-time/sktime/covariance/util/running_moments.py in add(self, X, Y, weights, column_selection)
291 w, s, C = moments_block(X, Y, remove_mean=self.remove_mean,
292 sparse_mode=self.sparse_mode, modify_data=self.modify_data,
--> 293 column_selection=column_selection, diag_only=self.diag_only)
294 # make copy in order to get independently mergeable moments
295 if column_selection is not None:
~/scikit-time/sktime/covariance/util/moments.py in moments_block(X, Y, remove_mean, modify_data, sparse_mode, sparse_tol, column_selection, diag_only)
904 sparse_mode = 'dense'
905 # sparsify
--> 906 X0, mask_X, xconst = _sparsify(X, sparse_mode=sparse_mode, sparse_tol=sparse_tol)
907 Y0, mask_Y, yconst = _sparsify(Y, sparse_mode=sparse_mode, sparse_tol=sparse_tol)
908 is_sparse = mask_X is not None and mask_Y is not None
~/scikit-time/sktime/covariance/util/moments.py in _sparsify(X, remove_mean, modify_data, sparse_mode, sparse_tol)
137 # This is a rough heuristic to choose a minimum column number for which sparsity may pay off.
138 # This heuristic is good for large number of samples, i.e. it may be inadequate for small matrices X.
--> 139 if X.shape[1] < 250:
140 min_const_col_number = X.shape[1] - 0.25 * X.shape[1]
141 elif X.shape[1] < 1000:
IndexError: tuple index out of range |
I get the above. |
Also, how do I print out the covariance matrix? |
I think we still assume that the shape of the input arrays is at least two-dimensional (or exactly 2d?!). You should try reshaping with np.atleast_2d(chunk) |
You are fitting your data twice; you can remove the `for chunk in rt: cov_estimator.partial_fit(chunk)` bit. This is also what fails, because it expects chunks and not single data frames. The print statement at the very end of your script also prints the estimated values. |
Amazing, thank you! import yfinance as yf
data = yf.download("SPY GOOGL", start="2014-01-01", end="2019-04-30")
data
return_target=data['Close'].pct_change().dropna()
rt=return_target.to_numpy()
from sktime.covariance.online_covariance import OnlineCovariance
cov_estimator = OnlineCovariance(lagtime=10, compute_c00=True, compute_c0t=True,
compute_ctt=True, remove_data_mean=False, reversible=False, bessels_correction=True)
# data is numpy array of (T, N) where T is number timesteps and N the dimension
r=cov_estimator.fit(rt)
# fetch results
model = cov_estimator.fetch_model()
# model has estimated properties
print(model.cov_00, model.cov_0t, model.cov_tt, model.mean_0, model.mean_t,
model.bessels_correction)
model.cov_tt
[*********************100%***********************] 2 of 2 completed
[[0. 0.]
[0. 0.]] [[ 0. 0.]
[-0. -0.]] [[0. 0.]
[0. 0.]] [0.001 0. ] [0.001 0. ] True
array([[0., 0.],
[0., 0.]]) |
Hi @clonker !! Hope you're well! import yfinance as yf
data = yf.download("SPY GOOGL", start="2014-01-01", end="2019-04-30")
data
return_target=data['Close'].pct_change().dropna()
rt=return_target.to_numpy()
from sktime.covariance.online_covariance import OnlineCovariance
cov_estimator = OnlineCovariance(lagtime=10, compute_c00=True, compute_c0t=True,
compute_ctt=True, remove_data_mean=False, reversible=False, bessels_correction=True)
# data is numpy array of (T, N) where T is number timesteps and N the dimension
r=cov_estimator.fit(rt)
# fetch results
model = cov_estimator.fetch_model()
# model has estimated properties
print(model.cov_00, model.cov_0t, model.cov_tt, model.mean_0, model.mean_t,
model.bessels_correction)
rr=r.fetch_model()
rr.cov_tt
import yfinance as yf
data = yf.download("SPY GOOGL", start="2014-01-01", end="2019-04-30")
data
return_target=data['Close'].pct_change().dropna()
rt=return_target.to_numpy()
from sktime.covariance.online_covariance import OnlineCovariance
cov_estimator = OnlineCovariance(lagtime=10, compute_c00=True, compute_c0t=True,
compute_ctt=True, remove_data_mean=False, reversible=False, bessels_correction=True)
# data is numpy array of (T, N) where T is number timesteps and N the dimension
r=cov_estimator.fit(rt)
# fetch results
model = cov_estimator.fetch_model()
# model has estimated properties
print(model.cov_00, model.cov_0t, model.cov_tt, model.mean_0, model.mean_t,
model.bessels_correction)
rr=r.fetch_model()
rr.cov_tt
[*********************100%***********************] 2 of 2 completed
[[2.15058341e-04 8.28774027e-05]
[8.28774027e-05 6.98308863e-05]] [[ 6.20407004e-06 2.29663026e-06]
[-2.43678216e-06 -8.93559386e-07]] [[2.14642598e-04 8.26568283e-05]
[8.26568283e-05 6.95681211e-05]] [0.00069288 0.00037487] [0.00071807 0.00038442] True
array([[2.14642598e-04, 8.26568283e-05],
[8.26568283e-05, 6.95681211e-05]] |
Dear Andrew, I am glad you got it to work! Since it is the covariance matrix, you can expect it to be a 2x2 matrix for (N,2)-dimensional data (this is what you meant with two values, right?). The diagonal elements are variances (i.e. the covariance of each component with itself). You can also look at |
Hey @clonker !! Thank you so much for your help! So is rr.cov_tt the full covariance matrix? |
Hey, yes
|
Thank you @clonker amazing stuff!! |
Hi @andrewczgithub, you are welcome and likewise! |
hi @brookehus !
This looks like a very exciting library!
Is there an example of the online covariance calculation?
It's a problem I am looking at at the moment.
Kind regards,
Andrew
The text was updated successfully, but these errors were encountered: