In [2]:
import torch
import gpytorch
from data import PVWeatherLoader

In [3]:
# data parameters
DAY_INIT = 40
DAY_MIN = 8
DAY_MAX = 16
N_DAYS = 10
MINUTE_INTERVAL = 60
DAILY_DATA_POINTS = (DAY_MAX - DAY_MIN) * 60 / MINUTE_INTERVAL
N_SYSTEMS = 10
# create a tuple of 4 coordinates that form a polygon in the uk
# and a circle with a radius of r
CIRCLE_COORDS = (55, -1.5)
RADIUS = 0.3
POLY_COORDS = ((50, -6), (50.5, 1.9), (57.6, -5.5), (58, 1.9))

In [4]:
loader = PVWeatherLoader(
    # number of days to get data for
    n_days=N_DAYS,
    # initial day of the data for that season
    # look at the data frame to see which day it is
    day_init=DAY_INIT,
    # number of systems to extract
    n_systems=N_SYSTEMS,
    coords=CIRCLE_COORDS,
    radius=RADIUS,
    # the minute interval our data is sampled at 
    # (e.g. 60 for hourly, 30 for half hourly, 15 for 15 minutes, etc.)
    minute_interval=MINUTE_INTERVAL,
    # the minimum and maximum hour of the day to use
    # (e.g. 8 and 15 for 8am to 3pm)
    day_min=DAY_MIN,
    day_max=DAY_MAX,
    folder_name='pv_data',
    file_name='pv_and_weather.csv',
    distance_method='circle',
    # optionally use a season
    season='winter',
    # optionally drop series with nan values
    drop_nan=False
)

==> Loading data
==> Loaded data in: 0 m : 1 sec



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('season', axis=1, inplace=True)


In [5]:
import torch
import gpytorch
import numpy as np
from torch.nn import Parameter
from gpytorch.distributions import base_distributions
from gpytorch.constraints import Positive, Interval
from gpytorch.priors import Prior
from typing import Optional

class BetaLikelihood_MeanParametrization(gpytorch.likelihoods.BetaLikelihood):
    
    def __init__(self, 
                 scale : Optional[torch.Tensor] = 30,
                 correcting_scale  : Optional[float] = 1,
                 correcting_scale_lower_bound : Optional[float] = 0.1,
                 correcting_scale_upper_bound : Optional[float] = 0.9,
                 *args, **kwargs):
        
        super().__init__(*args, **kwargs)
        
        assert scale > 0, 'scale must be positive'
        assert correcting_scale > 0, 'scale must be positive'
        assert 0 <= correcting_scale_lower_bound <= 1, 'lower bound must be in [0, 1]'
        assert 0 <= correcting_scale_upper_bound <= 1, 'upper bound must be in [0, 1]'
        assert correcting_scale_lower_bound < correcting_scale_upper_bound, 'lower bound must be smaller than upper bound'
        
        self.scale = scale        
        self.correcting_scale = Parameter(torch.tensor(correcting_scale, dtype=torch.float64), 
                                            requires_grad=False)

    
    def forward(self, function_samples, *args, **kwargs):
        
        mixture = torch.distributions.Normal(0, 1).cdf(function_samples)
        print(mixture.shape)
        print(self.scale.shape)
        
        alpha = mixture * self.scale 
        beta = self.scale - alpha 

        print(mixture.shape)
        print(alpha.shape)
        print(beta.shape)
        
        self.alpha = torch.clamp(alpha, 1e-10, 1e10)
        self.beta = torch.clamp(beta, 1e-10, 1e10)

        return base_distributions.Beta(concentration1=self.alpha, concentration0=self.beta)
    
    def mode(self):
        """ 
        Calculate the mode of a beta distribution given the alpha and beta parameters

        Args:
            alpha (torch.Tensor): alpha parameter
            beta (torch.Tensor): beta parameter
        
        Returns:
            result (torch.Tensor): modes of the beta distribution drawn from MC samples
        """
        # detach alpha and beta from the graph
        result = np.zeros_like(self.alpha)  # Initialize an array of zeros with the same shape as alpha

        mask_alpha_gt_1 = self.alpha > 1
        mask_beta_gt_1 = self.beta > 1
        mask_alpha_eq_beta = self.alpha == self.beta
        mask_alpha_le_1 = self.alpha <= 1
        mask_beta_le_1 = self.beta <= 1

        result[mask_alpha_gt_1 & mask_beta_gt_1] = (self.alpha[mask_alpha_gt_1 & mask_beta_gt_1] - 1) / (self.alpha[mask_alpha_gt_1 & mask_beta_gt_1] + self.beta[mask_alpha_gt_1 & mask_beta_gt_1] - 2)
        result[mask_alpha_eq_beta] = 0.5
        result[mask_alpha_le_1 & mask_beta_gt_1] = 0
        result[mask_alpha_gt_1 & mask_beta_le_1] = 1

        return result

class MultitaskBetaLikelihood(BetaLikelihood_MeanParametrization):
    """ 
    A multitask BetaLikelihood that supports multitask GP regression.
    """
    def __init__(
        self,
        num_tasks: int,
        scale = 15,
        correcting_scale = 1,
        batch_shape: torch.Size = torch.Size([]),
        scale_prior: Optional[Prior] = None,
        scale_constraint: Optional[Interval] = None,
    ) -> None:
        super().__init__(scale=scale, correcting_scale=correcting_scale)

        if scale_constraint is None:
            scale_constraint = Positive()

        self.raw_scale = torch.nn.Parameter(torch.ones(*batch_shape, 1, num_tasks))
        if scale_prior is not None:
            self.register_prior("scale_prior", scale_prior, lambda m: m.scale, lambda m, v: m._set_scale(v))

        self.register_constraint("raw_scale", scale_constraint)

    def expected_log_prob(self, observations, function_dist, *args, **kwargs):
        ret = super().expected_log_prob(observations, function_dist, *args, **kwargs)
        
        num_event_dim = len(function_dist.event_shape)
        
        if num_event_dim > 1:  # Do appropriate summation for multitask likelihood
            ret = ret.sum(list(range(-1, -num_event_dim, -1)))
        return ret

In [6]:
from gpytorch.variational import LMCVariationalStrategy

In [7]:
num_tasks = 8
from gpytorch.kernels import IndexKernel

class IndependentMultitaskGPModel(gpytorch.models.ApproximateGP):
    def __init__(self, x_train, y_train, num_tasks):
        # Let's use a different set of inducing points for each task
        y_train = y_train

        # We have to mark the CholeskyVariationalDistribution as batch
        # so that we learn a variational distribution for each task
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            x_train.size(-2), batch_shape=torch.Size([num_tasks])
        )

        variational_strategy = gpytorch.variational.IndependentMultitaskVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, x_train, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=num_tasks,
        )

        super().__init__(variational_strategy)

        # The mean and covariance modules should be marked as batch
        # so we learn a different set of hyperparameters
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_tasks]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_tasks])),
            batch_shape=torch.Size([num_tasks])
        )

    def forward(self, x):
        # The forward function should be written as if we were dealing with each output
        # dimension in batch
        mean_x = self.mean_module(x)
        
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)



In [35]:
from gpytorch.constraints import Positive
from kernels import get_mean_covar
from gpytorch.means import ZeroMean

# input for hadamard model
dict_input = {'input' : [],
              'output' : [],
              'task_indices' : []}

# containts the data for each task at same time intervals
for i, (X, y) in enumerate(loader):
    n = X.shape[0]
    dict_input['input'].append(X)
    task_index = torch.ones(n, dtype=torch.long) * i
    dict_input['task_indices'].append(task_index)
    dict_input['output'].append(y)

task_indices = torch.cat(dict_input['task_indices'])
x = torch.cat(dict_input['input'], dim=0)
y = torch.stack(dict_input['output'], dim=-1)

num_tasks = 8
mean, covar = get_mean_covar(num_latent=num_tasks)
likelihood = MultitaskBetaLikelihood(batch_shape=torch.Size([num_tasks]), num_tasks=y.size(-1))

# TODO should have task indices specified in forward method
# TODO should have index kernel for task covariance?
# TODO one dim or multi dim likelihood?
# TODO independent multitask variational strategy?
# TODO make class that satisfies above requirements

model = IndependentMultitaskGPModel(x, y, num_tasks=num_tasks)

In [36]:
x.shape

torch.Size([640, 5])

In [37]:
model.train()
likelihood.train()

mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=y.size(0))
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

for i in range(10):
    optimizer.zero_grad()
    output_dist = model(x, task_indices=task_indices)
    print(output_dist.shape)
    loss = -mll(output_dist, y)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f' % (i + 1, 10, loss.item()))
    optimizer.step()



<bound method TorchDistributionMixin.shape of MultivariateNormal(loc: torch.Size([640]))>
torch.Size([20, 640])
torch.Size([8, 1, 8])


RuntimeError: The size of tensor a (640) must match the size of tensor b (8) at non-singleton dimension 2