In [2]:
import torch
import gpytorch
from torch import nn
import pandas as pd
import numpy as np

class IndicatorEncoder(nn.Module):
    def __init__(self, indicator_dim, context_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(indicator_dim, 128),
            nn.ReLU(),
            nn.Linear(128, context_dim)
        )
    
    def forward(self, indicators):
        return self.net(indicators)

class ContextualKernel(gpytorch.kernels.Kernel):
    def __init__(self, base_kernel, context_dim, param_dim):
        super().__init__()
        self.base_kernel = base_kernel
        self.context_dim = context_dim
        self.param_dim = param_dim
        self.context_scaling = nn.Linear(context_dim, 1)
    
    def forward(self, x1, x2, diag=False, **params):
        # x1 and x2 are (parameters, context embeddings)
        p1, c1 = x1[..., :self.param_dim], x1[..., self.param_dim:]
        p2, c2 = x2[..., :self.param_dim], x2[..., self.param_dim:]
        
        base_k = self.base_kernel(p1, p2, diag=diag, **params)
        
        
        if diag:
            # Compute context similarity just between matching points
            context_diff = c1 - c2  # [N, context_dim]
            context_similarity = torch.exp(-torch.norm(context_diff, dim=-1))  # [N]
            
            scaling_c1 = self.context_scaling(c1).sigmoid().squeeze(-1)  # [N]
            scaling_c2 = self.context_scaling(c2).sigmoid().squeeze(-1)  # [N]
            scaling = scaling_c1 * scaling_c2  # [N]
            
            return base_k * context_similarity * scaling
        else:
            # Full matrix case
            c1_exp = c1.unsqueeze(-2)  # [N, 1, context_dim]
            c2_exp = c2.unsqueeze(-3)  # [1, M, context_dim]
            context_diff = c1_exp - c2_exp  # [N, M, context_dim]
            context_similarity = torch.exp(-torch.norm(context_diff, dim=-1))  # [N, M]
            
            scaling_c1 = self.context_scaling(c1).sigmoid()  # [N, 1]
            scaling_c2 = self.context_scaling(c2).sigmoid()  # [M, 1]
            scaling = scaling_c1 * scaling_c2.transpose(-1, -2)  # [N, M]
            
            return base_k * context_similarity * scaling
        

class RewardVariationalGPModel(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points, context_dim):
        # Define variational distribution + strategy
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(0)
        )
        variational_strategy = gpytorch.variational.VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
        super().__init__(variational_strategy)
        
        self.mean_module = gpytorch.means.ConstantMean()
        base_kernel = gpytorch.kernels.RBFKernel()
        self.covar_module = ContextualKernel(base_kernel, context_dim, param_dim)
    
    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x, x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


class NN_VariationalGP_Model(nn.Module):
    def __init__(self, indicator_dim, param_dim, context_dim, num_inducing=128):
        super().__init__()
        self.indicator_encoder = IndicatorEncoder(indicator_dim, context_dim)
        self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
        self.num_inducing = num_inducing
        self.gp_model = None  # will create dynamically
    
    def initialize_gp(self, indicators, parameters):
        context_embeddings = self.indicator_encoder(indicators)
        train_x = torch.cat([parameters, context_embeddings], dim=-1)
        
        # Choose random inducing points from training data
        rand_idx = torch.randperm(train_x.size(0))[:self.num_inducing]
        inducing_points = train_x[rand_idx]
        
        self.gp_model = RewardVariationalGPModel(inducing_points, context_dim=context_embeddings.shape[-1])
    
    def forward(self, parameters, indicators):
        context_embeddings = self.indicator_encoder(indicators)
        test_x = torch.cat([parameters, context_embeddings], dim=-1)
        
        self.gp_model.eval()
        self.likelihood.eval()
        with torch.no_grad():
            pred = self.likelihood(self.gp_model(test_x))
        return pred


In [3]:
example_df = pd.DataFrame(columns=['indicators', 'samples'])

indicators = np.arange(0, 10)

for indicator in indicators:
    parameters = np.arange(-0.5, 0.5, 0.001)
    values = parameters + indicator

    samples = np.array([[param, param + indicator] for param in parameters])
    
    example_df = pd.concat([example_df, pd.DataFrame({'indicators': [[indicator, 10 - indicator]], 'samples': [samples]})], ignore_index=True)


In [24]:
example_df = pd.DataFrame(columns=['indicators', 'samples'])

# Make 10 different indicators
for indicator in range(10):
    for indicator2 in range(10):
        # For each indicator, generate 20 parameters between 0 and 1
        parameters = np.linspace(0, 1, 20)
        
        # Let's define the reward as:
        # reward = sin(2π * param) + 0.5 * indicator + noise
        rewards = np.sin(2 * np.pi * parameters) + 0.5 * indicator + 0.9 * indicator2 - 0.1 * indicator * indicator2 + np.random.normal(0, 0.1, size=parameters.shape)
        
        samples = np.stack([parameters, rewards], axis=-1)  # shape (20, 2)
        
        example_df = pd.concat([
            example_df,
            pd.DataFrame({'indicators': [[indicator, indicator2, indicator*0.5, indicator+indicator2, indicator-indicator2, indicator2*0.3]], 'samples': [samples]})
        ], ignore_index=True)

In [26]:
# Load your training data the same way
# train_indicators, train_parameters, train_rewards

indicator_dim = 6
param_dim = 1
context_dim = 2

# Assume you already have your dataframe
# df = pd.read_pickle("your_dataframe.pkl")  # or however you load
df = example_df.copy()

# Sample training points
train_indicators = []
train_parameters = []
train_rewards = []

for idx, row in df.iterrows():
    indicators = torch.tensor(row['indicators'], dtype=torch.float32)
    param_reward_array = row['samples']  # array of shape (N_samples, param_dim + 1)
    
    for sample in param_reward_array:
        param_vec = torch.tensor(sample[:-1], dtype=torch.float32)
        reward_val = torch.tensor(sample[-1], dtype=torch.float32)
        
        train_indicators.append(indicators)
        train_parameters.append(param_vec)
        train_rewards.append(reward_val)

train_indicators = torch.stack(train_indicators)
train_parameters = torch.stack(train_parameters)
train_rewards = torch.stack(train_rewards)

In [34]:
# Pretrain the IndicatorEncoder

class PretrainNet(nn.Module):
    def __init__(self, indicator_dim, param_dim, context_dim):
        super().__init__()
        self.encoder = IndicatorEncoder(indicator_dim, context_dim)
        self.head = nn.Sequential(
            nn.Linear(context_dim + param_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
    
    def forward(self, indicators, parameters):
        context = self.encoder(indicators)
        cat = torch.cat([parameters, context], dim=-1)
        return self.head(cat).squeeze(-1)  # output shape [batch_size]

# Create pretrain model
pretrain_model = PretrainNet(indicator_dim, param_dim, context_dim)

# Optimizer
pretrain_optimizer = torch.optim.Adam(pretrain_model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

# Put your data here (already built above)
# train_indicators: tensor of shape [N, indicator_dim]
# train_parameters: tensor of shape [N, param_dim]
# train_rewards: tensor of shape [N]

# Pretraining loop
pretrain_model.train()
for i in range(3000):  # 3000 steps should be enough
    pretrain_optimizer.zero_grad()
    preds = pretrain_model(train_indicators, train_parameters)
    loss = loss_fn(preds, train_rewards)
    loss.backward()
    pretrain_optimizer.step()
    
    if (i + 1) % 500 == 0:
        print(f"[Pretraining] Iter {i+1}: Loss = {loss.item():.4f}")

# After pretraining, extract the encoder
pretrained_encoder = pretrain_model.encoder


[Pretraining] Iter 500: Loss = 0.1613
[Pretraining] Iter 1000: Loss = 0.0184
[Pretraining] Iter 1500: Loss = 0.0137
[Pretraining] Iter 2000: Loss = 0.0146
[Pretraining] Iter 2500: Loss = 0.0128
[Pretraining] Iter 3000: Loss = 0.0133


In [36]:
# Load your training data the same way
# train_indicators, train_parameters, train_rewards

model = NN_VariationalGP_Model(indicator_dim, param_dim, context_dim)
model.indicator_encoder.load_state_dict(pretrained_encoder.state_dict())
model.initialize_gp(train_indicators, train_parameters)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
mll = gpytorch.mlls.VariationalELBO(model.likelihood, model.gp_model, num_data=train_indicators.size(0))

model.train()
for i in range(1000):
    optimizer.zero_grad()
    
    cat = torch.cat([train_parameters, model.indicator_encoder(train_indicators)], dim=-1)
    # print(cat)

    # output = model.likelihood(model.gp_model(cat))
    output = model.gp_model(cat)
    loss = -mll(output, train_rewards)
    loss.backward()
    optimizer.step()
    
    if (i + 1) % 100 == 0:
        print(f"Iter {i + 1}: Loss = {loss.item():.4f}")


Iter 100: Loss = 1.7279
Iter 200: Loss = 1.1483
Iter 300: Loss = 1.1268
Iter 400: Loss = 1.1175
Iter 500: Loss = 1.0980
Iter 600: Loss = 1.0165
Iter 700: Loss = 0.9465
Iter 800: Loss = 0.8939
Iter 900: Loss = 0.8170
Iter 1000: Loss = 1.0749


In [37]:
# Given new indicator vector and candidate parameters
# new_indicators = torch.tensor([[...]], dtype=torch.float32)
# new_parameters = torch.tensor([[...]], dtype=torch.float32)

new_indicators = train_indicators
new_parameters = train_parameters

# cat = torch.cat([train_parameters, model.indicator_encoder(train_indicators)], dim=-1)
# predictions = model(cat)
predictions = model(new_parameters, new_indicators)
pred_mean = predictions.mean
pred_var = predictions.variance


In [38]:
pred_mean, pred_var

(tensor([1.3486, 1.3590, 1.3427,  ..., 3.9174, 4.0133, 4.1056]),
 tensor([0.6445, 0.6404, 0.6387,  ..., 0.7252, 0.7308, 0.7408]))

In [11]:
for idx, row in df.iterrows():
    indicators = torch.tensor(row['indicators'], dtype=torch.float32)
    param_reward_array = row['samples']  # array of shape (N_samples, param_dim + 1)
    
    for sample in param_reward_array:
        param_vec = torch.tensor(sample[:-1], dtype=torch.float32)
        reward_val = torch.tensor(sample[-1], dtype=torch.float32)
        
        print(reward_val)

tensor(-0.5000)
tensor(-0.4990)
tensor(-0.4980)
tensor(-0.4970)
tensor(-0.4960)
tensor(-0.4950)
tensor(-0.4940)
tensor(-0.4930)
tensor(-0.4920)
tensor(-0.4910)
tensor(-0.4900)
tensor(-0.4890)
tensor(-0.4880)
tensor(-0.4870)
tensor(-0.4860)
tensor(-0.4850)
tensor(-0.4840)
tensor(-0.4830)
tensor(-0.4820)
tensor(-0.4810)
tensor(-0.4800)
tensor(-0.4790)
tensor(-0.4780)
tensor(-0.4770)
tensor(-0.4760)
tensor(-0.4750)
tensor(-0.4740)
tensor(-0.4730)
tensor(-0.4720)
tensor(-0.4710)
tensor(-0.4700)
tensor(-0.4690)
tensor(-0.4680)
tensor(-0.4670)
tensor(-0.4660)
tensor(-0.4650)
tensor(-0.4640)
tensor(-0.4630)
tensor(-0.4620)
tensor(-0.4610)
tensor(-0.4600)
tensor(-0.4590)
tensor(-0.4580)
tensor(-0.4570)
tensor(-0.4560)
tensor(-0.4550)
tensor(-0.4540)
tensor(-0.4530)
tensor(-0.4520)
tensor(-0.4510)
tensor(-0.4500)
tensor(-0.4490)
tensor(-0.4480)
tensor(-0.4470)
tensor(-0.4460)
tensor(-0.4450)
tensor(-0.4440)
tensor(-0.4430)
tensor(-0.4420)
tensor(-0.4410)
tensor(-0.4400)
tensor(-0.4390)
tensor(-