In [25]:
import numpy as np
import pickle

In [26]:
class LinUCB:
    """Disjoint LinUCB contextual bandit: one linear reward model per arm.

    For every arm ``a`` (identified by ``0 .. num_arms - 1``) the model keeps:

    * ``A[a]``     -- (num_features, num_features) matrix, initialised to identity
      and incremented by ``x x^T`` on every update;
    * ``A_inv[a]`` -- the inverse of ``A[a]``, maintained incrementally;
    * ``b[a]``     -- (num_features, 1) column, initialised to zero and incremented
      by ``reward * x`` on every update.

    Args:
        num_arms: Number of arms.
        num_features: Length of each context vector.
        alpha: Multiplier applied to the uncertainty term of the UCB score.
    """

    def __init__(self, num_arms, num_features, alpha=1.0):
        self.num_arms = num_arms
        self.num_features = num_features
        self.alpha = alpha

        # Separate identity matrices per arm so in-place updates never alias.
        self.A = {a: np.identity(num_features) for a in range(num_arms)}
        self.A_inv = {a: np.identity(num_features) for a in range(num_arms)}
        self.b = {a: np.zeros((num_features, 1)) for a in range(num_arms)}

    def select_top_n(self, context_matrix, n=10):
        """Selects top-N arms based on UCB scores.

        The score of arm ``a`` with context ``x`` is
        ``theta_a^T x + alpha * sqrt(x^T A_inv[a] x)`` where
        ``theta_a = A_inv[a] b[a]``.

        Args:
            context_matrix: Indexable by arm id; entry ``a`` is an array-like
                holding the ``num_features`` context values for arm ``a``.
            n: Maximum number of arms to return. Values ``<= 0`` yield an
                empty list; ``None`` returns every arm.

        Returns:
            List of arm ids ordered by descending UCB score. Arms with equal
            scores keep ascending arm-id order (the sort is stable).
        """
        ucb_scores = []
        for a in range(self.num_arms):
            context = np.asarray(context_matrix[a], dtype=float).reshape(-1, 1)
            arm_inv = self.A_inv[a]

            theta = np.dot(arm_inv, self.b[a])
            mean_reward = np.dot(theta.T, context).item()

            # Rounding error accumulated by the incremental inverse can push
            # the quadratic form slightly below zero; clamp it so the square
            # root cannot produce NaN and corrupt the ranking.
            variance = np.dot(context.T, np.dot(arm_inv, context)).item()
            uncertainty = self.alpha * np.sqrt(max(variance, 0.0))

            ucb_scores.append((a, float(mean_reward + uncertainty)))

        # Sort arms by UCB score and return top-N
        ucb_scores.sort(key=lambda x: x[1], reverse=True)
        # A negative n would otherwise slice "all but the last |n|" arms.
        limit = None if n is None else max(n, 0)
        return [arm for arm, _ in ucb_scores[:limit]]

    def update(self, action, context, reward):
        """Updates the model after receiving feedback.

        Args:
            action: Id of the arm that was played.
            context: Array-like with the ``num_features`` context values that
                were shown for that arm.
            reward: Observed scalar reward.
        """
        context = np.asarray(context, dtype=float).reshape(-1, 1)
        self.A[action] += np.dot(context, context.T)
        self.b[action] += reward * context

        # Efficient A_inv update using the Sherman-Morrison formula:
        # (A + x x^T)^-1 = A^-1 - (A^-1 x)(A^-1 x)^T / (1 + x^T A^-1 x)
        # (uses the previous A_inv, which is symmetric for this update rule).
        A_inv = self.A_inv[action]
        v = np.dot(A_inv, context)
        self.A_inv[action] = A_inv - (np.dot(v, v.T) / (1 + np.dot(context.T, v)))


In [27]:
# Problem size used when the model is built: how many arms the bandit has
# and how many values each context vector carries.
num_arms, num_features = 100, 10

In [28]:
# Build a fresh (untrained) LinUCB model with the configured problem size.
linucb_model = LinUCB(num_arms=num_arms, num_features=num_features)

# Serialize the model to disk so it can be loaded for training in GCP.
# NOTE(review): pickle stores classes by reference, so whatever loads this
# file presumably needs a compatible LinUCB definition in scope -- confirm.
model_filename = 'LinUCB_Model_Final.pkl'
with open(model_filename, mode="wb") as model_file:
    pickle.dump(linucb_model, model_file)
print(f"Model saved as {model_filename}")


Model saved as LinUCB_Model_Final.pkl
