# Contextual bandit (CB) bake-off

Epsilon-greedy exploration combined with importance-weighted regression (IWR) oracles.

## Read data

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.datasets import load_svmlight_file
import re

In [4]:
# Directory that holds every multilabel dataset file referenced below.
data_root = "./data/multilabel"

# Feature dimensionality per dataset; passed as `n_features` to the svmlight loader.
nb_feat = {
    "yeast": 103,
    "scene": 294,
    "mediamill": 120,
    "rcv1": 47236,
    "tmc": 30438,
}

# Training split file for each dataset key.
paths_train = {
    "yeast": f"{data_root}/yeast_train.svm",
    "scene": f"{data_root}/scene_train",
    "mediamill": f"{data_root}/train-exp1.svm",
    "rcv1": f"{data_root}/rcv1_topics_train.svm",
    "tmc": f"{data_root}/tmc2007_test.svm".replace("_test", "_train"),
}

# Test split file for each dataset key; "rcv1" has no test file configured
# (empty string).
paths_test = {
    "yeast": f"{data_root}/yeast_test.svm",
    "scene": f"{data_root}/scene_test",
    "mediamill": f"{data_root}/test-exp1.svm",
    "rcv1": "",
    "tmc": f"{data_root}/tmc2007_test.svm",
}

def read_multilabel_data(dataset, split="train"):
    """Load one split of a multilabel dataset stored in svmlight format.

    Args:
        dataset: Key into ``nb_feat`` and ``paths_train`` / ``paths_test``
            (e.g. ``"yeast"``).
        split: Either ``"train"`` or ``"test"``.

    Returns:
        A ``(feats, labels)`` tuple: ``feats`` is a dense, C-contiguous
        array with ``nb_feat[dataset]`` columns, ``labels`` is the binary
        indicator matrix produced by ``MultiLabelBinarizer``.

    Raises:
        ValueError: If ``split`` is neither ``"train"`` nor ``"test"``.
        KeyError: If ``dataset`` is not a configured dataset key.
        FileNotFoundError: If no file is configured for the requested split
            or the configured file does not exist.
    """
    # Validate up front: previously an unknown split left `svmfile` unbound
    # and surfaced as an unrelated UnboundLocalError.
    if split not in ("train", "test"):
        raise ValueError(f"split must be 'train' or 'test', got {split!r}")

    svmfile = (paths_train if split == "train" else paths_test)[dataset]
    if not svmfile:
        # Some datasets have an empty path for a split (no file available).
        raise FileNotFoundError(
            f"no {split} file configured for dataset {dataset!r}"
        )
    n_features = nb_feat[dataset]

    with open(svmfile, "rb") as f:
        feats, labels = load_svmlight_file(
            f, n_features=n_features, multilabel=True
        )

    # NOTE: the binarizer is fitted on this split only, so the label columns
    # are derived from the labels present in this file.
    labels = MultiLabelBinarizer().fit_transform(labels)

    # Densify directly to an ndarray (avoids the intermediate np.matrix that
    # `todense()` returns). The full dense matrix is held in memory.
    feats = np.ascontiguousarray(feats.toarray())
    return feats, labels

# Dataset used by the cells below; must be a key of the path/feature tables.
curr_dataset = "yeast"
X, y = read_multilabel_data(dataset=curr_dataset, split="train")

# One action per column of the label indicator matrix.
nactions = y.shape[1]

## Models

In [5]:
class EpsilonGreedy():
    """Epsilon-greedy action selection over ``nactions`` discrete actions.

    With probability ``epsilon`` an action is drawn uniformly at random;
    otherwise the action with the highest predicted reward is played.
    After every call to ``select_action`` the instance exposes, via
    ``probs``, the probability the policy assigned to each action for that
    round, and via ``counts`` how often each action has been played.
    """
    def __init__(self, oracles, nactions, epsilon, random_state=42):
        """Store the configuration and seed NumPy's global RNG.

        Args:
            oracles: Reward oracle(s); stored on the instance, not used by
                the methods of this class.
            nactions: Number of actions (actions are ``0 .. nactions - 1``).
            epsilon: Exploration rate in ``[0, 1]``; ``0`` is purely greedy.
            random_state: Seed passed to ``np.random.seed`` (global state).
        """
        # epsilon = 0: greedy approach
        assert epsilon >= 0.0 and epsilon <= 1.0

        self.oracles = oracles
        self.nactions = nactions
        self.epsilon = epsilon

        self.probs = [self.epsilon/self.nactions] * nactions
        self.counts = [0] * self.nactions
        self.estimates = [1.0] * nactions

        self.random_state = random_state
        self.selected_action = None
        self.test = 1

        np.random.seed(self.random_state)

    def update_probs(self, tied_actions):
        """Recompute ``self.probs`` for a round whose greedy set is ``tied_actions``.

        Every action receives ``epsilon / nactions``; the remaining
        ``1 - epsilon`` is split evenly among ``tied_actions``. The list is
        updated in place.
        """
        # Use the instance's own configuration (the previous version read
        # module-level globals named `epsilon` and `nactions`).
        floor = self.epsilon / self.nactions
        for i in range(self.nactions):
            self.probs[i] = floor
        for a in tied_actions:
            self.probs[a] += (1.0 - self.epsilon) / len(tied_actions)

    def get_probs(self):
        """Return the per-action probabilities of the most recent round."""
        return self.probs

    def get_counts(self):
        """Return how many times each action has been selected."""
        return self.counts

    def select_action(self, rewards, only_first=True, step_size=0.01):
        """Pick an action for the current round and update ``probs``/``counts``.

        Args:
            rewards: Predicted reward per action, indexable by the action
                ids ``0 .. nactions - 1`` (a list, an array, or a dict
                keyed by action id).
            only_first: If true, ties for the best reward resolve to the
                lowest action id; otherwise one of the tied actions is
                drawn at random.
            step_size: Unused; kept for interface compatibility.

        Returns:
            The selected action id as an ``int``.
        """
        # Read values by action id so dict inputs are compared by value
        # rather than by key.
        values = [rewards[a] for a in range(self.nactions)]

        if only_first:
            greedy_actions = [max(range(self.nactions), key=values.__getitem__)]
        else:
            best = max(values)
            greedy_actions = [a for a, v in enumerate(values) if v == best]

        # exploration uniformly and randomly for a fraction of time equal to epsilon
        if np.random.random() < self.epsilon:
            action = np.random.randint(0, self.nactions)
        elif only_first:
            action = greedy_actions[0]
        else:
            # break tie by randomly choosing an action
            action = np.random.choice(greedy_actions)
        action = int(action)

        self.selected_action = action
        # The (1 - epsilon) mass always belongs to the greedy set, including
        # on rounds where the exploration branch produced the action, so
        # that `probs[action]` is the probability of that action under the
        # policy (what an importance weight needs).
        self.update_probs(greedy_actions)
        self.counts[action] += 1
        return action

In [2]:
from copy import deepcopy
from typing import List
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.utils.validation import check_is_fitted

from abc import ABC, abstractmethod


class BaseOracle(ABC):
    """Common base type for reward oracles.

    Declares no abstract methods at present; it only provides a shared
    parent class and a no-op constructor.
    """

    def __init__(self):
        """No state is set up at the base level."""
        return None

    # Disabled sketch of a shared prediction interface, kept for reference:
    #
    #     @abstractmethod
    #     def __predict__(self, oracle, X, y):
    #         pass
    #
    #     def predict(self, X, y):
    #         out = {i: self.__predict__(i, X, y) for i in self.nactions}
    #         return pd.Series(out)


class IWR(BaseOracle):
    """Normalized + importance weight-aware linear regressor.

    Keeps one ``StandardScaler`` and one ``SGDRegressor`` per action, both
    updated incrementally with ``partial_fit``.
    """
    def __init__(self, nactions, min_r):
        """Create one scaler/regressor pair per action.

        Args:
            nactions: Iterable of action ids (despite the name, this is the
                collection of actions, not their count); it is iterated to
                build and to query the per-action models.
            min_r: Value returned by ``predict`` for an action whose
                regressor has not been fitted yet.
        """
        super().__init__()
        self.min_r = min_r
        self.nactions = nactions
        self.scalers = {a: StandardScaler() for a in self.nactions}
        self.oracles = {a: SGDRegressor() for a in self.nactions}

    def fit(self, X, y, probs, action=None):
        """Incrementally update the per-action models with weight ``1 / probs``.

        Args:
            X: Feature rows for this update.
            y: Regression targets (observed rewards) for ``X``.
            probs: Probability with which the logged action was chosen; its
                inverse is used as the sample weight.
            action: If given, only this action's scaler and regressor are
                updated. If ``None`` (default, the previous behaviour),
                every action's models are updated with the same ``y``.

        Raises:
            KeyError: If ``action`` is not one of the configured actions.
        """
        targets = self.nactions if action is None else [action]
        for a in targets:
            self.scalers[a] = self.scalers[a].partial_fit(X)
            _X = self.scalers[a].transform(X)
            self.oracles[a] = self.oracles[a].partial_fit(
                _X, y, sample_weight=1./probs
            )

    def predict(self, X, y=None):
        """Predict the reward of every action for the first row of ``X``.

        Args:
            X: Feature rows; only the prediction for the first row is kept.
            y: Unused; accepted for interface compatibility.

        Returns:
            Dict mapping each action id to its predicted reward, or to
            ``min_r`` when that action's regressor is not fitted yet.
        """
        # Local import keeps this block self-contained; scikit-learn is
        # already a dependency of this file.
        from sklearn.exceptions import NotFittedError

        pred = {}
        for a in self.nactions:
            try:
                check_is_fitted(self.oracles[a])
            except NotFittedError:
                # Fall back to the default reward and skip the
                # transform/predict calls, which would fail on unfitted
                # estimators.
                pred[a] = self.min_r
                continue
            _X = self.scalers[a].transform(X)
            pred[a] = self.oracles[a].predict(_X)[0]
        return pred

In [3]:
import collections

# Exploration rates to compare; 0 is the purely greedy policy.
epsilon_range = [0, 0.02, 0.05, 0.1]

# Logs of every run keyed by epsilon, so earlier runs are not lost when the
# per-run variables below are rebound on the next iteration.
results_by_epsilon = {}

for epsilon in epsilon_range:
    lactions = list(range(nactions))
    base = IWR(lactions, 1)
    model = EpsilonGreedy(deepcopy(base), nactions, epsilon)

    # Plain lists in the hot loop: np.append copies the whole array on each
    # call, which makes the run quadratic in the number of rows.
    rewards, action_log, proba_log = [], [], []

    # Replay the dataset one row at a time as an online stream.
    for start in range(X.shape[0]):
        X_curr = X[start:start+1, :]
        y_curr = y[start:start+1, :]

        # get oracle predictions
        oracle_preds = base.predict(X_curr, y_curr)

        # decide which action to take; the greedy run (epsilon == 0) breaks
        # ties at random, the exploring runs take the first best action
        action_to_take = model.select_action(
            oracle_preds, only_first=(epsilon != 0.0)
        )

        # Reward is the label indicator of the chosen action for this row.
        observed = y_curr[np.arange(y_curr.shape[0]), action_to_take]
        propensity = model.probs[action_to_take]

        rewards.append(observed.sum())
        action_log.append(action_to_take)
        proba_log.append(propensity)

        # NOTE(review): fit is not told which action was played, so as
        # called here every per-action regressor is updated with this
        # reward - confirm whether only `action_to_take` should be updated.
        base.fit(X_curr, observed, propensity)

    # Same final types as before: float arrays for actions and propensities.
    actions = np.asarray(action_log, dtype=float)
    probas = np.asarray(proba_log, dtype=float)

    results_by_epsilon[epsilon] = {
        "rewards": rewards,
        "actions": actions,
        "probas": probas,
    }

NameError: name 'nactions' is not defined