## Código original

In [1]:
import sys
import os

parent_path = '../..'
sys.path.append(parent_path)

import src

In [2]:
# Code adapted from: https://github.com/fidelity/mabwiser/blob/master/mabwiser/linear.py
# The difference is that the original code accept different types of arms (strings, integers, etc) and the modified code only accept sequential integers as arms. With this modification, the code can be optimized.

from mabwiser.linear import _Linear
from mabwiser.utils import Num, _BaseRNG
from typing import List, Optional
import numpy as np
import time

class LinearArmEncoded(_Linear):

    def __init__(self, rng: _BaseRNG, num_arms: int, n_jobs: int, backend: Optional[str],
                 alpha: Num, epsilon: Num, l2_lambda: Num, regression: str, scale: bool):
        super().__init__(rng, np.arange(num_arms).tolist(), n_jobs, backend, alpha, epsilon, l2_lambda, regression, scale)
        self.num_arms = num_arms

    def _vectorized_predict_context(self, contexts: np.ndarray, is_predict: bool) -> List:

        arms = np.arange(self.num_arms)

        # Initializing array with expectations for each arm
        num_contexts = contexts.shape[0]
        arm_expectations = np.empty((num_contexts, self.num_arms), dtype=float)

        # With epsilon probability, assign random flag to context
        random_values = self.rng.rand(num_contexts)
        random_mask = np.array(random_values < self.epsilon)
        random_indices = random_mask.nonzero()[0]

        # For random indices, generate random expectations
        arm_expectations[random_indices] = self.rng.rand((random_indices.shape[0], self.num_arms))

        # For non-random indices, get expectations for each arm
        nonrandom_indices = np.where(~random_mask)[0]
        nonrandom_context = contexts[nonrandom_indices]
        start_time = time.time()
        arm_expectations[nonrandom_indices] = np.array([self.arm_to_model[arm].predict(nonrandom_context)
                                                        for arm in arms]).T
        print(f'Gerar as predições demorou {time.time() - start_time} segundos')

        return arm_expectations if len(arm_expectations) > 1 else arm_expectations[0]
    
    def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> None:

        start_time = time.time()
        # Initialize each model by arm
        self.num_features = contexts.shape[1]
        for arm in self.arms:
            self.arm_to_model[arm].init(num_features=self.num_features)
        print(f'init demorou {time.time() - start_time} segundos')

        start_time = time.time()
        # Reset warm started arms
        # self._reset_arm_to_status()
        print(f'reset_arm_to_status demorou {time.time() - start_time} segundos')

        start_time = time.time()
        # Perform parallel fit
        self._parallel_fit(decisions, rewards, contexts)
        print(f'parallel_fit demorou {time.time() - start_time} segundos')

        # Update trained arms
        start_time = time.time()
        # Removi o código abaixo pois parece que ele não é usado para o nosso caso...
        # Ele parece ser usado apenas no contexto de tentar fazer "warm" start
        # Basicamente, copiando os mesmos parâmetros de um arm já treinado para um novo (cold) por proximidade de features...

        # Otimizar essa função não parece ser algo tão trivial, já que teria que mudar a estrutura do arm_to_status, tendo que mudar vários outros códigos por causa disso
        # self._set_arms_as_trained(decisions=decisions, is_partial=False)
    
    def partial_fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> None:
        # Perform parallel fit
        self._parallel_fit(decisions, rewards, contexts)

        # Update trained arms
        # self._set_arms_as_trained(decisions=decisions, is_partial=True)
    
    def _parallel_fit(self, decisions: np.ndarray, rewards: np.ndarray,
                      contexts: Optional[np.ndarray] = None):
        
        # Compute effective number of jobs
        #n_jobs = self._effective_jobs(len(self.arms), self.n_jobs)
        # Perform parallel fit
        #Parallel(n_jobs=n_jobs, require='sharedmem')(
        #                  delayed(self._fit_arm)(
        #                      arm, decisions, rewards, contexts)
        #                 for arm in self.arms)
        
        for arm in self.arms:
            self._fit_arm(arm, decisions, rewards, contexts)

In [3]:
# Code adapted from: https://github.com/fidelity/mabwiser/blob/master/mabwiser/mab.py
# The difference is that the original code accept different types of arms (strings, integers, etc) and the modified code only accept sequential integers as arms. With this modification, the code can be optimized.

from mabwiser.mab import MAB, LearningPolicyType, NeighborhoodPolicyType

from mabwiser.utils import Constants, check_true, create_rng
from mab2rec import LearningPolicy
from mab2rec import NeighborhoodPolicy
import numpy as np
from mabwiser.neighbors import _KNearest, _Radius
from mabwiser.treebandit import _TreeBandit
from mabwiser.clusters import _Clusters
from mabwiser.approximate import _LSHNearest

class MABArmEncoded(MAB):
    def __init__(self,
                 num_arms: int,  # The list of arms
                 learning_policy: LearningPolicyType,  # The learning policy
                 neighborhood_policy: NeighborhoodPolicyType = None,  # The context policy, optional
                 seed: int = Constants.default_seed,  # The random seed
                 n_jobs: int = 1,  # Number of parallel jobs
                 backend: str = None,  # Parallel backend implementation
                 ):
        """Initializes a multi-armed bandit (MAB) with the given arguments.

        Validates the arguments and raises exception in case there are violations.

        Parameters
        ----------
        arms : List[Union[int, float, str]]
            The list of all the arms available for decisions.
            Arms can be integers, strings, etc.
        learning_policy : LearningPolicyType
            The learning policy.
        neighborhood_policy : NeighborhoodPolicyType, optional
            The context policy. Default value is None.
        seed : numbers.Rational, optional
            The random seed to initialize the random number generator.
            Default value is set to Constants.default_seed.value
        n_jobs: int, optional
            This is used to specify how many concurrent processes/threads should be used for parallelized routines.
            Default value is set to 1.
            If set to -1, all CPUs are used.
            If set to -2, all CPUs but one are used, and so on.
        backend: str, optional
            Specify a parallelization backend implementation supported in the joblib library. Supported options are:
            - “loky” used by default, can induce some communication and memory overhead when exchanging input and
              output data with the worker Python processes.
            - “multiprocessing” previous process-based backend based on multiprocessing.Pool. Less robust than loky.
            - “threading” is a very low-overhead backend but it suffers from the Python Global Interpreter Lock if the
              called function relies a lot on Python objects.
            Default value is None. In this case the default backend selected by joblib will be used.

        Raises
        ------
        TypeError:  Arms were not provided in a list.
        TypeError:  Learning policy type mismatch.
        TypeError:  Context policy type mismatch.
        TypeError:  Seed is not an integer.
        TypeError:  Number of parallel jobs is not an integer.
        TypeError:  Parallel backend is not a string.
        TypeError:  For EpsilonGreedy, epsilon must be integer or float.
        TypeError:  For LinGreedy, epsilon must be an integer or float.
        TypeError:  For LinGreedy, l2_lambda must be an integer or float.
        TypeError:  For LinTS, alpha must be an integer or float.
        TypeError:  For LinTS, l2_lambda must be an integer or float.
        TypeError:  For LinUCB, alpha must be an integer or float.
        TypeError:  For LinUCB, l2_lambda must be an integer or float.
        TypeError:  For Softmax, tau must be an integer or float.
        TypeError:  For ThompsonSampling, binarizer must be a callable function.
        TypeError:  For UCB, alpha must be an integer or float.
        TypeError:  For LSHNearest, n_dimensions must be an integer or float.
        TypeError:  For LSHNearest, n_tables must be an integer or float.
        TypeError:  For LSHNearest, no_nhood_prob_of_arm must be None or List that sums up to 1.0.
        TypeError:  For Clusters, n_clusters must be an integer.
        TypeError:  For Clusters, is_minibatch must be a boolean.
        TypeError:  For Radius, radius must be an integer or float.
        TypeError:  For Radius, no_nhood_prob_of_arm must be None or List that sums up to 1.0.
        TypeError:  For KNearest, k must be an integer or float.

        ValueError: Invalid number of arms.
        ValueError: Invalid values (None, NaN, Inf) in arms.
        ValueError: Duplicate values in arms.
        ValueError: Number of parallel jobs is 0.
        ValueError: For EpsilonGreedy, epsilon must be between 0 and 1.
        ValueError: For LinGreedy, epsilon must be between 0 and 1.
        ValueError: For LinGreedy, l2_lambda cannot be negative.
        ValueError: For LinTS, alpha must be greater than zero.
        ValueError: For LinTS, l2_lambda must be greater than zero.
        ValueError: For LinUCB, alpha cannot be negative.
        ValueError: For LinUCB, l2_lambda cannot be negative.
        ValueError: For Softmax, tau must be greater than zero.
        ValueError: For UCB, alpha must be greater than zero.
        ValueError: For LSHNearest, n_dimensions must be gerater than zero.
        ValueError: For LSHNearest, n_tables must be gerater than zero.
        ValueError: For LSHNearest, if given, no_nhood_prob_of_arm list should sum up to 1.0.
        ValueError: For Clusters, n_clusters cannot be less than 2.
        ValueError: For Radius and KNearest, metric is not supported by scipy.spatial.distance.cdist.
        ValueError: For Radius, radius must be greater than zero.
        ValueError: For Radius, if given, no_nhood_prob_of_arm list should sum up to 1.0.
        ValueError: For KNearest, k must be greater than zero.
        """

        # Validate arguments
        # MAB._validate_mab_args(arms, learning_policy, neighborhood_policy, seed, n_jobs, backend)

        # Save the arguments
        self.arms = np.arange(num_arms)
        self.num_arms = num_arms
        self.seed = seed
        self.n_jobs = n_jobs
        self.backend = backend

        # Create the random number generator
        self._rng = create_rng(self.seed)
        self._is_initial_fit = False

        # Create the learning policy implementor
        lp = None
        if isinstance(learning_policy, LearningPolicy.LinGreedy):
            lp = LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, 0, learning_policy.epsilon,
                         learning_policy.l2_lambda, "ridge", learning_policy.scale)
        elif isinstance(learning_policy, LearningPolicy.LinTS):
            lp = LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, learning_policy.alpha, 0,
                         learning_policy.l2_lambda, "ts", learning_policy.scale)
        elif isinstance(learning_policy, LearningPolicy.LinUCB):
            lp = LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, learning_policy.alpha, 0,
                         learning_policy.l2_lambda, "ucb", learning_policy.scale)
        else:
            check_true(False, ValueError("Undefined learning policy " + str(learning_policy)))

        if neighborhood_policy:
            # Do not use parallel fit or predict for Learning Policy when contextual
            lp.n_jobs = 1

            if isinstance(neighborhood_policy, NeighborhoodPolicy.Clusters):
                self._imp = _Clusters(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                      neighborhood_policy.n_clusters, neighborhood_policy.is_minibatch)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.LSHNearest):
                self._imp = _LSHNearest(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                        neighborhood_policy.n_dimensions, neighborhood_policy.n_tables,
                                        neighborhood_policy.no_nhood_prob_of_arm)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.KNearest):
                self._imp = _KNearest(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                      neighborhood_policy.k, neighborhood_policy.metric)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.Radius):
                self._imp = _Radius(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                    neighborhood_policy.radius, neighborhood_policy.metric,
                                    neighborhood_policy.no_nhood_prob_of_arm)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.TreeBandit):
                self._imp = _TreeBandit(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                        neighborhood_policy.tree_parameters)
            else:
                check_true(False, ValueError("Undefined context policy " + str(neighborhood_policy)))
        else:
            self._imp = lp
        
        self.is_contextual = True

In [4]:
# Code adapted from: https://github.com/fidelity/mab2rec/blob/main/mab2rec/rec.py
# The difference is that the original code accept different types of arms (strings, integers, etc) and the modified code only accept sequential integers as arms. With this modification, the code can be optimized.

from typing import List, Tuple, Union
from mabwiser.utils import Arm, Num
from scipy.special import expit
from mab2rec import BanditRecommender
from mab2rec import LearningPolicy
from mabwiser.utils import Arm
import numpy as np
import time
import pandas as pd

filter_matrix_original = None

class BanditRecommenderArmEncoded(BanditRecommender):

    def __init__(self, learning_policy: Union[LearningPolicy.LinGreedy,
                                              LearningPolicy.LinTS,
                                              LearningPolicy.LinUCB],
                 neighborhood_policy: Union[None] = None,
                 top_k: int = 10,
                 seed: int = src.RANDOM_STATE,
                 n_jobs: int = 1,
                 backend: str = None):
        """Initializes bandit recommender with the given arguments.

        Validates the arguments and raises exception in case there are violations.

        Parameters
        ----------
        learning_policy : LearningPolicy
            The learning policy.
        neighborhood_policy : NeighborhoodPolicy, default=None
            The context policy.
        top_k : int, default=10
            The number of items to recommend.
        seed : numbers.Rational, default=Constants.default_seed
            The random seed to initialize the random number generator.
            Default value is set to Constants.default_seed.value
        top_k : int, default=10
            The number of items to recommend.
        n_jobs : int, default=1
            This is used to specify how many concurrent processes/threads should be used for parallelized routines.
            If set to -1, all CPUs are used.
            If set to -2, all CPUs but one are used, and so on.
        backend : str, default=None
            Specify a parallelization backend implementation supported in the joblib library. Supported options are:
            - “loky” used by default, can induce some communication and memory overhead when exchanging input and
              output data with the worker Python processes.
            - “multiprocessing” previous process-based backend based on multiprocessing.Pool. Less robust than loky.
            - “threading” is a very low-overhead backend but it suffers from the Python Global Interpreter Lock if the
              called function relies a lot on Python objects.
            Default value is None. In this case the default backend selected by joblib will be used.
        """
        super().__init__(learning_policy, neighborhood_policy, top_k, seed, n_jobs, backend)
    
    def _init(self, num_arms: int) -> None:
        """Initializes recommender with given list of arms.

        Parameters
        ----------
        arms : List[Union[Arm]]
            The list of all of the arms available for decisions.
            Arms can be integers, strings, etc.

        Returns
        -------
        Returns nothing
        """
        self.mab = MABArmEncoded(num_arms, self.learning_policy, self.neighborhood_policy, self.seed, self.n_jobs, self.backend)
    
    def fit(self, decisions: Union[List[Arm], np.ndarray, pd.Series],
            rewards: Union[List[Num], np.ndarray, pd.Series],
            contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None) -> None:
        """Fits the recommender the given *decisions*, their corresponding *rewards* and *contexts*, if any.
        If the recommender arms has not been initialized using the `set_arms`, the recommender arms will be set
        to the list of arms in *decisions*.

        Validates arguments and raises exceptions in case there are violations.

        This function makes the following assumptions:
            - each decision corresponds to an arm of the bandit.
            - there are no ``None``, ``Nan``, or ``Infinity`` values in the contexts.

        Parameters
        ----------
         decisions : Union[List[Arm], np.ndarray, pd.Series]
            The decisions that are made.
         rewards : Union[List[Num], np.ndarray, pd.Series]
            The rewards that are received corresponding to the decisions.
         contexts : Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame], default=None
            The context under which each decision is made.

        Returns
        -------
        Returns nothing.
        """
        if self.mab is None:
            self._init(np.unique(decisions).shape[0])
        self.mab.fit(decisions, rewards, contexts)
    
    def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None,
                  excluded_arms: List[List[Arm]] = None, return_scores: bool = False, apply_sigmoid: bool = True) \
            -> Union[Union[List[Arm], Tuple[List[Arm], List[Num]],
                     Union[List[List[Arm]], Tuple[List[List[Arm]], List[List[Num]]]]]]:
        self._validate_mab(is_fit=True)
        self._validate_get_rec(contexts, excluded_arms)

        start_time = time.time()
        # Get predicted expectations
        num_contexts = len(contexts) if contexts is not None else 1
        if num_contexts == 1:
            expectations = np.array([self.mab.predict_expectations(contexts)])
        else:
            expectations = self.mab.predict_expectations(contexts)
        
        if not isinstance(expectations, np.ndarray):
            expectations = np.array(expectations)

        print(f'predict_expectations demorou {time.time() - start_time} segundos')

        if apply_sigmoid:
            expectations = expit(expectations)

        # Create an exclusion mask, where exclusion_mask[context_ind][arm_ind] denotes if the arm with the
        # index arm_ind was excluded for context with the index context_ind.
        # The value will be True if it is excluded and those arms will not be returned as part of the results.
        arm_to_index = {arm: arm_ind for arm_ind, arm in enumerate(self.mab.arms)}
        exclude_mask = np.zeros((num_contexts, len(self.mab.arms)), dtype=bool)
        if excluded_arms is not None:
            for context_ind, excluded in enumerate(excluded_arms):
                exclude_mask[context_ind][[arm_to_index[arm] for arm in excluded if arm in arm_to_index]] = True
        global filter_matrix_original
        filter_matrix_original = exclude_mask

        # Set excluded item scores to -1, so they automatically get placed lower in best results
        expectations[exclude_mask] = -1.

        start_time = time.time()
        # Get best `top_k` results by sorting the expectations
        arm_inds = np.argpartition(-expectations, self.top_k - 1, axis=1)[:, :self.top_k]
        arm_inds = arm_inds[np.arange(arm_inds.shape[0]).reshape(-1, 1), np.argsort(-expectations[np.arange(expectations.shape[0]).reshape(-1, 1), arm_inds], axis=1)]
        print(f'Ordenação top-K demorou {time.time() - start_time} segundos')
        

        start_time = time.time()
        # Get the list of top_k recommended items and corresponding expectations for each context
        recommendations = [[]] * num_contexts
        scores = [[]] * num_contexts
        for context_ind in range(num_contexts):
            recommendations[context_ind] = [self.mab.arms[arm_ind] for arm_ind in arm_inds[context_ind]]
            if len(recommendations[context_ind]) != self.top_k:
                print('Warning: the number of recommendations is less than the top_k value. ')
            if return_scores:
                scores[context_ind] = [expectations[context_ind, arm_ind] for arm_ind in arm_inds[context_ind]]
        print(f'gerar lista de recomendações demorou {time.time() - start_time} segundos')
        # Return recommendations and scores
        if return_scores:
            if num_contexts > 1:
                return recommendations, scores
            else:
                return recommendations[0], scores[0]
        else:
            if num_contexts > 1:
                return recommendations
            else:
                return recommendations[0]

In [5]:
from abc import ABC
import pandas as pd
import numpy as np
from typing import Union
from mab2rec import LearningPolicy

class Mab2RecRecommender(ABC):
    '''
    Classe base para os recomendadores da biblioteca [mab2rec](https://github.com/fidelity/mab2rec).

    Essa classe é responsável por encapsular a lógica de inicialização e treinamento dos algoritmos da biblioteca mab2rec.

    Essa classe não deve ser utilizada diretamente. Para utilizar um algoritmo da biblioteca mab2rec, utilize uma das classes filhas dessa classe.

    Para implementar um novo algoritmo da biblioteca mab2rec, basta na classe filha implementar o método __init__ chamando o super().__init__ e inicializar o atributo self.recommender com o algoritmo da biblioteca mab2rec (pelo menos, a ideia inicial é ser simples assim).
    '''

    def __init__(self, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, hyperparameters: dict={}):
        '''
        Inicializa o recomendador.

        params:
            user_column: Nome da coluna que representa o usuário.
            item_column: Nome da coluna que representa o item.
            rating_column: Nome da coluna que representa a avaliação.
        '''
        self.user_column = user_column
        self.item_column = item_column
        self.rating_column = rating_column

        self.interactions_by_user: pd.DataFrame = None
        self.recommender: LearningPolicy = None

    def train(self, interactions_df: pd.DataFrame, contexts):
        '''
        Treina "do zero" o recomendador com base nas interações passadas. Utilizar apenas na primeira chamada de treinamento, caso deseje treinar incrementalmente.

        params:
            interactions_df: DataFrame contendo as interações usuário-item.
        '''
        self.interactions_by_user = self.__group_interactions_by_user(interactions_df)
        self.recommender.fit(
            decisions=interactions_df[self.item_column],
            rewards=interactions_df[self.rating_column],
            contexts=contexts
        )

    def partial_train(self, interactions_df: pd.DataFrame, contexts):
        '''
        Treina o recomendador incrementalmente com base nas interações passadas. Deve ser utilizado após a primeira chamada de treinamento, o novo conhecimento será incorporado ao modelo, sem esquecer o conhecimento anterior.

        params:
            interactions_df: DataFrame contendo as interações usuário-item.
        '''
        self.interactions_by_user = self.__merge_interactions_by_user(self.interactions_by_user, self.__group_interactions_by_user(interactions_df))
        self.recommender.partial_fit(
            decisions=interactions_df[self.item_column],
            rewards=interactions_df[self.rating_column],
            contexts=contexts
        )
    
    def recommend(self, users_ids: 'Union[list[int], np.ndarray]', contexts) -> 'tuple[list[int], list[float]]':
        '''
        Gera recomendações para uma lista de usuários.

        params:
            users_ids: Lista de IDs dos usuários para os quais deseja-se gerar recomendações.
            topn: Número máximo de recomendações a serem geradas por `user_id`.

        returns:
            Tupla contendo dois arrays: o primeiro contém os IDs dos itens recomendados e o segundo contém a pontuação de cada item.
        '''
        filters = pd.DataFrame({self.user_column: users_ids})\
            .merge(self.interactions_by_user, how='left', on=self.user_column)[['interactions']].values.squeeze(axis=1)
        # A variável filters mapeia o id de cada user nas interações de teste para uma lista de ids de itens já consumidos por ele.
        # Isso é importante para evitar que sejam recomendados itens que o usuário já consumiu
        return self.recommender.recommend(contexts, filters, apply_sigmoid=False, return_scores=True)


    
    def __group_interactions_by_user(self, interactions_df: pd.DataFrame) -> pd.DataFrame:
        '''
        Agrupa as interações por usuário. Será gerado um DataFrame em que uma coluna é o ID do usuário e a outra coluna possui uma lista de interações
        que cada usuário fez.

        params:
            interactions_df: DataFrame contendo as interações usuário-item.
        
            
        returns:
            Um DataFrame em que uma coluna é o ID do usuário (coluna self.user_column) e a outra coluna possui uma 
            lista de interações que cada usuário fez (coluna interactions).
        '''
        interactions_by_user = interactions_df\
                        .groupby(self.user_column)[[self.item_column]]\
                        .apply(lambda df_user: df_user[self.item_column].tolist())\
                        .reset_index(name='interactions')
        interactions_by_user = interactions_by_user.reset_index(drop=True)
        return interactions_by_user

    def __merge_interactions_by_user(self, interactions_by_user_x: pd.DataFrame, interactions_by_user_y: pd.DataFrame) -> pd.DataFrame:
        '''
        Mescla dois DataFrames contendo interações por usuário. O resultado final será outro DataFrame em que uma 
        coluna é o ID do usuário e a outra coluna possui uma lista de interações que cada usuário fez, sendo que esta lista
        terá as interações de ambos DataFrames usados na mesclagem. Por exemplo, se um usuário consumiu os itens 1 e 5 no
        DataFrame X e os itens 2 e 9 no DataFrame Y, o resultado final para aquele usuário será uma lista contendo os itens
        1, 2, 5 e 9.

        params:
            interactions_by_user_x: primeiro DataFrame contendo as interações agrupadas por usuário.
            interactions_by_user_y: segundo DataFrame contendo as interações agrupadas por usuário.
        
        returns:
            Uma mesclagem entre os dois DataFrames passados. O resultado será Um DataFrame em que uma coluna é o ID do usuário 
            (coluna self.user_column) e a outra coluna possui uma lista de interações que cada usuário fez (coluna interactions).
        '''
        def concat_user_interactions(row):
            final_interactions = []
            if isinstance(row['interactions_x'], list):
                final_interactions += row['interactions_x']
            if isinstance(row['interactions_y'], list):
                final_interactions += row['interactions_y']
            row['interactions'] = final_interactions
            return row

        merge_df = interactions_by_user_x.merge(interactions_by_user_y, how='outer', on=self.user_column).apply(concat_user_interactions, axis=1)
        merge_df = merge_df.drop(['interactions_x', 'interactions_y'], axis=1)
        return merge_df

In [6]:

from mab2rec import LearningPolicy
import src

class Lin(Mab2RecRecommender):

    def __init__(self, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, hyperparameters: dict={}):
        super().__init__(user_column, item_column, rating_column)

        self.recommender = BanditRecommenderArmEncoded(
            learning_policy=LearningPolicy.LinGreedy(epsilon=0),
            top_k=src.TOP_N,
            seed=src.RANDOM_STATE
        )



class LinGreedy(Mab2RecRecommender):

    def __init__(self, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, logger=None, hyperparameters: dict={}):
        super().__init__(user_column, item_column, rating_column)

        self.recommender = BanditRecommenderArmEncoded(
            learning_policy=LearningPolicy.LinGreedy(**hyperparameters),
            top_k=src.TOP_N,
            seed=src.RANDOM_STATE
        )

class LinTS(Mab2RecRecommender):

    def __init__(self, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, logger=None, hyperparameters: dict={}):
        super().__init__(user_column, item_column, rating_column, logger)

        self.recommender = BanditRecommenderArmEncoded(
            learning_policy=LearningPolicy.LinTS(**hyperparameters),
            top_k=src.TOP_N,
            seed=src.RANDOM_STATE,
        )

class LinUCB(Mab2RecRecommender):

    def __init__(self, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, logger=None, hyperparameters: dict={}):
        super().__init__(user_column, item_column, rating_column, logger)

        self.recommender = BanditRecommenderArmEncoded(
            learning_policy=LearningPolicy.LinUCB(**hyperparameters),
            top_k=src.TOP_N,
            seed=src.RANDOM_STATE
        )

## Versão otimizada

In [7]:
# Code adapted from: https://github.com/fidelity/mabwiser/blob/master/mabwiser/linear.py
# The difference is that the original code accept different types of arms (strings, integers, etc) and the modified code only accept sequential integers as arms. With this modification, the code can be optimized.

ITEMS_PER_BATCH = 10_000
INTERACTIONS_PER_BATCH_LINTS = 1_000

from mabwiser.linear import _Linear
from mabwiser.utils import Arm, Num, _BaseRNG
from typing import Callable, Dict, List, Optional, Union
import numpy as np
import time
import torch

class _RidgeRegressionOptimized:

    def __init__(self, rng: _BaseRNG, alpha: Num = 1.0, l2_lambda: Num = 1.0, scale: bool = False):

        # Ridge Regression: https://onlinecourses.science.psu.edu/stat857/node/155/
        self.rng = rng                      # random number generator
        self.alpha = alpha                  # exploration parameter
        self.l2_lambda = l2_lambda          # regularization parameter
        self.scale = scale                  # scale contexts

        self.beta = None                    # (XtX + l2_lambda * I_d)^-1 * Xty = A^-1 * Xty
        self.A = None                       # (XtX + l2_lambda * I_d)
        self.A_inv = None                   # (XtX + l2_lambda * I_d)^-1
        self.Xty = None
        self.scaler = None

    def init(self, num_features: int, num_arms: int):
        # By default, assume that
        # A is the identity matrix and Xty is set to 0
        start_time = time.time()
        self.Xty = torch.zeros((num_arms, num_features), device='cuda', dtype=torch.double)
        self.A = torch.eye(num_features, device='cuda', dtype=torch.double).unsqueeze(0).repeat(num_arms, 1, 1) * self.l2_lambda
        #self.A_inv = self.A.clone()
        self.beta = torch.zeros((num_arms, num_features), device='cuda', dtype=torch.double)
        print(f'init demorou {time.time() - start_time} segundos')
        #self.scaler = StandardScaler() if self.scale else None

    def fit(self, decisions: np.ndarray, X: np.ndarray, y: np.ndarray):

        # Scale
        #if self.scaler is not None:
        #    X = X.astype('float64')
        #    if not hasattr(self.scaler, 'scale_'):
        #        self.scaler.fit(X)
        #    else:
        #        self.scaler.partial_fit(X)
        #    fix_small_variance(self.scaler)
        #    X = self.scaler.transform(X)
        start_time = time.time()
        X_device = torch.tensor(X, device='cuda')
        y_device = torch.tensor(y, device='cuda')
        decisions_device = torch.tensor(decisions, device='cuda')
        print(f'passar para cuda demorou {time.time() - start_time} segundos')
        # Update A
        #start_time = time.time()
        #outer = torch.einsum('ni,nj->nij', X_device, X_device)  # (n, d, d)
        #print(f'outer demorou {time.time() - start_time} segundos')

        # Scatter add outer products into self.A based on decisions
        start_time = time.time()
        self.A.index_add_(0, decisions_device, torch.einsum('ni,nj->nij', X_device, X_device))
        print(f'A add demorou {time.time() - start_time} segundos')

        # Add X * y to Xty
        start_time = time.time()
        self.Xty.index_add_(0, decisions_device, X_device * y_device.view(-1, 1))
        print(f'Xty demorou {time.time() - start_time} segundos')

        # Invert each A matrix
        for j in range(0, self.beta.shape[0], ITEMS_PER_BATCH):            
            start_time = time.time()
            self.beta[j:j+ITEMS_PER_BATCH] = torch.linalg.solve(
                self.A[j:j+ITEMS_PER_BATCH],
                self.Xty[j:j+ITEMS_PER_BATCH]
            )
            print(f'beta demorou {time.time() - start_time} segundos')

    def predict(self, x: np.ndarray):

        # Calculate default expectation y = x * b
        return torch.matmul(torch.tensor(x, device='cuda', dtype=torch.double), self.beta.T)


class _LinTSOptimized2(_RidgeRegressionOptimized):

    def __init__(self, rng: _BaseRNG, alpha: Num = 1.0, l2_lambda: Num = 1.0, scale: bool = False):
        super().__init__(rng, alpha, l2_lambda, scale)
        torch.manual_seed(src.RANDOM_STATE)
    
    def predict(self, x: np.ndarray):

        # Randomly sample coefficients from multivariate normal distribution
        # Covariance is enhanced with the exploration factor
        # Generates  random samples for all contexts in one single go. type(beta_sampled): np.ndarray
        # beta_sampled = self.rng.multivariate_normal(self.beta, np.square(self.alpha) * self.A_inv, size=x.shape[0])
        mvn = torch.distributions.MultivariateNormal(loc=self.beta[0], covariance_matrix=((self.alpha ** 2) * torch.linalg.inv(self.A))[0])
        beta_sampled = mvn.sample((x.shape[0],))
        print('beta sampled torch')
        print(beta_sampled)

        beta_sampled2 = self.rng.multivariate_normal(self.beta.to('cpu').numpy()[0], ((self.alpha ** 2) * torch.linalg.inv(self.A)).to('cpu').numpy()[0], size=x.shape[0])
        print('beta sample numpy')
        print(beta_sampled2)
        #scale_tril = torch.linalg.cholesky((self.alpha ** 2) * self.A_inv)  # [D, D]
        #print(scale_tril.shape)

        #z = torch.randn((x.shape[0], scale_tril.shape[0]), generator=self.rng, dtype=torch.float32, device='cuda')  # [N, D]
        #print(z.shape)

        #print(self.beta.shape)
        #beta_sampled = self.beta + z @ scale_tril.T  # [N, D]
        
        # Calculate expectation y = x * beta_sampled
        return torch.einsum('bji,bi->bj', beta_sampled, torch.tensor(x, device='cuda', dtype=torch.double))

class _LinTSOptimized(_RidgeRegressionOptimized):

    def __init__(self, rng: _BaseRNG, alpha: Num = 1.0, l2_lambda: Num = 1.0, scale: bool = False):
        super().__init__(rng, alpha, l2_lambda, scale)
        self.torch_rng = torch.Generator(device='cuda').manual_seed(src.RANDOM_STATE)
    
    def predict(self, x: np.ndarray):
        x_torch = torch.tensor(x, device='cuda', dtype=torch.double)  # [B, D]

        num_arms, num_features = self.beta.shape
        num_contexts = x.shape[0]

        scores = torch.zeros((num_contexts, num_arms), device='cuda', dtype=torch.double)

        # z_all = torch.randn((num_arms, B, d), generator=self.torch_rng, device='cuda', dtype=torch.double)

        for start in range(0, num_arms, ITEMS_PER_BATCH):
            end = min(start + ITEMS_PER_BATCH, num_arms)
            chunk_size = end - start

            beta_chunk = self.beta[start:end]            # [chunk_size, D]
            A_chunk = self.A[start:end]                  # [chunk_size, D, D]
            A_inv_chunk = torch.linalg.inv(A_chunk)      # [chunk_size, D, D]

            eps = torch.from_numpy(self.rng.standard_normal(size=(num_contexts, num_features))).cuda()  # [B, D]
            L_chunk = torch.linalg.cholesky((self.alpha ** 2) * A_inv_chunk)  # [A, D, D]
            beta_sampled = torch.einsum('bd,add->bad', eps, L_chunk) + beta_chunk  # [B, A, D]

            scores[:, start:end] = torch.einsum('bd,bad->ba', x_torch, beta_sampled)

        return scores  # shape: [B, M]



class _LinUCBOptimized(_RidgeRegressionOptimized):

    def predict(self, x: np.ndarray):

        x = torch.tensor(x, device='cuda')

        scores = torch.matmul(x, self.beta.T)

        for j in range(0, self.beta.shape[0], ITEMS_PER_BATCH):
            x_A_inv = torch.matmul(x, torch.linalg.inv(self.A[j: j+ITEMS_PER_BATCH]))

            # Upper confidence bound = alpha * sqrt(x A^-1 xt). Notice that, x = xt
            # ucb values are claculated for all the contexts in one single go. type(ucb): np.ndarray
            ucb = self.alpha * torch.sqrt(torch.sum(x_A_inv * x, axis=2))

            # Calculate linucb expectation y = x * b + ucb
            scores[:, j: j+ITEMS_PER_BATCH] += ucb.T
        
        return scores

class LinearArmEncodedOptimized:
    factory = {
        "ts": _LinTSOptimized, 
        "ucb": _LinUCBOptimized, 
        "ridge": _RidgeRegressionOptimized
    }

    def __init__(self, rng: _BaseRNG, num_arms: int, num_features:int, n_jobs: int, backend: Optional[str],
                 alpha: Num, epsilon: Num, l2_lambda: Num, regression: str, scale: bool):
        self.alpha = alpha
        self.epsilon = epsilon
        self.l2_lambda = l2_lambda
        self.regression = regression
        self.scale = scale
        self.n_jobs = n_jobs
        self.backend = backend
        self.rng = rng
        self.num_arms = num_arms
        self.num_features = num_features
        self.model = self.factory[regression](rng, alpha, l2_lambda, scale)
        self.model.init(self.num_features, self.num_arms)

    def _vectorized_predict_context(self, contexts: np.ndarray, is_predict: bool) -> List:

        # Initializing array with expectations for each arm
        num_contexts = contexts.shape[0]
        arm_expectations = self.model.predict(contexts)

        # With epsilon probability, assign random flag to context
        random_values = self.rng.rand(num_contexts)
        random_mask = np.array(random_values < self.epsilon)
        random_indices = random_mask.nonzero()[0]

        # For random indices, generate random expectations
        arm_expectations[random_indices] = torch.tensor(self.rng.rand((random_indices.shape[0], self.num_arms)), device='cuda')

        # arm_expectations[nonrandom_indices] = np.array([self.arm_to_model[arm].predict(nonrandom_context)
        #                                                for arm in arms]).T
        # arm_expectations[nonrandom_indices] = self.model.predict(nonrandom_context)
        # ARRUMAR ISSO, DESSA FORMA O LINGREEDY NAO IRA FUNCIONAR (ORIGINAL EMCIMA)
        return arm_expectations if len(arm_expectations) > 1 else arm_expectations[0]
    
    def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> None:

        #start_time = time.time()
        # Initialize each model by arm
        #self.num_features = contexts.shape[1]
        #self.model.init(self.num_features, self.num_arms)
        #print(f'init demorou {time.time() - start_time} segundos')

        start_time = time.time()
        # Perform parallel fit
        self._fit(decisions, rewards, contexts)
        print(f'parallel_fit demorou {time.time() - start_time} segundos')

        # Update trained arms
        # Removi o código abaixo pois parece que ele não é usado para o nosso caso...
        # Ele parece ser usado apenas no contexto de tentar fazer "warm" start
        # Basicamente, copiando os mesmos parâmetros de um arm já treinado para um novo (cold) por proximidade de features...

        # Otimizar essa função não parece ser algo tão trivial, já que teria que mudar a estrutura do arm_to_status, tendo que mudar vários outros códigos por causa disso
        # self._set_arms_as_trained(decisions=decisions, is_partial=False)
    
    def partial_fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> None:
        # Perform parallel fit
        self._fit(decisions, rewards, contexts)

        # Update trained arms
        # self._set_arms_as_trained(decisions=decisions, is_partial=True)
    
    def _fit(self, decisions: np.ndarray, rewards: np.ndarray,
                      contexts: Optional[np.ndarray] = None):
        
        # Compute effective number of jobs
        #n_jobs = self._effective_jobs(len(self.arms), self.n_jobs)
        # Perform parallel fit
        #Parallel(n_jobs=n_jobs, require='sharedmem')(
        #                  delayed(self._fit_arm)(
        #                      arm, decisions, rewards, contexts)
        #                 for arm in self.arms)
        self.model.fit(decisions, contexts, rewards)
    
    def predict_expectations(self, contexts: np.ndarray = None) -> Union[Dict[Arm, Num], List[Dict[Arm, Num]]]:
        # Return predict expectations for the given context
        return self._vectorized_predict_context(contexts, is_predict=False)

In [8]:
# Code adapted from: https://github.com/fidelity/mabwiser/blob/master/mabwiser/mab.py
# The difference is that the original code accept different types of arms (strings, integers, etc) and the modified code only accept sequential integers as arms. With this modification, the code can be optimized.

from mabwiser.mab import MAB, LearningPolicyType, NeighborhoodPolicyType

from mabwiser.utils import Constants, check_true, create_rng
from mab2rec import LearningPolicy
from mab2rec import NeighborhoodPolicy
import numpy as np
from mabwiser.neighbors import _KNearest, _Radius
from mabwiser.treebandit import _TreeBandit
from mabwiser.clusters import _Clusters
from mabwiser.approximate import _LSHNearest

from mabwiser.approximate import _LSHNearest
from mabwiser.clusters import _Clusters
from mabwiser.greedy import _EpsilonGreedy
from mabwiser.linear import _Linear
from mabwiser.neighbors import _KNearest, _Radius
from mabwiser.popularity import _Popularity
from mabwiser.rand import _Random
from mabwiser.softmax import _Softmax
from mabwiser.thompson import _ThompsonSampling
from mabwiser.treebandit import _TreeBandit
from mabwiser.ucb import _UCB1

class MABArmEncodedOptimized(MAB):
    def __init__(self,
                 num_arms: int,  # The list of arms
                 num_features: int,
                 learning_policy: LearningPolicyType,  # The learning policy
                 neighborhood_policy: NeighborhoodPolicyType = None,  # The context policy, optional
                 seed: int = Constants.default_seed,  # The random seed
                 n_jobs: int = 1,  # Number of parallel jobs
                 backend: str = None,  # Parallel backend implementation
                 ):
        """Initializes a multi-armed bandit (MAB) with the given arguments.

        Validates the arguments and raises exception in case there are violations.

        Parameters
        ----------
        arms : List[Union[int, float, str]]
            The list of all the arms available for decisions.
            Arms can be integers, strings, etc.
        learning_policy : LearningPolicyType
            The learning policy.
        neighborhood_policy : NeighborhoodPolicyType, optional
            The context policy. Default value is None.
        seed : numbers.Rational, optional
            The random seed to initialize the random number generator.
            Default value is set to Constants.default_seed.value
        n_jobs: int, optional
            This is used to specify how many concurrent processes/threads should be used for parallelized routines.
            Default value is set to 1.
            If set to -1, all CPUs are used.
            If set to -2, all CPUs but one are used, and so on.
        backend: str, optional
            Specify a parallelization backend implementation supported in the joblib library. Supported options are:
            - “loky” used by default, can induce some communication and memory overhead when exchanging input and
              output data with the worker Python processes.
            - “multiprocessing” previous process-based backend based on multiprocessing.Pool. Less robust than loky.
            - “threading” is a very low-overhead backend but it suffers from the Python Global Interpreter Lock if the
              called function relies a lot on Python objects.
            Default value is None. In this case the default backend selected by joblib will be used.

        Raises
        ------
        TypeError:  Arms were not provided in a list.
        TypeError:  Learning policy type mismatch.
        TypeError:  Context policy type mismatch.
        TypeError:  Seed is not an integer.
        TypeError:  Number of parallel jobs is not an integer.
        TypeError:  Parallel backend is not a string.
        TypeError:  For EpsilonGreedy, epsilon must be integer or float.
        TypeError:  For LinGreedy, epsilon must be an integer or float.
        TypeError:  For LinGreedy, l2_lambda must be an integer or float.
        TypeError:  For LinTS, alpha must be an integer or float.
        TypeError:  For LinTS, l2_lambda must be an integer or float.
        TypeError:  For LinUCB, alpha must be an integer or float.
        TypeError:  For LinUCB, l2_lambda must be an integer or float.
        TypeError:  For Softmax, tau must be an integer or float.
        TypeError:  For ThompsonSampling, binarizer must be a callable function.
        TypeError:  For UCB, alpha must be an integer or float.
        TypeError:  For LSHNearest, n_dimensions must be an integer or float.
        TypeError:  For LSHNearest, n_tables must be an integer or float.
        TypeError:  For LSHNearest, no_nhood_prob_of_arm must be None or List that sums up to 1.0.
        TypeError:  For Clusters, n_clusters must be an integer.
        TypeError:  For Clusters, is_minibatch must be a boolean.
        TypeError:  For Radius, radius must be an integer or float.
        TypeError:  For Radius, no_nhood_prob_of_arm must be None or List that sums up to 1.0.
        TypeError:  For KNearest, k must be an integer or float.

        ValueError: Invalid number of arms.
        ValueError: Invalid values (None, NaN, Inf) in arms.
        ValueError: Duplicate values in arms.
        ValueError: Number of parallel jobs is 0.
        ValueError: For EpsilonGreedy, epsilon must be between 0 and 1.
        ValueError: For LinGreedy, epsilon must be between 0 and 1.
        ValueError: For LinGreedy, l2_lambda cannot be negative.
        ValueError: For LinTS, alpha must be greater than zero.
        ValueError: For LinTS, l2_lambda must be greater than zero.
        ValueError: For LinUCB, alpha cannot be negative.
        ValueError: For LinUCB, l2_lambda cannot be negative.
        ValueError: For Softmax, tau must be greater than zero.
        ValueError: For UCB, alpha must be greater than zero.
        ValueError: For LSHNearest, n_dimensions must be gerater than zero.
        ValueError: For LSHNearest, n_tables must be gerater than zero.
        ValueError: For LSHNearest, if given, no_nhood_prob_of_arm list should sum up to 1.0.
        ValueError: For Clusters, n_clusters cannot be less than 2.
        ValueError: For Radius and KNearest, metric is not supported by scipy.spatial.distance.cdist.
        ValueError: For Radius, radius must be greater than zero.
        ValueError: For Radius, if given, no_nhood_prob_of_arm list should sum up to 1.0.
        ValueError: For KNearest, k must be greater than zero.
        """

        # Validate arguments
        # MAB._validate_mab_args(arms, learning_policy, neighborhood_policy, seed, n_jobs, backend)

        # Save the arguments
        self.arms = np.arange(num_arms)
        self.num_arms = num_arms
        self.num_features = num_features
        self.seed = seed
        self.n_jobs = n_jobs
        self.backend = backend

        # Create the random number generator
        self._rng = create_rng(self.seed)
        self._is_initial_fit = False

        # Create the learning policy implementor
        lp = None
        if isinstance(learning_policy, LearningPolicy.LinGreedy):
            lp = LinearArmEncodedOptimized(self._rng, num_arms, self.num_features, self.n_jobs, self.backend, 0, learning_policy.epsilon,
                         learning_policy.l2_lambda, "ridge", learning_policy.scale)
        elif isinstance(learning_policy, LearningPolicy.LinTS):
            lp = LinearArmEncodedOptimized(self._rng, num_arms, self.num_features, self.n_jobs, self.backend, learning_policy.alpha, 0,
                         learning_policy.l2_lambda, "ts", learning_policy.scale)
        elif isinstance(learning_policy, LearningPolicy.LinUCB):
            lp = LinearArmEncodedOptimized(self._rng, num_arms, self.num_features, self.n_jobs, self.backend, learning_policy.alpha, 0,
                         learning_policy.l2_lambda, "ucb", learning_policy.scale)
        else:
            check_true(False, ValueError("Undefined learning policy " + str(learning_policy)))

        if neighborhood_policy:
            # Do not use parallel fit or predict for Learning Policy when contextual
            lp.n_jobs = 1

            if isinstance(neighborhood_policy, NeighborhoodPolicy.Clusters):
                self._imp = _Clusters(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                      neighborhood_policy.n_clusters, neighborhood_policy.is_minibatch)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.LSHNearest):
                self._imp = _LSHNearest(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                        neighborhood_policy.n_dimensions, neighborhood_policy.n_tables,
                                        neighborhood_policy.no_nhood_prob_of_arm)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.KNearest):
                self._imp = _KNearest(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                      neighborhood_policy.k, neighborhood_policy.metric)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.Radius):
                self._imp = _Radius(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                    neighborhood_policy.radius, neighborhood_policy.metric,
                                    neighborhood_policy.no_nhood_prob_of_arm)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.TreeBandit):
                self._imp = _TreeBandit(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                        neighborhood_policy.tree_parameters)
            else:
                check_true(False, ValueError("Undefined context policy " + str(neighborhood_policy)))
        else:
            self._imp = lp
        
        self.is_contextual = True
    
    @property
    def learning_policy(self):
        """
        Creates named tuple of the learning policy based on the implementor.

        Returns
        -------
        The learning policy.

        Raises
        ------
        NotImplementedError: MAB learning_policy property not implemented for this learning policy.

        """
        if isinstance(self._imp, (_LSHNearest, _KNearest, _Radius, _TreeBandit)):
            lp = self._imp.lp
        elif isinstance(self._imp, _Clusters):
            lp = self._imp.lp_list[0]
        else:
            lp = self._imp

        if isinstance(lp, _EpsilonGreedy):
            if issubclass(type(lp), _Popularity):
                return LearningPolicy.Popularity()
            else:
                return LearningPolicy.EpsilonGreedy(lp.epsilon)
        elif isinstance(lp, _Linear):
            if lp.regression == 'ridge':
                return LearningPolicy.LinGreedy(lp.epsilon, lp.l2_lambda, lp.scale)
            elif lp.regression == 'ts':
                return LearningPolicy.LinTS(lp.alpha, lp.l2_lambda, lp.scale)
            elif lp.regression == 'ucb':
                return LearningPolicy.LinUCB(lp.alpha, lp.l2_lambda, lp.scale)
            else:
                check_true(False, ValueError("Undefined regression " + str(lp.regression)))
        elif isinstance(lp, LinearArmEncodedOptimized):
            if lp.regression == 'ridge':
                return _RidgeRegressionOptimized(lp.rng, lp.alpha, lp.l2_lambda, lp.scale)
            elif lp.regression == 'ts':
                return _LinTSOptimized(lp.alpha, lp.l2_lambda, lp.scale)
            elif lp.regression == 'ucb':
                return _LinUCBOptimized(lp.alpha, lp.l2_lambda, lp.scale)
            else:
                check_true(False, ValueError("Undefined regression " + str(lp.regression)))
        elif isinstance(lp, _Random):
            return LearningPolicy.Random()
        elif isinstance(lp, _Softmax):
            return LearningPolicy.Softmax(lp.tau)
        elif isinstance(lp, _ThompsonSampling):
            return LearningPolicy.ThompsonSampling(lp.binarizer)
        elif isinstance(lp, _UCB1):
            return LearningPolicy.UCB1(lp.alpha)
        else:
            raise NotImplementedError("MAB learning_policy property not implemented for this learning policy.")

In [9]:
# Code adapted from: https://github.com/fidelity/mab2rec/blob/main/mab2rec/rec.py
# The difference is that the original code accept different types of arms (strings, integers, etc) and the modified code only accept sequential integers as arms. With this modification, the code can be optimized.

from typing import List, Tuple, Union
from mabwiser.utils import Arm, Num
from scipy.special import expit
from mab2rec import BanditRecommender
from mab2rec import LearningPolicy
from mabwiser.utils import Arm
import numpy as np
import time
import pandas as pd
import torch

class BanditRecommenderArmEncodedOptimized(BanditRecommender):

    def __init__(self, learning_policy: Union[LearningPolicy.LinGreedy,
                                              LearningPolicy.LinTS,
                                              LearningPolicy.LinUCB],
                 num_arms: int,
                 num_features: int,
                 neighborhood_policy: Union[None] = None,
                 top_k: int = 10,
                 seed: int = src.RANDOM_STATE,
                 n_jobs: int = 1,
                 backend: str = None):
        """Initializes bandit recommender with the given arguments.

        Validates the arguments and raises exception in case there are violations.

        Parameters
        ----------
        learning_policy : LearningPolicy
            The learning policy.
        neighborhood_policy : NeighborhoodPolicy, default=None
            The context policy.
        top_k : int, default=10
            The number of items to recommend.
        seed : numbers.Rational, default=Constants.default_seed
            The random seed to initialize the random number generator.
            Default value is set to Constants.default_seed.value
        top_k : int, default=10
            The number of items to recommend.
        n_jobs : int, default=1
            This is used to specify how many concurrent processes/threads should be used for parallelized routines.
            If set to -1, all CPUs are used.
            If set to -2, all CPUs but one are used, and so on.
        backend : str, default=None
            Specify a parallelization backend implementation supported in the joblib library. Supported options are:
            - “loky” used by default, can induce some communication and memory overhead when exchanging input and
              output data with the worker Python processes.
            - “multiprocessing” previous process-based backend based on multiprocessing.Pool. Less robust than loky.
            - “threading” is a very low-overhead backend but it suffers from the Python Global Interpreter Lock if the
              called function relies a lot on Python objects.
            Default value is None. In this case the default backend selected by joblib will be used.
        """
        self.num_arms = num_arms
        self.num_features = num_features
        super().__init__(learning_policy, neighborhood_policy, top_k, seed, n_jobs, backend)
    
    def _init(self) -> None:
        """Initializes recommender with given list of arms.

        Parameters
        ----------
        arms : List[Union[Arm]]
            The list of all of the arms available for decisions.
            Arms can be integers, strings, etc.

        Returns
        -------
        Returns nothing
        """
        self.mab = MABArmEncodedOptimized(self.num_arms, self.num_features, self.learning_policy, self.neighborhood_policy, self.seed, self.n_jobs, self.backend)
    
    def fit(self, decisions: Union[List[Arm], np.ndarray, pd.Series],
            rewards: Union[List[Num], np.ndarray, pd.Series],
            contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None) -> None:
        """Fits the recommender the given *decisions*, their corresponding *rewards* and *contexts*, if any.
        If the recommender arms has not been initialized using the `set_arms`, the recommender arms will be set
        to the list of arms in *decisions*.

        Validates arguments and raises exceptions in case there are violations.

        This function makes the following assumptions:
            - each decision corresponds to an arm of the bandit.
            - there are no ``None``, ``Nan``, or ``Infinity`` values in the contexts.

        Parameters
        ----------
         decisions : Union[List[Arm], np.ndarray, pd.Series]
            The decisions that are made.
         rewards : Union[List[Num], np.ndarray, pd.Series]
            The rewards that are received corresponding to the decisions.
         contexts : Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame], default=None
            The context under which each decision is made.

        Returns
        -------
        Returns nothing.
        """
        if self.mab is None:
            self._init()
        self.mab.fit(decisions, rewards, contexts)
    
    def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None,
                  excluded_arms: List[List[Arm]] = None, return_scores: bool = False, apply_sigmoid: bool = True) \
            -> Union[Union[List[Arm], Tuple[List[Arm], List[Num]],
                     Union[List[List[Arm]], Tuple[List[List[Arm]], List[List[Num]]]]]]:
        #self._validate_mab(is_fit=True)
        #self._validate_get_rec(contexts, excluded_arms)

        start_time = time.time()
        # Get predicted expectations
        num_contexts = len(contexts) if contexts is not None else 1
        if num_contexts == 1:
            expectations = np.array([self.mab.predict_expectations(contexts)])
        else:
            expectations = self.mab.predict_expectations(contexts)
        
        #if not isinstance(expectations, np.ndarray):
        #    expectations = np.array(expectations)

        print(f'predict_expectations demorou {time.time() - start_time} segundos')

        if apply_sigmoid:
            expectations = expit(expectations)

        # Create an exclusion mask, where exclusion_mask[context_ind][arm_ind] denotes if the arm with the
        # index arm_ind was excluded for context with the index context_ind.
        # The value will be True if it is excluded and those arms will not be returned as part of the results.
        start_time = time.time()

        # Set excluded item scores to -1, so they automatically get placed lower in best results
        excluded_rows = torch.from_numpy(excluded_arms[0]).to(expectations.device)
        excluded_cols = torch.from_numpy(excluded_arms[1]).to(expectations.device)
        expectations[excluded_rows, excluded_cols] = -1.
        print(f'Exclude mask demorou {time.time() - start_time} segundos')

        start_time = time.time()
        # Get best `top_k` results by sorting the expectations
        #expectations = torch.tensor(expectations, device='cuda')
        topk_sorted_expectations = torch.topk(expectations, self.top_k, dim=1)
        recommendations = topk_sorted_expectations.indices.cpu().numpy()
        scores = topk_sorted_expectations.values.cpu().numpy()
        #arm_inds = np.argpartition(-expectations, self.top_k - 1, axis=1)[:, :self.top_k]
        #arm_inds = arm_inds[np.arange(arm_inds.shape[0]).reshape(-1, 1), np.argsort(-expectations[np.arange(expectations.shape[0]).reshape(-1, 1), arm_inds], axis=1)]
        print(f'Ordenação top-K demorou {time.time() - start_time} segundos')
        

        #start_time = time.time()
        # Get the list of top_k recommended items and corresponding expectations for each context
        #recommendations = arm_inds
        #scores = [[]] * num_contexts
        #for context_ind in range(num_contexts):
        #    recommendations[context_ind] = [self.mab.arms[arm_ind] for arm_ind in arm_inds[context_ind]]
        #    if len(recommendations[context_ind]) != self.top_k:
        #        print('Warning: the number of recommendations is less than the top_k value. ')
        #    if return_scores:
        #        scores[context_ind] = [expectations[context_ind, arm_ind] for arm_ind in arm_inds[context_ind]]
        #print(f'gerar lista de recomendações demorou {time.time() - start_time} segundos')
        # Return recommendations and scores
        if return_scores:
            if num_contexts > 1:
                return recommendations, scores
            else:
                return recommendations[0], scores[0]
        else:
            if num_contexts > 1:
                return recommendations
            else:
                return recommendations[0]

In [10]:
from abc import ABC
import pandas as pd
import numpy as np
from typing import Union
from mab2rec import LearningPolicy
from scipy.sparse import csr_matrix, lil_matrix

class Mab2RecRecommenderOptimized(ABC):
    '''
    Classe base para os recomendadores da biblioteca [mab2rec](https://github.com/fidelity/mab2rec).

    Essa classe é responsável por encapsular a lógica de inicialização e treinamento dos algoritmos da biblioteca mab2rec.

    Essa classe não deve ser utilizada diretamente. Para utilizar um algoritmo da biblioteca mab2rec, utilize uma das classes filhas dessa classe.

    Para implementar um novo algoritmo da biblioteca mab2rec, basta na classe filha implementar o método __init__ chamando o super().__init__ e inicializar o atributo self.recommender com o algoritmo da biblioteca mab2rec (pelo menos, a ideia inicial é ser simples assim).
    '''

    def __init__(self, num_users: int, num_items: int, num_features: int, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, hyperparameters: dict={}):
        '''
        Inicializa o recomendador.

        params:
            user_column: Nome da coluna que representa o usuário.
            item_column: Nome da coluna que representa o item.
            rating_column: Nome da coluna que representa a avaliação.
        '''
        self.user_column = user_column
        self.item_column = item_column
        self.rating_column = rating_column
        self.num_items = num_items
        self.num_users = num_users
        self.num_features = num_features

        self.interactions_by_user: pd.DataFrame = None
        self.recommender: LearningPolicy = None

        self.exclude_mask = csr_matrix(([], ([], [])), shape=(self.num_users, self.num_items), dtype=bool).tolil()

    def train(self, interactions_df: pd.DataFrame, contexts):
        '''
        Treina "do zero" o recomendador com base nas interações passadas. Utilizar apenas na primeira chamada de treinamento, caso deseje treinar incrementalmente.

        params:
            interactions_df: DataFrame contendo as interações usuário-item.
        '''

        self.exclude_mask[interactions_df[self.user_column], interactions_df[self.item_column]] = True
        self.recommender.fit(
            decisions=interactions_df[self.item_column],
            rewards=interactions_df[self.rating_column],
            contexts=contexts
        )

    def partial_train(self, interactions_df: pd.DataFrame, contexts):
        '''
        Treina o recomendador incrementalmente com base nas interações passadas. Deve ser utilizado após a primeira chamada de treinamento, o novo conhecimento será incorporado ao modelo, sem esquecer o conhecimento anterior.

        params:
            interactions_df: DataFrame contendo as interações usuário-item.
        '''
        self.exclude_mask[interactions_df[self.user_column], interactions_df[self.item_column]] = True
        self.recommender.partial_fit(
            decisions=interactions_df[self.item_column],
            rewards=interactions_df[self.rating_column],
            contexts=contexts
        )
    
    def recommend(self, users_ids: 'Union[list[int], np.ndarray]', contexts) -> 'tuple[list[int], list[float]]':
        '''
        Gera recomendações para uma lista de usuários.

        params:
            users_ids: Lista de IDs dos usuários para os quais deseja-se gerar recomendações.
            topn: Número máximo de recomendações a serem geradas por `user_id`.

        returns:
            Tupla contendo dois arrays: o primeiro contém os IDs dos itens recomendados e o segundo contém a pontuação de cada item.
        '''
        return self.recommender.recommend(contexts, self.exclude_mask[users_ids].nonzero(), apply_sigmoid=False, return_scores=True)

In [11]:

from mab2rec import LearningPolicy
import src

class LinOptimized(Mab2RecRecommenderOptimized):

    def __init__(self, num_users: int, num_items: int, num_features: int, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, hyperparameters: dict={}):
        super().__init__(num_users, num_items, num_features, user_column, item_column, rating_column)

        self.recommender = BanditRecommenderArmEncodedOptimized(
            num_arms=self.num_items,
            num_features=self.num_features,
            learning_policy=LearningPolicy.LinGreedy(epsilon=0),
            top_k=src.TOP_N,
            seed=src.RANDOM_STATE
        )

class LinGreedyOptimized(Mab2RecRecommenderOptimized):

    def __init__(self, num_users: int, num_arms: int, num_features: int, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, hyperparameters: dict={}):
        super().__init__(num_users, num_arms, num_features, user_column, item_column, rating_column)

        self.recommender = BanditRecommenderArmEncodedOptimized(
            num_arms=self.num_arms,
            num_features=self.num_features,
            learning_policy=LearningPolicy.LinGreedy(**hyperparameters),
            top_k=src.TOP_N,
            seed=src.RANDOM_STATE
        )

class LinUCBOptimized(Mab2RecRecommenderOptimized):

    def __init__(self, num_users: int, num_items: int, num_features: int, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, hyperparameters: dict={}):
        super().__init__(num_users, num_items, num_features, user_column, item_column, rating_column)

        self.recommender = BanditRecommenderArmEncodedOptimized(
            num_arms=self.num_items,
            num_features=self.num_features,
            learning_policy=LearningPolicy.LinUCB(**hyperparameters),
            top_k=src.TOP_N,
            seed=src.RANDOM_STATE
        )

class LinTSOptimized(Mab2RecRecommenderOptimized):

    def __init__(self, num_users: int, num_items: int, num_features: int, user_column: str=src.COLUMN_USER_ID, item_column: str=src.COLUMN_ITEM_ID, rating_column: str=src.COLUMN_RATING, hyperparameters: dict={}):
        super().__init__(num_users, num_items, num_features, user_column, item_column, rating_column)

        self.recommender = BanditRecommenderArmEncodedOptimized(
            num_arms=self.num_items,
            num_features=self.num_features,
            learning_policy=LearningPolicy.LinTS(**hyperparameters),
            top_k=src.TOP_N,
            seed=src.RANDOM_STATE
        )


## Geração de dados toy

In [12]:

np.random.seed(src.RANDOM_STATE)

def generate_toy_dataset(num_users: int, num_items: int, num_interactions: int) -> pd.DataFrame:
    """
    Gera um dataset de interações aleatórias entre usuários e itens.

    :param num_users: Número de usuários.
    :param num_items: Número de itens.
    :param num_interactions: Número total de interações a serem geradas.
    :return: DataFrame contendo as interações.
    """
    first_users = np.arange(num_users)
    users = np.random.randint(0, num_users, size=num_interactions - len(first_users))

    first_items = np.arange(num_items)
    items = np.random.randint(0, num_items, size=num_interactions - len(first_items))

    ratings = np.random.randint(0, 2, size=num_interactions)  # Avaliações entre 0 e 1 (binárias)

    full_dataset = pd.DataFrame({
        src.COLUMN_USER_ID: np.concatenate([first_users, users]),
        src.COLUMN_ITEM_ID: np.concatenate([first_items, items]),
        src.COLUMN_RATING: ratings
    })

    return full_dataset[:num_interactions//2], full_dataset[num_interactions//2:]

train_100_100_1k, test_100_100_1k = generate_toy_dataset(100, 100, 1000)
train_1k_1k_10k, test_1k_1k_10k = generate_toy_dataset(1_000, 1_000, 10_000)
train_10k_10k_100k, test_10k_10k_100k = generate_toy_dataset(10_000, 10_000, 100_000)
train_20k_20k_250k, test_20k_20k_250k = generate_toy_dataset(20_000, 20_000, 250_000)

train_10k_50k_250k, test_10k_50k_250k = generate_toy_dataset(10_000, 50_000, 250_000)

## Testes lin

In [13]:
# Calculate current memory usage
import psutil
def get_memory_usage():
    process = psutil.Process(os.getpid())
    memory_info = process.memory_info()
    return memory_info.rss / (1024 * 1024)  # Convert bytes to MB

In [14]:
algo1_contexts = np.random.rand(train_100_100_1k.shape[0], 10)
algo1_contexts_rec = np.random.rand(test_100_100_1k.shape[0], 10)

algo2_contexts = np.random.rand(train_1k_1k_10k.shape[0], 10)
algo2_contexts_rec = np.random.rand(test_1k_1k_10k.shape[0], 10)

algo3_contexts = np.random.rand(train_10k_10k_100k.shape[0], 10)
algo3_contexts_rec = np.random.rand(test_10k_10k_100k.shape[0], 10)

algo4_contexts = np.random.rand(train_20k_20k_250k.shape[0], 10)
algo4_contexts_rec = np.random.rand(test_20k_20k_250k.shape[0], 10)

algo5_contexts = np.random.rand(train_10k_50k_250k.shape[0], 128)
algo5_contexts_rec = np.random.rand(test_10k_50k_250k.shape[0], 128)

In [15]:
start_memory_usage = get_memory_usage()
algo1 = Lin()

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo1.train(train_100_100_1k, contexts=algo1_contexts)
memory_used_algo1 = get_memory_usage() - start_memory_usage
results_algo1 = algo1.recommend(users_ids=test_100_100_1k[src.COLUMN_USER_ID], contexts=algo1_contexts_rec)

total_time_algo1 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 100 users, 100 items and 1k interactions: {total_time_algo1:.2f} seconds")
print(f"Memory used by 100 users, 100 items and 1k interactions: {memory_used_algo1:.2f} MB")

init demorou 0.0015604496002197266 segundos
reset_arm_to_status demorou 9.5367431640625e-07 segundos
parallel_fit demorou 0.026643037796020508 segundos
Gerar as predições demorou 0.0013499259948730469 segundos
predict_expectations demorou 0.0015196800231933594 segundos
Ordenação top-K demorou 0.0010988712310791016 segundos
gerar lista de recomendações demorou 0.006888389587402344 segundos



-----------------------------------------------------------

Time taken by 100 users, 100 items and 1k interactions: 0.06 seconds
Memory used by 100 users, 100 items and 1k interactions: 1.92 MB


In [19]:
start_memory_usage = get_memory_usage()
algo2 = Lin()

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo2.train(train_1k_1k_10k, contexts=algo2_contexts)
memory_used_algo2 = get_memory_usage() - start_memory_usage
results_algo2 = algo2.recommend(users_ids=test_1k_1k_10k[src.COLUMN_USER_ID], contexts=algo2_contexts_rec)

total_time_algo2 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 1k users, 1k items and 10k interactions: {total_time_algo2:.2f} seconds")
print(f"Memory used by 1k users, 1k items and 10k interactions: {memory_used_algo2:.2f} MB")

init demorou 0.014403104782104492 segundos
reset_arm_to_status demorou 4.76837158203125e-07 segundos
parallel_fit demorou 0.2588481903076172 segundos
Gerar as predições demorou 0.14911866188049316 segundos
predict_expectations demorou 0.14976954460144043 segundos
Ordenação top-K demorou 0.09089255332946777 segundos
gerar lista de recomendações demorou 0.06270337104797363 segundos



-----------------------------------------------------------

Time taken by 1k users, 1k items and 10k interactions: 0.73 seconds
Memory used by 1k users, 1k items and 10k interactions: 3.88 MB


In [20]:
start_memory_usage = get_memory_usage()
algo3 = Lin()

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo3.train(train_10k_10k_100k, contexts=algo3_contexts)
memory_used_algo3 = get_memory_usage() - start_memory_usage
results_algo3 = algo3.recommend(users_ids=test_10k_10k_100k[src.COLUMN_USER_ID], contexts=algo3_contexts_rec)

total_time_algo3 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 10k users, 10k items and 100k interactions: {total_time_algo3:.2f} seconds")
print(f"Memory used by 10k users, 10k items and 100k interactions: {memory_used_algo3:.2f} MB")

init demorou 0.09664511680603027 segundos
reset_arm_to_status demorou 7.152557373046875e-07 segundos
parallel_fit demorou 2.4247357845306396 segundos
Gerar as predições demorou 11.9911527633667 segundos
predict_expectations demorou 11.996206998825073 segundos
Ordenação top-K demorou 7.821106672286987 segundos
gerar lista de recomendações demorou 0.8709344863891602 segundos



-----------------------------------------------------------

Time taken by 10k users, 10k items and 100k interactions: 24.25 seconds
Memory used by 10k users, 10k items and 100k interactions: 22.92 MB


In [19]:
start_memory_usage = get_memory_usage()
algo4 = Lin()

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo4.train(train_20k_20k_250k, contexts=algo4_contexts)
memory_used_algo4 = get_memory_usage() - start_memory_usage
results_algo4 = algo4.recommend(users_ids=test_20k_20k_250k[src.COLUMN_USER_ID], contexts=algo4_contexts_rec)

total_time_algo4 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 20k users, 20k items and 250k interactions: {total_time_algo4:.2f} seconds")
print(f"Memory used by 20k users, 20k items and 250k interactions: {memory_used_algo4:.2f} MB")

init demorou 0.2299494743347168 segundos
reset_arm_to_status demorou 7.152557373046875e-07 segundos
parallel_fit demorou 6.000527858734131 segundos
Gerar as predições demorou 55.42299270629883 segundos
predict_expectations demorou 55.42787313461304 segundos
Ordenação top-K demorou 45.45101046562195 segundos
gerar lista de recomendações demorou 2.0519351959228516 segundos



-----------------------------------------------------------

Time taken by 20k users, 20k items and 250k interactions: 112.63 seconds
Memory used by 20k users, 20k items and 250k interactions: 40.95 MB


In [16]:
ITEMS_PER_BATCH = 1
start_memory_usage = get_memory_usage()
algo1_optimized = LinOptimized(train_100_100_1k[src.COLUMN_USER_ID].nunique(), train_100_100_1k[src.COLUMN_ITEM_ID].nunique(), num_features=algo1_contexts.shape[1])

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo1_optimized.train(train_100_100_1k, contexts=algo1_contexts)
memory_used_algo1_optimized = get_memory_usage() - start_memory_usage
results_algo1_optimized = algo1_optimized.recommend(users_ids=test_100_100_1k[src.COLUMN_USER_ID], contexts=algo1_contexts_rec)

total_time_algo1_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(results_algo1[0], results_algo1_optimized[0]))
print(np.allclose(results_algo1[1], results_algo1_optimized[1]))
print('\n\n\n-----------------------------------\n')

print(f"Time taken by 100 users, 100 items and 1k interactions: {total_time_algo1_optimized:.2f} seconds ({total_time_algo1 / total_time_algo1_optimized:.2f}x mais rápido)")
print(f"Memory used by 100 users, 100 items and 1k interactions: {memory_used_algo1_optimized:.2f} MB ({memory_used_algo1 / (memory_used_algo1_optimized + 0.00001):.2f}x menos memória)")

init demorou 0.5228850841522217 segundos
passar para cuda demorou 0.0006272792816162109 segundos
A add demorou 0.08681988716125488 segundos
Xty demorou 0.0003108978271484375 segundos
beta demorou 0.03995251655578613 segundos
beta demorou 0.0002601146697998047 segundos
beta demorou 0.00021791458129882812 segundos
beta demorou 0.00021123886108398438 segundos
beta demorou 0.0002090930938720703 segundos
beta demorou 0.00020837783813476562 segundos
beta demorou 0.00020813941955566406 segundos
beta demorou 0.00020694732666015625 segundos
beta demorou 0.00020766258239746094 segundos
beta demorou 0.00020694732666015625 segundos
beta demorou 0.00020742416381835938 segundos
beta demorou 0.003587484359741211 segundos
beta demorou 0.0003743171691894531 segundos
beta demorou 0.0002224445343017578 segundos
beta demorou 0.00021266937255859375 segundos
beta demorou 0.00020694732666015625 segundos
beta demorou 0.0002086162567138672 segundos
beta demorou 0.0002086162567138672 segundos
beta demorou 0.000

In [24]:
ITEMS_PER_BATCH = 1_000
start_memory_usage = get_memory_usage()
algo2_optimized = LinOptimized(train_1k_1k_10k[src.COLUMN_USER_ID].nunique(), train_1k_1k_10k[src.COLUMN_ITEM_ID].nunique(), num_features=algo2_contexts.shape[1])

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo2_optimized.train(train_1k_1k_10k, contexts=algo2_contexts)
memory_used_algo2_optimized = get_memory_usage() - start_memory_usage
results_algo2_optimized = algo2_optimized.recommend(users_ids=test_1k_1k_10k[src.COLUMN_USER_ID], contexts=algo2_contexts_rec)

total_time_algo2_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(results_algo2[0], results_algo2_optimized[0]))
print(np.allclose(results_algo2[1], results_algo2_optimized[1]))

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 1k users, 1k items and 10k interactions: {total_time_algo2_optimized:.2f} seconds ({total_time_algo2 / total_time_algo2_optimized:.2f}x mais rápido)")
print(f"Memory used by 1k users, 1k items and 10k interactions: {memory_used_algo2_optimized:.2f} MB ({memory_used_algo2 / (memory_used_algo2_optimized + 0.00001):.2f}x menos memória)")

init demorou 0.022382020950317383 segundos
passar para cuda demorou 0.0006356239318847656 segundos
A add demorou 0.0002415180206298828 segundos
Xty demorou 0.00012087821960449219 segundos
beta demorou 0.0010783672332763672 segundos
parallel_fit demorou 0.002260446548461914 segundos
predict_expectations demorou 0.0007431507110595703 segundos
Exclude mask demorou 0.0003573894500732422 segundos
Ordenação top-K demorou 0.0013833045959472656 segundos
recs iguais ?
True
True



-----------------------------------------------------------

Time taken by 1k users, 1k items and 10k interactions: 0.04 seconds (17.19x mais rápido)
Memory used by 1k users, 1k items and 10k interactions: 0.00 MB (388281.25x menos memória)


In [27]:
ITEMS_PER_BATCH = 10_000
start_memory_usage = get_memory_usage()
algo3_optimized = LinOptimized(train_10k_10k_100k[src.COLUMN_USER_ID].nunique(), train_10k_10k_100k[src.COLUMN_ITEM_ID].nunique(), algo3_contexts.shape[1])

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo3_optimized.train(train_10k_10k_100k, contexts=algo3_contexts)
memory_used_algo3_optimized = get_memory_usage() - start_memory_usage
results_algo3_optimized = algo3_optimized.recommend(users_ids=test_10k_10k_100k[src.COLUMN_USER_ID], contexts=algo3_contexts_rec)

total_time_algo3_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(results_algo3[0], results_algo3_optimized[0]))
print(np.allclose(results_algo3[1], results_algo3_optimized[1]))

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 10k users, 10k items and 100k interactions: {total_time_algo3_optimized:.2f} seconds ({total_time_algo3 / total_time_algo3_optimized:.2f}x mais rápido)")
print(f"Memory used by 10k users, 10k items and 100k interactions: {memory_used_algo3_optimized:.2f} MB ({memory_used_algo3 / (memory_used_algo3_optimized + 0.0001):.2f}x menos memória)")

init demorou 0.0004601478576660156 segundos
passar para cuda demorou 0.0010037422180175781 segundos
A add demorou 0.00018596649169921875 segundos
Xty demorou 7.700920104980469e-05 segundos
beta demorou 0.0019032955169677734 segundos
parallel_fit demorou 0.0032927989959716797 segundos
predict_expectations demorou 0.0013937950134277344 segundos
Exclude mask demorou 0.03150129318237305 segundos
Ordenação top-K demorou 0.046089887619018555 segundos
recs iguais ?
True
True



-----------------------------------------------------------

Time taken by 10k users, 10k items and 100k interactions: 0.36 seconds (67.45x mais rápido)
Memory used by 10k users, 10k items and 100k interactions: -0.75 MB (-30.40x menos memória)


In [23]:
start_memory_usage = get_memory_usage()
algo4_optimized = LinOptimized(train_20k_20k_250k[src.COLUMN_ITEM_ID].nunique(), algo4_contexts.shape[1])

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo4_optimized.train(train_20k_20k_250k, contexts=algo4_contexts)
memory_used_algo4_optimized = get_memory_usage() - start_memory_usage
start_time_2 = time.time()
results_algo4_optimized = algo4_optimized.recommend(users_ids=test_20k_20k_250k[src.COLUMN_USER_ID], contexts=algo4_contexts_rec)
print(time.time() - start_time_2)

total_time_algo4_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(results_algo4[0], results_algo4_optimized[0]))
print(np.allclose(results_algo4[1], results_algo4_optimized[1]))

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 20k users, 20k items and 250k interactions: {total_time_algo4_optimized:.2f} seconds ({total_time_algo4 / total_time_algo4_optimized:.2f}x mais rápido)")
print(f"Memory used by 20k users, 20k items and 250k interactions: {memory_used_algo4_optimized:.2f} MB ({memory_used_algo4 / (memory_used_algo4_optimized + 0.0001):.2f}x menos memória)")

parallel_fit demorou 0.11179614067077637 segundos
predict_expectations demorou 0.006095409393310547 segundos
Exclude mask demorou 0.15730619430541992 segundos
Ordenação top-K demorou 0.24439501762390137 segundos
0.4246556758880615
recs iguais ?
True
True



-----------------------------------------------------------

Time taken by 20k users, 20k items and 250k interactions: 0.59 seconds (190.22x mais rápido)
Memory used by 20k users, 20k items and 250k interactions: 0.87 MB (47.00x menos memória)


In [17]:
BATCH_SIZE = 50_000
start_memory_usage = get_memory_usage()
algo5 = Lin()

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo5.train(train_10k_50k_250k, contexts=algo5_contexts)
memory_used_algo5 = get_memory_usage() - start_memory_usage

recommendations_ids5 = np.empty((test_10k_50k_250k.shape[0], src.TOP_N), dtype=int)
recommendations_scores5 = np.empty((test_10k_50k_250k.shape[0], src.TOP_N), dtype=float)

for j in range(0, len(test_10k_50k_250k), BATCH_SIZE):
    batch_df = test_10k_50k_250k.iloc[j:j + BATCH_SIZE]
    batch_contexts = algo5_contexts_rec[j:j + BATCH_SIZE]
    results_algo5 = algo5.recommend(users_ids=batch_df[src.COLUMN_USER_ID], contexts=batch_contexts)
    recommendations_ids5[j:j + BATCH_SIZE] = results_algo5[0]
    recommendations_scores5[j:j + BATCH_SIZE] = results_algo5[1]

total_time_algo5 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 10k users, 50k items and 250k interactions: {total_time_algo5:.2f} seconds")
print(f"Memory used by 10k users, 50k items and 250k interactions: {memory_used_algo5:.2f} MB")

init demorou 18.531693935394287 segundos
reset_arm_to_status demorou 3.814697265625e-06 segundos
parallel_fit demorou 99.5164840221405 segundos
Gerar as predições demorou 75.51276659965515 segundos
predict_expectations demorou 75.55166602134705 segundos
Ordenação top-K demorou 32.233283281326294 segundos
gerar lista de recomendações demorou 0.9358096122741699 segundos
Gerar as predições demorou 87.14873933792114 segundos
predict_expectations demorou 87.16777181625366 segundos
Ordenação top-K demorou 59.73408222198486 segundos
gerar lista de recomendações demorou 0.7291140556335449 segundos
Gerar as predições demorou 30.094655752182007 segundos
predict_expectations demorou 30.234052181243896 segundos
Ordenação top-K demorou 12.389704942703247 segundos
gerar lista de recomendações demorou 0.6723012924194336 segundos



-----------------------------------------------------------

Time taken by 10k users, 50k items and 250k interactions: 423.04 seconds
Memory used by 10k users, 50k items a

In [19]:
TRAIN_BATCH_SIZE = 50_000
RECOMMEND_BATCH_SIZE = 28_000
ITEMS_PER_BATCH = 25_000
start_memory_usage = get_memory_usage()
algo5_optimized = LinOptimized(train_10k_50k_250k[src.COLUMN_USER_ID].nunique(), train_10k_50k_250k[src.COLUMN_ITEM_ID].nunique(), algo5_contexts.shape[1])

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
for j in range(0, len(train_10k_50k_250k), TRAIN_BATCH_SIZE):
    batch_df = train_10k_50k_250k.iloc[j:j + TRAIN_BATCH_SIZE]
    batch_contexts = algo5_contexts[j:j + TRAIN_BATCH_SIZE]
    algo5_optimized.train(batch_df, contexts=batch_contexts)
#algo5_optimized.train(train_10k_50k_250k, contexts=algo5_contexts)
memory_used_algo5_optimized = get_memory_usage() - start_memory_usage

recommendations_ids5_optimized = np.empty((test_10k_50k_250k.shape[0], src.TOP_N), dtype=int)
recommendations_scores5_optimized = np.empty((test_10k_50k_250k.shape[0], src.TOP_N), dtype=float)

for j in range(0, len(test_10k_50k_250k), RECOMMEND_BATCH_SIZE):
    batch_df = test_10k_50k_250k.iloc[j:j + RECOMMEND_BATCH_SIZE]
    batch_contexts = algo5_contexts_rec[j:j + RECOMMEND_BATCH_SIZE]
    results_algo5_optimized = algo5_optimized.recommend(users_ids=batch_df[src.COLUMN_USER_ID], contexts=batch_contexts)
    recommendations_ids5_optimized[j:j + RECOMMEND_BATCH_SIZE] = results_algo5_optimized[0]
    recommendations_scores5_optimized[j:j + RECOMMEND_BATCH_SIZE] = results_algo5_optimized[1]

total_time_algo5_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(recommendations_ids5, recommendations_ids5_optimized))
print(np.allclose(recommendations_scores5, recommendations_scores5_optimized))

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 10k users, 50k items and 250k interactions: {total_time_algo5_optimized:.2f} seconds ({total_time_algo5 / total_time_algo5_optimized:.2f}x mais rápido)")
print(f"Memory used by 10k users, 50k items and 250k interactions: {memory_used_algo5_optimized:.2f} MB")

init demorou 0.021104812622070312 segundos
passar para cuda demorou 0.04938936233520508 segundos
A add demorou 0.0002582073211669922 segundos
Xty demorou 8.535385131835938e-05 segundos
beta demorou 0.2589085102081299 segundos
beta demorou 0.2113180160522461 segundos
parallel_fit demorou 0.5202620029449463 segundos
passar para cuda demorou 0.00793910026550293 segundos
A add demorou 0.0002491474151611328 segundos
Xty demorou 8.177757263183594e-05 segundos
beta demorou 0.2349529266357422 segundos
beta demorou 0.20102715492248535 segundos
parallel_fit demorou 0.4446594715118408 segundos
passar para cuda demorou 0.0037581920623779297 segundos
A add demorou 0.00017595291137695312 segundos
Xty demorou 6.151199340820312e-05 segundos
beta demorou 0.21686267852783203 segundos
beta demorou 0.20102429389953613 segundos
parallel_fit demorou 0.42223429679870605 segundos
predict_expectations demorou 0.004850625991821289 segundos
Exclude mask demorou 0.6706581115722656 segundos
Ordenação top-K demorou

## Testes LinGreedy

In [15]:
start_memory_usage = get_memory_usage()
algo1 = LinGreedy(hyperparameters={'epsilon': 0.01})

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo1.train(train_100_100_1k, contexts=algo1_contexts)
memory_used_algo1 = get_memory_usage() - start_memory_usage
results_algo1 = algo1.recommend(users_ids=test_100_100_1k[src.COLUMN_USER_ID], contexts=algo1_contexts_rec)

total_time_algo1 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 100 users, 100 items and 1k interactions: {total_time_algo1:.2f} seconds")
print(f"Memory used by 100 users, 100 items and 1k interactions: {memory_used_algo1:.2f} MB")

init demorou 0.0028831958770751953 segundos
reset_arm_to_status demorou 1.430511474609375e-06 segundos
parallel_fit demorou 0.05723381042480469 segundos
Gerar as predições demorou 0.0014731884002685547 segundos
predict_expectations demorou 0.0017085075378417969 segundos
Ordenação top-K demorou 0.008001565933227539 segundos
gerar lista de recomendações demorou 0.014766693115234375 segundos



-----------------------------------------------------------

Time taken by 100 users, 100 items and 1k interactions: 0.36 seconds
Memory used by 100 users, 100 items and 1k interactions: 2.14 MB


In [None]:
start_memory_usage = get_memory_usage()
algo1_optimized = LinGreedyOptimized(hyperparameters={'epsilon': 0.01})

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo1_optimized.train(train_100_100_1k, contexts=algo1_contexts)
memory_used_algo1_optimized = get_memory_usage() - start_memory_usage
results_algo1_optimized = algo1_optimized.recommend(users_ids=test_100_100_1k[src.COLUMN_USER_ID], contexts=algo1_contexts_rec)

total_time_algo1_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(results_algo1[0], results_algo1_optimized[0]))
print(np.allclose(results_algo1[1], results_algo1_optimized[1]))
print('\n\n\n-----------------------------------\n')

print(f"Time taken by 100 users, 100 items and 1k interactions: {total_time_algo1_optimized:.2f} seconds ({total_time_algo1 / total_time_algo1_optimized:.2f}x mais rápido)")
print(f"Memory used by 100 users, 100 items and 1k interactions: {memory_used_algo1_optimized:.2f} MB ({memory_used_algo1 / (memory_used_algo1_optimized + 0.00001):.2f}x menos memória)")

## Testes LinUcb

In [25]:
start_memory_usage = get_memory_usage()
algo1 = LinUCB(hyperparameters={})

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo1.train(train_100_100_1k, contexts=algo1_contexts)
memory_used_algo1 = get_memory_usage() - start_memory_usage
results_algo1 = algo1.recommend(users_ids=test_100_100_1k[src.COLUMN_USER_ID], contexts=algo1_contexts_rec)

total_time_algo1 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 100 users, 100 items and 1k interactions: {total_time_algo1:.2f} seconds")
print(f"Memory used by 100 users, 100 items and 1k interactions: {memory_used_algo1:.2f} MB")

init demorou 0.0022423267364501953 segundos
reset_arm_to_status demorou 1.1920928955078125e-06 segundos
parallel_fit demorou 0.047121286392211914 segundos
Gerar as predições demorou 0.0074520111083984375 segundos
predict_expectations demorou 0.007620334625244141 segundos
Ordenação top-K demorou 0.0013415813446044922 segundos
gerar lista de recomendações demorou 0.008218765258789062 segundos



-----------------------------------------------------------

Time taken by 100 users, 100 items and 1k interactions: 0.09 seconds
Memory used by 100 users, 100 items and 1k interactions: 0.25 MB


In [29]:
ITEMS_PER_BATCH = 1
start_memory_usage = get_memory_usage()
algo1_optimized = LinUCBOptimized(train_100_100_1k[src.COLUMN_USER_ID].nunique(), train_100_100_1k[src.COLUMN_ITEM_ID].nunique(), num_features=algo1_contexts.shape[1])

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo1_optimized.train(train_100_100_1k, contexts=algo1_contexts)
memory_used_algo1_optimized = get_memory_usage() - start_memory_usage
results_algo1_optimized = algo1_optimized.recommend(users_ids=test_100_100_1k[src.COLUMN_USER_ID], contexts=algo1_contexts_rec)

total_time_algo1_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(results_algo1[0], results_algo1_optimized[0]))
print(np.allclose(results_algo1[1], results_algo1_optimized[1]))
print('\n\n\n-----------------------------------\n')

print(f"Time taken by 100 users, 100 items and 1k interactions: {total_time_algo1_optimized:.2f} seconds ({total_time_algo1 / total_time_algo1_optimized:.2f}x mais rápido)")
print(f"Memory used by 100 users, 100 items and 1k interactions: {memory_used_algo1_optimized:.2f} MB ({memory_used_algo1 / (memory_used_algo1_optimized + 0.00001):.2f}x menos memória)")

init demorou 0.023614168167114258 segundos
passar para cuda demorou 0.0005590915679931641 segundos
A add demorou 0.00029540061950683594 segundos
Xty demorou 0.0001354217529296875 segundos
beta demorou 0.020980119705200195 segundos
beta demorou 0.0005040168762207031 segundos
beta demorou 0.00045561790466308594 segundos
beta demorou 0.0004436969757080078 segundos
beta demorou 0.0004417896270751953 segundos
beta demorou 0.0004360675811767578 segundos
beta demorou 0.0004553794860839844 segundos
beta demorou 0.0004355907440185547 segundos
beta demorou 0.00041294097900390625 segundos
beta demorou 0.00040984153747558594 segundos
beta demorou 0.00041031837463378906 segundos
beta demorou 0.0004119873046875 segundos
beta demorou 0.0004189014434814453 segundos
beta demorou 0.00040078163146972656 segundos
beta demorou 0.00040268898010253906 segundos
beta demorou 0.00043082237243652344 segundos
beta demorou 0.00038170814514160156 segundos
beta demorou 0.000385284423828125 segundos
beta demorou 0.00

In [16]:
BATCH_SIZE = 1_000
start_memory_usage = get_memory_usage()
algo5 = LinUCB()

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo5.train(train_10k_50k_250k, contexts=algo5_contexts)
memory_used_algo5 = get_memory_usage() - start_memory_usage

recommendations_ids5 = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=int)
recommendations_scores5 = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=float)

for j in range(0, len(test_10k_50k_250k), BATCH_SIZE):
    batch_df = test_10k_50k_250k.iloc[j:j + BATCH_SIZE]
    batch_contexts = algo5_contexts_rec[j:j + BATCH_SIZE]
    results_algo5 = algo5.recommend(users_ids=batch_df[src.COLUMN_USER_ID], contexts=batch_contexts)
    recommendations_ids5[j:j + BATCH_SIZE] = results_algo5[0]
    recommendations_scores5[j:j + BATCH_SIZE] = results_algo5[1]
    break

total_time_algo5 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 10k users, 50k items and 250k interactions: {total_time_algo5:.2f} seconds")
print(f"Memory used by 10k users, 50k items and 250k interactions: {memory_used_algo5:.2f} MB")

init demorou 12.653273582458496 segundos
reset_arm_to_status demorou 4.0531158447265625e-06 segundos
parallel_fit demorou 109.72455358505249 segundos
Gerar as predições demorou 83.270334482193 segundos
predict_expectations demorou 83.27344107627869 segundos
Ordenação top-K demorou 0.358318567276001 segundos
gerar lista de recomendações demorou 0.01276850700378418 segundos



-----------------------------------------------------------

Time taken by 10k users, 50k items and 250k interactions: 206.89 seconds
Memory used by 10k users, 50k items and 250k interactions: 12720.37 MB


In [17]:
TRAIN_BATCH_SIZE = 50_000
RECOMMEND_BATCH_SIZE = 1_000
ITEMS_PER_BATCH = 5_000
start_memory_usage = get_memory_usage()
algo5_optimized = LinUCBOptimized(train_10k_50k_250k[src.COLUMN_USER_ID].nunique(), train_10k_50k_250k[src.COLUMN_ITEM_ID].nunique(), algo5_contexts.shape[1])

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
for j in range(0, len(train_10k_50k_250k), TRAIN_BATCH_SIZE):
    batch_df = train_10k_50k_250k.iloc[j:j + TRAIN_BATCH_SIZE]
    batch_contexts = algo5_contexts[j:j + TRAIN_BATCH_SIZE]
    algo5_optimized.train(batch_df, contexts=batch_contexts)
#algo5_optimized.train(train_10k_50k_250k, contexts=algo5_contexts)
memory_used_algo5_optimized = get_memory_usage() - start_memory_usage

recommendations_ids5_optimized = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=int)
recommendations_scores5_optimized = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=float)

for j in range(0, len(test_10k_50k_250k), RECOMMEND_BATCH_SIZE):
    batch_df = test_10k_50k_250k.iloc[j:j + RECOMMEND_BATCH_SIZE]
    batch_contexts = algo5_contexts_rec[j:j + RECOMMEND_BATCH_SIZE]
    results_algo5_optimized = algo5_optimized.recommend(users_ids=batch_df[src.COLUMN_USER_ID], contexts=batch_contexts)
    recommendations_ids5_optimized[j:j + RECOMMEND_BATCH_SIZE] = results_algo5_optimized[0]
    recommendations_scores5_optimized[j:j + RECOMMEND_BATCH_SIZE] = results_algo5_optimized[1]
    break

total_time_algo5_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(recommendations_ids5, recommendations_ids5_optimized))
print(np.allclose(recommendations_scores5, recommendations_scores5_optimized))

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 10k users, 50k items and 250k interactions: {total_time_algo5_optimized:.2f} seconds ({total_time_algo5 / total_time_algo5_optimized:.2f}x mais rápido)")
print(f"Memory used by 10k users, 50k items and 250k interactions: {memory_used_algo5_optimized:.2f} MB")

init demorou 0.43347907066345215 segundos
passar para cuda demorou 0.019133329391479492 segundos
A add demorou 0.03985595703125 segundos
Xty demorou 0.0002586841583251953 segundos
beta demorou 0.11709332466125488 segundos
beta demorou 0.03383684158325195 segundos
beta demorou 0.03361153602600098 segundos
beta demorou 0.0335845947265625 segundos
beta demorou 0.032950639724731445 segundos
beta demorou 0.030330896377563477 segundos
beta demorou 0.03026580810546875 segundos
beta demorou 0.03025650978088379 segundos
beta demorou 0.030420303344726562 segundos
beta demorou 0.030422687530517578 segundos
parallel_fit demorou 0.46248936653137207 segundos
passar para cuda demorou 0.007070302963256836 segundos
A add demorou 0.00021910667419433594 segundos
Xty demorou 6.556510925292969e-05 segundos
beta demorou 0.06274843215942383 segundos
beta demorou 0.030429363250732422 segundos
beta demorou 0.030375242233276367 segundos
beta demorou 0.03040480613708496 segundos
beta demorou 0.030420780181884766

## Testes LinTS

In [15]:
TRAIN_BATCH_SIZE = 50_000
RECOMMEND_BATCH_SIZE = 1_000
ITEMS_PER_BATCH = 1
INTERACTIONS_PER_BATCH_LINTS = 1

from mabwiser.linear import _LinTS
from mabwiser.utils import create_rng

algo1 = _LinTSOptimized(create_rng(src.RANDOM_STATE), 1)
algo1.init(10, 1)
algo1.fit(train_100_100_1k[src.COLUMN_ITEM_ID][train_100_100_1k[src.COLUMN_ITEM_ID] == 0].values, algo1_contexts[train_100_100_1k[src.COLUMN_ITEM_ID] == 0], train_100_100_1k[src.COLUMN_RATING][train_100_100_1k[src.COLUMN_ITEM_ID] == 0].values)
scores1 = algo1.predict(algo1_contexts_rec)

algo2 = _LinTSOptimized(create_rng(src.RANDOM_STATE), 1)
algo2.init(10, 1)
algo2.fit(train_100_100_1k[src.COLUMN_ITEM_ID][train_100_100_1k[src.COLUMN_ITEM_ID] == 0].values, algo1_contexts[train_100_100_1k[src.COLUMN_ITEM_ID] == 0], train_100_100_1k[src.COLUMN_RATING][train_100_100_1k[src.COLUMN_ITEM_ID] == 0].values)
scores2 = algo2.predict(algo1_contexts_rec)

print(torch.allclose(torch.tensor(scores1, device='cuda'), scores2))

init demorou 0.37107110023498535 segundos
passar para cuda demorou 0.0015044212341308594 segundos
A add demorou 0.11273717880249023 segundos
Xty demorou 0.00038909912109375 segundos
beta demorou 0.08366703987121582 segundos
init demorou 0.0004889965057373047 segundos
passar para cuda demorou 0.0007333755493164062 segundos
A add demorou 0.00021529197692871094 segundos
Xty demorou 0.00011515617370605469 segundos
beta demorou 0.0006401538848876953 segundos
True


  print(torch.allclose(torch.tensor(scores1, device='cuda'), scores2))


In [16]:
start_memory_usage = get_memory_usage()
algo1 = LinTS(hyperparameters={'alpha': 0.0005})

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo1.train(train_100_100_1k, contexts=algo1_contexts)
memory_used_algo1 = get_memory_usage() - start_memory_usage
results_algo1 = algo1.recommend(users_ids=test_100_100_1k[src.COLUMN_USER_ID], contexts=algo1_contexts_rec)

total_time_algo1 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 100 users, 100 items and 1k interactions: {total_time_algo1:.2f} seconds")
print(f"Memory used by 100 users, 100 items and 1k interactions: {memory_used_algo1:.2f} MB")

init demorou 0.001753091812133789 segundos
reset_arm_to_status demorou 4.76837158203125e-07 segundos
parallel_fit demorou 0.04980802536010742 segundos
Gerar as predições demorou 0.039191246032714844 segundos
predict_expectations demorou 0.03983616828918457 segundos
Ordenação top-K demorou 0.0018582344055175781 segundos
gerar lista de recomendações demorou 0.016396522521972656 segundos



-----------------------------------------------------------

Time taken by 100 users, 100 items and 1k interactions: 0.19 seconds
Memory used by 100 users, 100 items and 1k interactions: 1.69 MB


In [17]:
TRAIN_BATCH_SIZE = 3
RECOMMEND_BATCH_SIZE = 3
ITEMS_PER_BATCH = 14
start_memory_usage = get_memory_usage()
algo1_optimized = LinTSOptimized(train_100_100_1k[src.COLUMN_USER_ID].nunique(), train_100_100_1k[src.COLUMN_ITEM_ID].nunique(), algo1_contexts.shape[1], hyperparameters={'alpha': 0.0005})

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo1_optimized.train(train_100_100_1k, contexts=algo1_contexts)
memory_used_algo1_optimized = get_memory_usage() - start_memory_usage
results_algo1_optimized = algo1_optimized.recommend(users_ids=test_100_100_1k[src.COLUMN_USER_ID], contexts=algo1_contexts_rec)

total_time_algo1_optimized = time.time() - start_time

print('recs iguais ?')
print(np.array_equal(results_algo1[0], results_algo1_optimized[0]))
print(np.allclose(results_algo1[1], results_algo1_optimized[1]))
print('\n\n\n-----------------------------------\n')

print(f"Time taken by 100 users, 100 items and 1k interactions: {total_time_algo1_optimized:.2f} seconds ({total_time_algo1 / total_time_algo1_optimized:.2f}x mais rápido)")
print(f"Memory used by 100 users, 100 items and 1k interactions: {memory_used_algo1_optimized:.2f} MB ({memory_used_algo1 / (memory_used_algo1_optimized + 0.00001):.2f}x menos memória)")

init demorou 0.0006394386291503906 segundos
passar para cuda demorou 0.0006206035614013672 segundos
A add demorou 0.0005955696105957031 segundos
Xty demorou 0.0001201629638671875 segundos
beta demorou 0.0017261505126953125 segundos
beta demorou 0.0003895759582519531 segundos
beta demorou 0.00037360191345214844 segundos
beta demorou 0.00038743019104003906 segundos
beta demorou 0.0003604888916015625 segundos
beta demorou 0.00037932395935058594 segundos
beta demorou 0.0003757476806640625 segundos
beta demorou 0.0004506111145019531 segundos
parallel_fit demorou 0.0062296390533447266 segundos
predict_expectations demorou 0.008992910385131836 segundos
Exclude mask demorou 0.017832040786743164 segundos
Ordenação top-K demorou 0.2548861503601074 segundos
recs iguais ?
False
False



-----------------------------------

Time taken by 100 users, 100 items and 1k interactions: 0.29 seconds (0.65x mais rápido)
Memory used by 100 users, 100 items and 1k interactions: 1.01 MB (1.67x menos memória)


In [18]:
results_algo1_optimized[0]

array([[26, 59, 54, ..., 27, 24, 78],
       [58, 20, 74, ..., 82, 99, 54],
       [35, 78, 67, ..., 45,  0, 22],
       ...,
       [54, 99, 98, ..., 60, 58, 45],
       [74, 34, 60, ..., 54,  2, 78],
       [18, 78, 35, ..., 30, 45, 27]])

In [19]:
results_algo1[0]

[[26,
  59,
  54,
  7,
  99,
  85,
  25,
  34,
  87,
  60,
  52,
  16,
  18,
  20,
  77,
  89,
  30,
  27,
  24,
  78],
 [58,
  20,
  74,
  60,
  18,
  59,
  35,
  17,
  50,
  2,
  23,
  98,
  13,
  21,
  37,
  27,
  31,
  82,
  99,
  54],
 [35,
  78,
  67,
  54,
  74,
  77,
  73,
  34,
  2,
  98,
  20,
  52,
  15,
  60,
  94,
  24,
  27,
  45,
  0,
  22],
 [18,
  34,
  74,
  15,
  54,
  38,
  31,
  49,
  98,
  46,
  35,
  45,
  7,
  60,
  69,
  14,
  25,
  30,
  12,
  61],
 [34,
  78,
  59,
  54,
  20,
  77,
  18,
  99,
  60,
  35,
  27,
  94,
  74,
  98,
  55,
  15,
  25,
  37,
  30,
  67],
 [58,
  78,
  18,
  2,
  20,
  74,
  35,
  60,
  22,
  37,
  21,
  23,
  34,
  59,
  98,
  31,
  13,
  6,
  50,
  17],
 [99,
  54,
  20,
  15,
  18,
  30,
  60,
  2,
  78,
  16,
  34,
  35,
  56,
  31,
  59,
  49,
  55,
  57,
  46,
  45],
 [18,
  54,
  99,
  34,
  22,
  58,
  98,
  74,
  35,
  2,
  38,
  15,
  49,
  51,
  56,
  69,
  20,
  31,
  73,
  60],
 [37,
  18,
  35,
  22,
  15,
  38,
  55,

In [None]:
BATCH_SIZE = 1_000
start_memory_usage = get_memory_usage()
algo5 = LinTS(hyperparameters={'alpha': 0.00005})

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
algo5.train(train_10k_50k_250k, contexts=algo5_contexts)
memory_used_algo5 = get_memory_usage() - start_memory_usage

recommendations_ids5 = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=int)
recommendations_scores5 = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=float)

for j in range(0, len(test_10k_50k_250k), BATCH_SIZE):
    batch_df = test_10k_50k_250k.iloc[j:j + BATCH_SIZE]
    batch_contexts = algo5_contexts_rec[j:j + BATCH_SIZE]
    results_algo5 = algo5.recommend(users_ids=batch_df[src.COLUMN_USER_ID], contexts=batch_contexts)
    recommendations_ids5[j:j + BATCH_SIZE] = results_algo5[0]
    recommendations_scores5[j:j + BATCH_SIZE] = results_algo5[1]
    break

total_time_algo5 = time.time() - start_time

print('\n\n\n-----------------------------------------------------------\n')
print(f"Time taken by 10k users, 50k items and 250k interactions: {total_time_algo5:.2f} seconds")
print(f"Memory used by 10k users, 50k items and 250k interactions: {memory_used_algo5:.2f} MB")

In [20]:
TRAIN_BATCH_SIZE = 50_000
RECOMMEND_BATCH_SIZE = 50
ITEMS_PER_BATCH = 1_000
start_memory_usage = get_memory_usage()
algo5_optimized = LinTSOptimized(train_10k_50k_250k[src.COLUMN_USER_ID].nunique(), train_10k_50k_250k[src.COLUMN_ITEM_ID].nunique(), algo5_contexts.shape[1], hyperparameters={'alpha': 0.00005})

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
for j in range(0, len(train_10k_50k_250k), TRAIN_BATCH_SIZE):
    batch_df = train_10k_50k_250k.iloc[j:j + TRAIN_BATCH_SIZE]
    batch_contexts = algo5_contexts[j:j + TRAIN_BATCH_SIZE]
    algo5_optimized.train(batch_df, contexts=batch_contexts)
#algo5_optimized.train(train_10k_50k_250k, contexts=algo5_contexts)
memory_used_algo5_optimized = get_memory_usage() - start_memory_usage

recommendations_ids5_optimized = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=int)
recommendations_scores5_optimized = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=float)

for j in range(0, len(test_10k_50k_250k), RECOMMEND_BATCH_SIZE):
    batch_df = test_10k_50k_250k.iloc[j:j + RECOMMEND_BATCH_SIZE]
    batch_contexts = algo5_contexts_rec[j:j + RECOMMEND_BATCH_SIZE]
    results_algo5_optimized = algo5_optimized.recommend(users_ids=batch_df[src.COLUMN_USER_ID], contexts=batch_contexts)
    recommendations_ids5_optimized[j:j + RECOMMEND_BATCH_SIZE] = results_algo5_optimized[0]
    recommendations_scores5_optimized[j:j + RECOMMEND_BATCH_SIZE] = results_algo5_optimized[1]
    break

total_time_algo5_optimized = time.time() - start_time

#print('recs iguais ?')
#print(np.array_equal(recommendations_ids5, recommendations_ids5_optimized))
#print(np.allclose(recommendations_scores5, recommendations_scores5_optimized))

#print('\n\n\n-----------------------------------------------------------\n')
#print(f"Time taken by 10k users, 50k items and 250k interactions: {total_time_algo5_optimized:.2f} seconds ({total_time_algo5 / total_time_algo5_optimized:.2f}x mais rápido)")
#print(f"Memory used by 10k users, 50k items and 250k interactions: {memory_used_algo5_optimized:.2f} MB")

init demorou 0.040284156799316406 segundos
passar para cuda demorou 0.024843931198120117 segundos
A add demorou 0.0010445117950439453 segundos
Xty demorou 8.702278137207031e-05 segundos
beta demorou 0.0658121109008789 segundos
beta demorou 0.009865760803222656 segundos
beta demorou 0.009853363037109375 segundos
beta demorou 0.009863853454589844 segundos
beta demorou 0.009884357452392578 segundos
beta demorou 0.009868383407592773 segundos
beta demorou 0.009876012802124023 segundos
beta demorou 0.009832143783569336 segundos
beta demorou 0.009868860244750977 segundos
beta demorou 0.009895563125610352 segundos
beta demorou 0.009325027465820312 segundos
beta demorou 0.009111642837524414 segundos
beta demorou 0.009108543395996094 segundos
beta demorou 0.00915384292602539 segundos
beta demorou 0.0091094970703125 segundos
beta demorou 0.009120702743530273 segundos
beta demorou 0.009132623672485352 segundos
beta demorou 0.009146690368652344 segundos
beta demorou 0.009175300598144531 segundos
be

In [None]:
TRAIN_BATCH_SIZE = 50_000
RECOMMEND_BATCH_SIZE = 50
ITEMS_PER_BATCH = 2_000
start_memory_usage = get_memory_usage()
algo5_optimized = LinTSOptimized(train_10k_50k_250k[src.COLUMN_USER_ID].nunique(), train_10k_50k_250k[src.COLUMN_ITEM_ID].nunique(), algo5_contexts.shape[1], hyperparameters={'alpha': 0.00005})

# Treinando o modelo com o dataset de 100 usuários e 100 itens
start_time = time.time()
for j in range(0, len(train_10k_50k_250k), TRAIN_BATCH_SIZE):
    batch_df = train_10k_50k_250k.iloc[j:j + TRAIN_BATCH_SIZE]
    batch_contexts = algo5_contexts[j:j + TRAIN_BATCH_SIZE]
    algo5_optimized.train(batch_df, contexts=batch_contexts)
#algo5_optimized.train(train_10k_50k_250k, contexts=algo5_contexts)
memory_used_algo5_optimized = get_memory_usage() - start_memory_usage

recommendations_ids5_optimized = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=int)
recommendations_scores5_optimized = np.zeros((test_10k_50k_250k.shape[0], src.TOP_N), dtype=float)

for j in range(0, len(test_10k_50k_250k), RECOMMEND_BATCH_SIZE):
    batch_df = test_10k_50k_250k.iloc[j:j + RECOMMEND_BATCH_SIZE]
    batch_contexts = algo5_contexts_rec[j:j + RECOMMEND_BATCH_SIZE]
    results_algo5_optimized2 = algo5_optimized.recommend(users_ids=batch_df[src.COLUMN_USER_ID], contexts=batch_contexts)
    recommendations_ids5_optimized[j:j + RECOMMEND_BATCH_SIZE] = results_algo5_optimized[0]
    recommendations_scores5_optimized[j:j + RECOMMEND_BATCH_SIZE] = results_algo5_optimized[1]
    break

total_time_algo5_optimized = time.time() - start_time

init demorou 0.0003666877746582031 segundos
passar para cuda demorou 0.026379108428955078 segundos
A add demorou 0.0002143383026123047 segundos
Xty demorou 6.29425048828125e-05 segundos
beta demorou 0.041290998458862305 segundos
beta demorou 0.00945591926574707 segundos
beta demorou 0.00946950912475586 segundos
beta demorou 0.009478330612182617 segundos
beta demorou 0.009142160415649414 segundos
beta demorou 0.008839130401611328 segundos
beta demorou 0.008839607238769531 segundos
beta demorou 0.008753776550292969 segundos
beta demorou 0.008795022964477539 segundos
beta demorou 0.008748769760131836 segundos
beta demorou 0.008763313293457031 segundos
beta demorou 0.008742809295654297 segundos
beta demorou 0.008740663528442383 segundos
beta demorou 0.008862972259521484 segundos
beta demorou 0.008753776550292969 segundos
beta demorou 0.008743762969970703 segundos
beta demorou 0.008727073669433594 segundos
beta demorou 0.008786916732788086 segundos
beta demorou 0.00874638557434082 segundos


In [22]:
(results_algo5_optimized[0] == results_algo5_optimized2[0]).all()

True

In [29]:
np.array(results_algo5[0])

array([[30555, 38986,   215, ...,   891, 29288, 24339],
       [23236, 35837, 23776, ...,  8175, 32133, 41982],
       [21231, 32512,  3650, ..., 39024,  2994, 28461],
       ...,
       [32512, 34953, 24339, ...,  3650, 36448, 12308],
       [32512,  6081, 37859, ..., 33491, 25172, 34763],
       [29865, 32512,   215, ..., 11034, 23776, 33491]])

In [30]:
results_algo5_optimized[0]

array([[30555, 38986,   215, ..., 24339, 17700, 40985],
       [23236, 35837, 23776, ...,  1293, 32133,  3754],
       [21231, 32512,  3650, ..., 41786,  6890,  2994],
       ...,
       [32512, 34953, 46052, ..., 29288, 44618, 41982],
       [32512,  6081, 37859, ...,  1293, 47550, 25172],
       [29865, 32512,   215, ..., 33491, 11769, 23776]])

In [26]:
torch_rng = torch.Generator(device='cuda').manual_seed(src.RANDOM_STATE)

In [27]:
torch.randn((10, 5), generator=torch_rng, device='cuda', dtype=torch.double)

tensor([[ 1.0360e+00, -1.9756e+00, -1.0678e+00, -7.3533e-01,  7.0253e-01],
        [-9.6031e-01, -9.1739e-01,  4.0556e-01,  8.0946e-01, -4.8576e-01],
        [ 7.8499e-01, -2.4007e-01, -6.9460e-01, -1.9525e+00,  3.9284e-01],
        [-1.0636e+00,  2.1251e-01, -1.6480e+00, -5.1468e-01,  1.1583e+00],
        [ 3.4949e-01,  7.4801e-01, -5.5509e-01,  8.3432e-01,  1.0950e+00],
        [ 2.4906e-01, -1.0698e+00,  7.8971e-02, -8.3719e-02,  3.2338e-02],
        [ 5.1603e-01, -6.8338e-01,  1.0061e+00,  5.0865e-01, -1.2080e+00],
        [ 1.3042e+00,  4.4752e-01, -1.2036e+00,  2.0147e+00, -4.4517e-01],
        [-1.6052e+00, -8.0793e-01, -3.3069e-01,  1.4150e-01,  4.5848e-01],
        [ 7.0441e-01, -2.3455e-01, -1.7963e+00,  7.0716e-04,  9.7392e-01]],
       device='cuda:0', dtype=torch.float64)

In [28]:
torch_rng = torch.Generator(device='cuda').manual_seed(src.RANDOM_STATE)

In [29]:
torch.randn((5, 5), generator=torch_rng, device='cuda', dtype=torch.double)

tensor([[ 1.0360, -1.9756, -1.0678, -0.7353,  0.7025],
        [-0.9603, -0.9174,  0.4056,  0.8095, -0.4858],
        [ 0.7850, -0.2401, -0.6946, -1.9525,  0.3928],
        [-1.0636,  0.2125, -1.6480, -0.5147,  1.1583],
        [ 0.3495,  0.7480, -0.5551,  0.8343,  1.0950]], device='cuda:0',
       dtype=torch.float64)

In [30]:
torch.randn((5, 5), generator=torch_rng, device='cuda', dtype=torch.double)

tensor([[ 0.4233,  0.1292, -0.4278,  2.0776, -1.3249],
        [ 0.1959, -1.0691,  1.2437, -1.5044,  0.7260],
        [-0.1422,  0.9751, -0.6168,  0.7498, -0.6927],
        [ 0.7814, -0.4943,  1.1254,  0.2509,  0.8883],
        [-0.7887, -0.4151,  1.0460,  0.6586, -1.3972]], device='cuda:0',
       dtype=torch.float64)

In [16]:
import torch
torch_rng = torch.Generator(device='cuda')
torch_rng.manual_seed(src.RANDOM_STATE)

# Then in your loop, just keep drawing in chunks
z1 = torch.randn((5, 5), generator=torch_rng, device='cuda', dtype=torch.double)
z2 = torch.randn((5, 5), generator=torch_rng, device='cuda', dtype=torch.double)
z3 = torch.randn((5, 5), generator=torch_rng, device='cuda', dtype=torch.double)

# This gives the same result as:
torch_rng = torch.Generator(device='cuda').manual_seed(src.RANDOM_STATE)
z_full = torch.randn((15, 5), generator=torch_rng, device='cuda', dtype=torch.double)
z1_expected = z_full[:5]
z2_expected = z_full[5:10]
z3_expected = z_full[10:15]

In [None]:
(z1 == z1_expected).all()

tensor([[True, True, True, True, True],
        [True, True, True, True, True],
        [True, True, True, True, True],
        [True, True, True, True, True],
        [True, True, True, True, True]], device='cuda:0')

In [None]:
(z2 == z2_expected).all()

In [7]:
import torch

RANDOM_STATE = 1234  # Fixed seed

# Generator setup
torch_rng = torch.Generator(device='cpu').manual_seed(RANDOM_STATE)

# Draw in chunks
z1 = torch.randn((5, 5), generator=torch_rng, device='cpu', dtype=torch.double)
z2 = torch.randn((5, 5), generator=torch_rng, device='cpu', dtype=torch.double)
z3 = torch.randn((5, 5), generator=torch_rng, device='cpu', dtype=torch.double)

# Draw all at once
torch_rng = torch.Generator(device='cpu').manual_seed(RANDOM_STATE)
z_full = torch.randn((15, 5), generator=torch_rng, device='cpu', dtype=torch.double)
z1_expected = z_full[:5]
z2_expected = z_full[5:10]
z3_expected = z_full[10:15]

# Compare
print((z1 == z1_expected).all())
print((z2 == z2_expected).all())
print((z3 == z3_expected).all())

tensor(False)
tensor(False)
tensor(False)


In [8]:
z1

tensor([[-0.0883,  0.3420,  0.4112,  1.0051, -0.1117],
        [-0.5988, -0.0982, -0.3511,  0.7209, -0.1952],
        [-0.5215,  0.8718,  1.7656,  0.7725, -2.6852],
        [-0.1483, -1.7856,  0.0853,  1.0006, -0.0962],
        [ 1.0628,  0.8112, -0.0721,  0.8024,  0.1205]], dtype=torch.float64)

In [9]:
z1_expected

tensor([[-0.0883,  0.3420,  0.4112,  1.0051, -0.1117],
        [-0.5988, -0.0982, -0.3511,  0.7209, -0.2169],
        [-1.0427,  0.2448, -0.9887, -0.5196,  0.6585],
        [ 0.6406,  0.7839,  0.6573, -1.6348, -0.6108],
        [ 0.6003, -0.8769,  0.9649, -0.1926,  0.3745]], dtype=torch.float64)

In [4]:
import numpy as np

rng = np.random.default_rng(seed=src.RANDOM_STATE)

z1 = rng.standard_normal((5, 5))
z2 = rng.standard_normal((5, 5))
z3 = rng.standard_normal((5, 5))

rng = np.random.default_rng(seed=src.RANDOM_STATE)

z_full = rng.standard_normal((15, 5))
z1_expected = z_full[:5]
z2_expected = z_full[5:10]
z3_expected = z_full[10:15]

print((z1 == z1_expected).all())
print((z2 == z2_expected).all())
print((z3 == z3_expected).all())

True
True
True


In [6]:
from typing import Union, Iterable, List, Tuple

class _NumpyRNG:
    
    def __init__(self, seed):
        self.seed = seed
        self.rng = np.random.default_rng(self.seed)

    def rand(self, size=None):
        return self.rng.random(size)

    def randint(self, low: int, high: int = None, size: int = None):
        return self.rng.integers(low=low, high=high, size=size)

    def choice(self, a: Union[int, Iterable[int]], size: Union[int, Tuple[int]] = None, p: Iterable[float] = None):
        return self.rng.choice(a=a, size=size, p=p)

    def beta(self, num_success: int, num_failure: int, size=None):
        return self.rng.beta(num_success, num_failure, size)

    def standard_normal(self, size=None):
        return self.rng.standard_normal(size)

    def multivariate_normal(self, mean: Union[np.ndarray, List[float]],
                            covariance: Union[np.ndarray, List[List[float]]], size=None):
        return np.squeeze(self.rng.multivariate_normal(mean, covariance, size=size, method='cholesky'))

    def dirichlet(self, alpha: List[float], size=None):
        return self.rng.dirichlet(alpha, size)

In [7]:
import numpy as np

class _RidgeRegression:

    def __init__(self, rng, alpha = 1.0, l2_lambda = 1.0, scale: bool = False):

        # Ridge Regression: https://onlinecourses.science.psu.edu/stat857/node/155/
        self.rng = rng                      # random number generator
        self.alpha = alpha                  # exploration parameter
        self.l2_lambda = l2_lambda          # regularization parameter
        self.scale = scale                  # scale contexts

        self.beta = None                    # (XtX + l2_lambda * I_d)^-1 * Xty = A^-1 * Xty
        self.A = None                       # (XtX + l2_lambda * I_d)
        self.A_inv = None                   # (XtX + l2_lambda * I_d)^-1
        self.Xty = None
        self.scaler = None

    def init(self, num_features: int):
        # By default, assume that
        # A is the identity matrix and Xty is set to 0
        self.Xty = np.zeros(num_features)
        self.A = self.l2_lambda * np.identity(num_features)
        self.A_inv = self.A.copy()
        self.beta = np.dot(self.A_inv, self.Xty)

    def fit(self, X: np.ndarray, y: np.ndarray):

        # Scale
        if self.scaler is not None:
            X = X.astype('float64')
            if not hasattr(self.scaler, 'scale_'):
                self.scaler.fit(X)
            else:
                self.scaler.partial_fit(X)
            X = self.scaler.transform(X)

        # X transpose
        Xt = X.T

        # Update A
        self.A = self.A + np.dot(Xt, X)
        self.A_inv = np.linalg.inv(self.A)

        # Add new Xty values to old
        self.Xty = self.Xty + np.dot(Xt, y)

        # Recalculate beta coefficients
        self.beta = np.dot(self.A_inv, self.Xty)

    def predict(self, x: np.ndarray):

        # Scale
        if self.scaler is not None:
            x = self._scale_predict_context(x)

        # Calculate default expectation y = x * b
        return np.dot(x, self.beta)

    def _scale_predict_context(self, x: np.ndarray):
        if not hasattr(self.scaler, 'scale_'):
            return x

        # Transform and return to previous shape. Convert to float64 to suppress any type warnings.
        return self.scaler.transform(x.astype('float64'))


class _LinTS(_RidgeRegression):

    def predict(self, x: np.ndarray):
        # Scale
        if self.scaler is not None:
            x = self._scale_predict_context(x)

        # Randomly sample coefficients from multivariate normal distribution
        # Covariance is enhanced with the exploration factor
        # Generates  random samples for all contexts in one single go. type(beta_sampled): np.ndarray
        beta_sampled = self.rng.multivariate_normal(self.beta, np.square(self.alpha) * self.A_inv, size=x.shape[0])

        # Calculate expectation y = x * beta_sampled
        return np.sum(x * beta_sampled, axis=1)

In [18]:
import numpy as np

mean = np.array([0.0, 1.0])
cov = np.array([[1.0, 0.5], [0.5, 1.0]])
size = 100_000

# Method 1: NumPy default_rng().multivariate_normal
rng = np.random.default_rng(seed=42)
samples_mv = rng.multivariate_normal(mean, cov, size=size)

# Method 2: Manual sampling
rng = np.random.default_rng(seed=42)
eps = rng.standard_normal(size=(size, 2))
L = np.linalg.cholesky(cov)
samples_manual = mean + eps @ L.T

# Compare sample mean and covariance
print("Multivariate Normal (mean):", samples_mv.mean(axis=0))
print("Manual Sampling (mean):     ", samples_manual.mean(axis=0))

print("Multivariate Normal (cov):\n", np.cov(samples_mv, rowvar=False))
print("Manual Sampling (cov):\n", np.cov(samples_manual, rowvar=False))

Multivariate Normal (mean): [0.00156743 1.00226364]
Manual Sampling (mean):      [-0.00221187  0.999497  ]
Multivariate Normal (cov):
 [[0.99977376 0.4947176 ]
 [0.4947176  1.00322467]]
Manual Sampling (cov):
 [[0.99747788 0.49701348]
 [0.49701348 1.00781644]]


In [20]:
np.allclose(samples_mv.mean(axis=0), mean, atol=1e-2)
np.allclose(np.cov(samples_mv, rowvar=False), cov, atol=1e-2)

True