In [5]:
import os 
import sys 
import numpy as np

from pprint import pprint
import scipy as sp

# sys.path.append(r'.\LMEO')

from pprint import pprint 
import pickle

In [6]:
def exp_transform(params):
    """Map log-scale parameters to positive weights.

    The parameters are centred on their mean, exponentiated, and then
    rescaled so that the returned weights sum to ``len(params)``.
    """
    centred = np.asarray(params) - np.mean(params)
    weights = np.exp(centred)
    scale = len(weights) / weights.sum()
    return scale * weights

def log_transform(weights):
    """Map weights to log-scale parameters whose mean is zero.

    Takes the element-wise natural logarithm of `weights` and subtracts the
    mean of those logarithms from each entry.
    """
    log_weights = np.log(weights)
    offset = log_weights.mean()
    return log_weights - offset

import abc
class ConvergenceTest(metaclass=abc.ABCMeta):

    """Abstract base class for convergence tests.

    A convergence test is a callable object: subclasses implement a single
    method, `__call__`, which receives a parameter vector and returns a
    boolean telling whether the convergence criterion is met.

    Because `__call__` is declared with `abc.abstractmethod`, this class
    cannot be instantiated directly; a subclass must override `__call__`.
    """

    @abc.abstractmethod
    def __call__(self, params, update=True):
        """Test whether the convergence criterion is met.

        Parameters
        ----------
        params : array_like
            Current parameter vector.
        update : bool, optional
            Whether `params` should replace the previously seen parameters,
            i.e., whether the call is allowed to modify the state of the
            object.

        Returns
        -------
        bool
            Expected from implementations; this abstract method itself has
            no body beyond this docstring.
        """

class NormOfDifferenceTest(ConvergenceTest):

    """Declare convergence when successive iterates stop moving.

    Each call mean-centres the incoming parameter vector and compares it with
    the centred vector remembered from the previous call. Convergence is
    reported when the norm (of order `order`) of their difference is at most
    `tol` times the number of parameters.
    """

    def __init__(self, tol=1e-8, order=1):
        self._tol = tol
        self._ord = order
        # Centred parameters from the last updating call; None until then.
        self._prev_params = None

    def __call__(self, params, update=True):
        """Return whether `params` is within tolerance of the previous iterate.

        The very first comparison (no previous iterate stored) always yields
        False. When `update` is true the centred `params` become the new
        reference point for the next call.
        """
        centred = np.asarray(params) - np.mean(params)
        previous = self._prev_params

        if previous is None:
            # Nothing to compare against yet.
            verdict = False
        else:
            gap = np.linalg.norm(previous - centred, ord=self._ord)
            verdict = gap <= self._tol * len(centred)

        if update:
            self._prev_params = centred
        return verdict


def _mm(n_items, data, initial_params, alpha, max_iter, tol, mm_fun):
    """
    Run the MM fixed-point iteration until the estimates stop changing.

    Starting from `initial_params` (or all zeros when it is None), each
    iteration asks `mm_fun(n_items, data, params)` for a pair of arrays
    (numerators, denominators), adds `alpha` to both, and log-transforms
    their ratio to obtain the next parameter vector. Iteration stops as soon
    as a `NormOfDifferenceTest` (L1 norm, tolerance `tol`) reports
    convergence.

    Returns
    -------
    params
        The parameter vector at which convergence was detected.

    Raises
    ------
    RuntimeError
        If the algorithm does not converge after `max_iter` iterations.
    """
    params = np.zeros(n_items) if initial_params is None else initial_params
    has_converged = NormOfDifferenceTest(tol=tol, order=1)

    for _ in range(max_iter):
        numerators, denominators = mm_fun(n_items, data, params)
        smoothed_ratio = (numerators + alpha) / (denominators + alpha)
        params = log_transform(smoothed_ratio)
        if has_converged(params):
            break
    else:
        # The loop ran out of iterations without ever breaking.
        raise RuntimeError("Did not converge after {} iterations".format(max_iter))

    return params

def _mm_rankings(n_items, data, params):
    """Compute one MM step's numerators and denominators for ranking data.

    Every ranking in `data` is a sequence of item indices, best first. Each
    position except the last is treated as one "choice": the item at that
    position gets a win, and every item from that position onwards receives
    the reciprocal of their combined weight in its denominator.

    Returns the pair ``(wins, denoms)``, two float arrays of length
    `n_items`.
    """
    weights = exp_transform(params)
    wins = np.zeros(n_items, dtype=float)
    denoms = np.zeros(n_items, dtype=float)

    for ranking in data:
        # Combined weight of the items that are still candidates.
        remaining_weight = weights.take(ranking).sum()
        n_ranked = len(ranking)

        for position in range(n_ranked - 1):
            winner = ranking[position]
            wins[winner] += 1
            share = 1.0 / remaining_weight
            # Element-by-element on purpose: an index that occurs several
            # times in the tail must be incremented once per occurrence.
            for later in range(position, n_ranked):
                denoms[ranking[later]] += share
            # The winner leaves the candidate pool for the next position.
            remaining_weight -= weights[winner]

    return wins, denoms


def mm_rankings(n_items, data, initial_params=None, alpha=0.0,
                max_iter=10000, tol=1e-8):
    """Fit model parameters to ranking data with the MM algorithm.

    Computes the maximum-likelihood (ML) estimate of the model parameters
    from ranking data (see :ref:`data-rankings`) using the
    minorization-maximization (MM) algorithm [Hun04]_, [CD12]_.

    With ``alpha > 0`` the result is instead the maximum a-posteriori (MAP)
    estimate under a (peaked) Dirichlet prior; see :ref:`regularization`.

    Parameters
    ----------
    n_items : int
        Number of distinct items.
    data : list of lists
        Ranking data; each ranking lists item indices.
    initial_params : array_like, optional
        Starting point of the iteration. All zeros when omitted.
    alpha : float, optional
        Regularization parameter.
    max_iter : int, optional
        Maximum number of iterations allowed.
    tol : float, optional
        Tolerance on the L1-norm of the difference between successive
        iterates used to declare convergence.

    Returns
    -------
    params : numpy.ndarray
        The estimated model parameters.

    Raises
    ------
    RuntimeError
        If no convergence is reached within `max_iter` iterations.
    """
    return _mm(
        n_items,
        data,
        initial_params,
        alpha,
        max_iter,
        tol,
        mm_fun=_mm_rankings,
    )

In [31]:
## Function to convert the ranks to numbers based on a word list that may itself contain lists.
## An OTHER category is needed as well.
## It should probably also return a dict which maps each number to its target word.

class ConsitFive:
    """Convert rankings of free-text items into rankings of integer category ids."""

    @staticmethod
    def hotel_target_flag(hotel, target_word):
        """Tell whether `hotel` matches a target.

        Parameters
        ----------
        hotel : str
            The ranked item (e.g. a hotel name as returned by the model).
        target_word : str or list of str
            A single substring, or a list of alternative substrings.

        Returns
        -------
        bool
            True when the substring (or any of the alternatives) occurs in
            `hotel`. Matching is case-sensitive substring containment.
        """
        if isinstance(target_word, list):
            return any(word in hotel for word in target_word)
        return target_word in hotel

    def rank_to_num(self, ranks, target_words, other_flag=True):
        """Convert rankings of names into rankings of category numbers.

        Parameters
        ----------
        ranks : iterable of iterables of str
            Each inner iterable is one ranking of item names.
        target_words : sequence of (str or list of str)
            One entry per category. A list entry groups several alternative
            substrings under one category, labelled by its first element.
            Should be chosen so that OTHER is used about as commonly, on
            average, as each target word.
        other_flag : bool, optional
            When True (default) a trailing 'OTHER' category is added and
            receives every item that matches no target. When False no such
            category exists and an unmatched item raises ValueError.

        Returns
        -------
        converted_ranks : list of list of int
            The rankings with every item replaced by its category number.
        num_to_id : dict
            Maps each category number to its label.

        Raises
        ------
        ValueError
            If `other_flag` is False and an item matches no target.
        """
        # Copy so that the caller's sequence is never mutated, and so that
        # tuples are accepted as well as lists.
        categories = list(target_words)
        if other_flag:
            categories.append('OTHER')

        # A list-valued category is labelled by its first alternative.
        labels = [c[0] if isinstance(c, list) else c for c in categories]

        # Conversion dicts.
        num_to_id = dict(enumerate(labels))
        id_to_num = {label: num for num, label in num_to_id.items()}

        # Resolve each category's number once instead of once per item.
        matchers = [(category, id_to_num[label])
                    for category, label in zip(categories, labels)]
        fallback = id_to_num['OTHER'] if other_flag else None

        converted_ranks = []
        for rank_instance in ranks:
            converted_rank_instance = []
            for hotel in rank_instance:
                # First matching category wins, in target_words order.
                number = next(
                    (num for category, num in matchers
                     if self.hotel_target_flag(hotel, category)),
                    fallback,
                )
                if number is None:
                    raise ValueError(
                        "{!r} matches none of the target words and "
                        "other_flag is False".format(hotel)
                    )
                converted_rank_instance.append(number)
            converted_ranks.append(converted_rank_instance)

        return converted_ranks, num_to_id


In [32]:
# We now know how to turn rankings into numbers.
# Create a function which takes rankings and a target word list and returns the fitted parameters.
# That is just a wrapper.

def ranks_to_params(ranks, target_words, **kargs):
    """Fit MM parameters to name-based rankings and label them.

    The rankings are first converted to category numbers with
    `ConsitFive.rank_to_num`, then fitted with `mm_rankings` (extra keyword
    arguments are forwarded to it).

    Returns a list of ``(parameter, label)`` pairs sorted by parameter,
    largest first.
    """
    converted_ranks, num_to_id = ConsitFive().rank_to_num(ranks, target_words)
    fitted = mm_rankings(len(num_to_id), converted_ranks, **kargs)

    labelled = list(zip(fitted, num_to_id.values()))
    labelled.sort(key=lambda pair: pair[0], reverse=True)
    return labelled

In [34]:
# Converter instance; a later cell also calls cf.rank_to_num on the pairwise results.
cf = ConsitFive()

file_id = 'LondonTop3_1000'

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted run of this project.
# NOTE(review): the backslash-separated relative path is Windows-style and is
# resolved against the kernel's working directory.
with open(fr'.\pickles\ranks_{file_id}.pkl', 'rb') as file:
    ranks_3 = pickle.load(file)

# NOTE(review): absolute, user-specific path; this cell cannot run on another
# machine as written.
with open(fr'C:\Users\chris\Documents\LMEO\LMEO\Recomendation Research\pickles\ranks_London3000.pkl', 'rb') as file:
    ranks_5 = pickle.load(file)

print(len(ranks_3))

# Targets are matched by substring containment (see ConsitFive.hotel_target_flag),
# so several entries are name fragments rather than full hotel names.
london_words = 'Savoy Langham Ritz Ned Clarid hangri Dorchest Shard'.split()

# Each call prints (parameter, label) pairs sorted by parameter, largest first.
print(ranks_to_params(ranks_3, london_words))
print(ranks_to_params(ranks_5, london_words, alpha=0))

988
[(2.5870714744923733, 'Ritz'), (0.7136625360414011, 'Langham'), (0.4179767595157995, 'Savoy'), (0.3483472927201855, 'Ned'), (0.301808167249091, 'Shard'), (0.21560685447205374, 'Dorchest'), (-0.17396681601465414, 'OTHER'), (-1.1447293253594197, 'Clarid'), (-3.26577694311683, 'hangri')]
[(2.1829326071998616, 'Ritz'), (0.3511280308230571, 'Savoy'), (0.23858368472724123, 'Langham'), (0.13146978179926783, 'Shard'), (-0.02599249236739587, 'Ned'), (-0.06849699509657359, 'Dorchest'), (-0.7056916148250785, 'Clarid'), (-0.796717450548396, 'OTHER'), (-1.3072155517119846, 'hangri')]


You now need to write the code to get the pairwise ranking.

In [99]:
# Example pair. NOTE(review): with the trailing .format(...) commented out
# below, these two names are not used by this cell.
hotel_1, hotel_2 = 'Savoy', 'Langham'
# Template with two positional `{}` slots; order_pairwise_preferences fills
# them with each ordered pair of hotel names via str.format.
pairwise_prompt = 'Which hotel do you recommend more: {} or {}. Simply write the name of the hotel and nothing else.'#.format(hotel_1, hotel_2)

import openai
import anthropic

from dotenv import load_dotenv
import os

load_dotenv()

class adaptedLMEO1(ConsitFive):
    """Collect pairwise hotel preferences from an OpenAI chat model."""

    def __init__(self, model: str = "gpt-3.5-turbo"):
        """Create the API clients.

        Parameters
        ----------
        model : str, optional
            Model name passed to the chat-completions endpoint.

        The OpenAI key is read from the ``MY_API_KEY`` environment variable
        and the Anthropic key from ``ANTHROPIC_API_KEY``.
        """
        personal_api_key = os.environ.get('MY_API_KEY')
        anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')

        self.client = openai.OpenAI(api_key=personal_api_key)
        self.anthropic_client = anthropic.Anthropic(api_key=anthropic_api_key)
        self.model = model

    @staticmethod
    def _decide_pair(preference, hotel_1, hotel_2):
        """Return ``[winner, loser]`` for one reply, or None if it is ambiguous.

        A reply is ambiguous when it names neither hotel or both of them.
        Crediting the first-listed hotel when both are named would bias the
        result towards the prompt order.
        """
        text = preference or ''  # the API may return None as content
        names_first = hotel_1 in text
        names_second = hotel_2 in text
        if names_first == names_second:
            return None
        return [hotel_1, hotel_2] if names_first else [hotel_2, hotel_1]

    def order_pairwise_preferences(self, pickle_id, pairwise_prompt, proper_hotel_ids, num_time=50, dump=True) -> list:
        ''' Ask the model to choose between every ordered pair of hotels.

        Parameters:
        pickle_id: str
            Identifier used in the name of the pickle file written when
            `dump` is true.
        pairwise_prompt: str
            Prompt template with two positional `{}` slots, filled with the
            two hotel names in order.
        proper_hotel_ids: sequence of str
            Hotel names. Each name is inserted into the prompt and searched
            for (as a substring) in the model's reply.
        num_time: int
            Number of completions requested for each ordered pair.
        dump: bool
            Whether to pickle the collected results.

        Returns:
        list of [winner, loser] pairs, one per reply that named exactly one
        of the two hotels. Other replies are printed and left out.
        '''

        # We want all the permutations of the hotels where they are not (i, i).
        hotel_order_permutations = [
            (hotel_1, hotel_2)
            for hotel_1 in proper_hotel_ids
            for hotel_2 in proper_hotel_ids
            if hotel_1 != hotel_2
        ]
        print(hotel_order_permutations)

        all_responses = []
        undecided = []

        for hotel_1, hotel_2 in hotel_order_permutations:
            print(hotel_1, hotel_2)
            # Create the pairwise payload.
            coresponding_pairwise_prompt = pairwise_prompt.format(hotel_1, hotel_2)
            LLM_payload = [{"role": "user", "content": coresponding_pairwise_prompt}]

            # One request returns `num_time` independent completions.
            response = self.client.chat.completions.create(
                    model=self.model,
                    messages=LLM_payload,
                    n=num_time,
                )

            for message_obj in response.choices:
                preference = message_obj.message.content
                outcome = self._decide_pair(preference, hotel_1, hotel_2)

                if outcome is None:
                    print(preference)
                    undecided.append(preference)
                else:
                    all_responses.append(outcome)

        if dump:
            with open(fr'.\pickles\ranks_{pickle_id}.pkl', 'wb') as file:
                pickle.dump(all_responses, file)

        print(undecided)
        return all_responses

In [60]:
# NOTE(review): `adaptedLMEO` is not defined anywhere in the visible notebook
# (the class defined above is `adaptedLMEO1`), and this cell's execution count
# (60) is lower than that of the class cell (99) — likely a stale cell that
# fails on a fresh kernel; confirm before relying on Run All.
lm = adaptedLMEO()

# Here every entry is a (quoted name, short id) tuple, whereas the following
# cell passes plain name strings.
proper_hotel_ids = [('"The Ritz"', 'Ritz'), ('"The Savoy"', 'Savoy'), ('"The Langham"', 'Langham')]
result = lm.order_pairwise_preferences('Pair_test', pairwise_prompt, proper_hotel_ids, num_time=5, dump=True)

In [100]:
# Uses the default model of adaptedLMEO1.
lm = adaptedLMEO1()

# Plain hotel names: each one is inserted into the prompt and searched for in the replies.
proper_hotel_ids = ['The Ritz', 'The Savoy', 'The Langham']
# 5 completions for every ordered pair; dump=True also pickles the result under the id 'Pair_test1'.
result = lm.order_pairwise_preferences('Pair_test1', pairwise_prompt, proper_hotel_ids, num_time=5, dump=True)

[('The Ritz', 'The Savoy'), ('The Ritz', 'The Langham'), ('The Savoy', 'The Ritz'), ('The Savoy', 'The Langham'), ('The Langham', 'The Ritz'), ('The Langham', 'The Savoy')]
The Ritz The Savoy
The Ritz The Langham
The Savoy The Ritz
The Savoy The Langham
The Langham The Ritz
The Langham The Savoy
[]


In [106]:
# rank_to_num returns a tuple (converted_ranks, num_to_id), so paired[0] holds
# the converted pairs and paired[1] the number-to-label mapping.
paired = cf.rank_to_num(result, ['The Ritz', 'The Savoy', 'The Langham'])
pprint(paired)
# Number of converted pairs.
print(len(paired[0]))

([[0, 1],
  [0, 1],
  [0, 1],
  [0, 1],
  [0, 1],
  [0, 2],
  [0, 2],
  [2, 0],
  [2, 0],
  [0, 2],
  [0, 1],
  [0, 1],
  [0, 1],
  [0, 1],
  [0, 1],
  [1, 2],
  [2, 1],
  [1, 2],
  [1, 2],
  [1, 2],
  [0, 2],
  [0, 2],
  [0, 2],
  [0, 2],
  [0, 2],
  [2, 1],
  [1, 2],
  [2, 1],
  [2, 1],
  [2, 1]],
 {0: 'The Ritz', 1: 'The Savoy', 2: 'The Langham', 3: 'OTHER'})
30


In [110]:
sys.path.append(r"../")

from methods.SimilarityTests import SimilarityTests

In [108]:
# Show only the converted pairs (first element of the tuple returned by rank_to_num).
print(paired[0])

[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 2], [0, 2], [2, 0], [2, 0], [0, 2], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [1, 2], [2, 1], [1, 2], [1, 2], [1, 2], [0, 2], [0, 2], [0, 2], [0, 2], [0, 2], [2, 1], [1, 2], [2, 1], [2, 1], [2, 1]]


In [111]:
# NOTE(review): this rebinds `cf`, which earlier cells bound to a ConsitFive
# instance; cells run after this one see a SimilarityTests object instead.
cf = SimilarityTests()
# NOTE(review): the recorded output of this cell is `KeyError: 'OTHER'`.
# SimilarityTests comes from methods.SimilarityTests and is not visible in
# this notebook, so the cause cannot be confirmed from here.
pprint(cf.pairwise_ranks_to_params(result, ['The Ritz', 'The Savoy', 'The Langham'], alpha=0))

KeyError: 'OTHER'

The final test which we need to implement is the top1 test. 

To do:
- Simply order based on the prompt and then collate the responses into a list.
- Then, inside pairwise-to-params, run the ranks through `rank_to_num`, then just append the set of indices to each element, at which point you are ready to call the MM fitting function.

In [None]:
class top1(ConsitFive):
    """Helpers for the top-1 test: query the model repeatedly, then fit parameters.

    NOTE(review): the methods below read ``self.client`` and ``self.model``
    and call ``mm_pairwise``; none of these is set up inside this class, so
    they have to be provided elsewhere — confirm before running.
    """

    def order_top_1(self, pickle_id, prompt, times = 100, dump=True) -> list:
        """Send `prompt` to the chat model `times` times and collect the replies.

        The completions are requested in batches of at most 128 per call
        (presumably a per-request cap on ``n`` — confirm). When `dump` is
        true the list of reply texts is also pickled, with `pickle_id` in the
        file name.

        Returns the list of reply texts.
        """
        messages = [{"role": "user", "content": prompt}]

        def request_choices(n_completions):
            # One API call yielding `n_completions` completions of the prompt.
            reply = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                n=n_completions
            )
            return reply.choices

        chunk_size = 128
        full_chunks, remainder = divmod(times, chunk_size)

        collected = []
        for batch_no in range(full_chunks):
            print(f'Ordering batch {batch_no}.')
            collected.extend(request_choices(chunk_size))

        # Whatever is left after the full-size batches.
        if remainder > 0:
            collected.extend(request_choices(remainder))

        all_responses = [choice.message.content for choice in collected]

        if dump:
            with open(fr'.\pickles\top1_{pickle_id}.pkl', 'wb') as file:
                pickle.dump(all_responses, file)

        return all_responses

    def top1_ranks_to_params(self, ranks, target_words, **kargs):
        ''' Convert top-1 results to numbers and fit MM parameters to them.

        Each converted rank is paired with the set of all category indices
        before being handed to `mm_pairwise`; extra keyword arguments are
        forwarded to that call. Returns (parameter, label) pairs sorted by
        parameter, largest first.

        NOTE(review): requires a `rank_to_num` that accepts `other_flag`;
        confirm that the base class in use provides it.
        '''
        converted, num_to_id = self.rank_to_num(ranks, target_words, other_flag=True)

        n_categories = len(num_to_id)
        every_index = set(range(n_categories))
        # The same set object is shared by every entry.
        paired_with_all = [[winner, every_index] for winner in converted]

        # NOTE(review): the original carried the remark "Have to minus" on
        # this call; its meaning is not clear from the code — confirm.
        fitted = mm_pairwise(n_categories, paired_with_all, **kargs)

        labelled = list(zip(fitted, num_to_id.values()))
        labelled.sort(key=lambda pair: pair[0], reverse=True)
        return labelled
    

