In [1]:
import sys

# Add the parent directory to the import search path; presumably this is what lets the
# project-local `methods` package below resolve when the notebook runs from its own folder.
sys.path.append('../')

from methods.SimilarityTests import SimilarityTests
from methods.LMEO import LMEO

# Standalone SimilarityTests instance (the decomposition class defined further down
# subclasses SimilarityTests and is instantiated separately).
st = SimilarityTests()

## Preference Decomposition.

Args: Master_prompt, Decomposition_features, Times, Hotel_names

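Steps:

1. Order the hotels pairwise for each decomposition feature.
2. Calculate the Elo scores of the hotels for each feature.
3. Regress the total (overall recommendation) Elo scores on the per-feature Elo scores.
4. Output the fitted regressor (one coefficient per feature).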

In [None]:
# Scratch check of partial string formatting: passing the literal '{}' as an argument
# re-emits a placeholder, so a template can be filled in two stages.
a = '{}{}{}'
# First stage: fix only the first slot; the other two remain '{}' placeholders.
formatted_a = a.format('x', '{}', '{}')
print(formatted_a)
# Second stage: fill the two remaining slots.
print(formatted_a.format('y', 'z'))

x{}{}
xyz


In [1]:
from sklearn.linear_model import LinearRegression

In [6]:
import pickle 
import numpy as np
import os


class pairwise_decomposition(SimilarityTests):
    """
    Decompose overall hotel recommendations into per-feature preferences.

    Intended workflow:
      1. ``get_pairwise_decompostion``   - collect pairwise preference orderings per feature.
      2. ``get_decomposition_elo_scores`` - fit one score per hotel for every feature.
      3. ``get_decomposition``            - regress the overall scores on the feature scores.

    The class calls ``self.order_pairwise_preferences`` and ``self.pairwise_ranks_to_params``,
    which are not defined here (presumably inherited from ``SimilarityTests``).

    Intermediate results are cached as pickle files in a ``pickles`` directory relative to the
    current working directory.
    """

    @staticmethod
    def _pickle_path(file_stem: str) -> str:
        """Return the cache path ``./pickles/<file_stem>.pkl`` using the platform's separator."""
        # On Windows this yields exactly the previous hard-coded '.\\pickles\\<stem>.pkl';
        # on POSIX it now points inside ./pickles instead of naming a file that contains
        # literal backslashes.
        return os.path.join('.', 'pickles', f'{file_stem}.pkl')

    @staticmethod
    def _dump_pickle(obj, path: str) -> None:
        """Pickle ``obj`` to ``path``, creating the parent directory if it does not exist."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as file:
            pickle.dump(obj, file)

    def get_pairwise_decompostion(self, master_prompt: str, pickle_prefix: str, decomposition_features: list, hotel_names: list, num_times: int = 50) -> dict:
        """
        Get the pairwise decomposition of preferences for a given set of features.

        The result is cached under ``feature_decomposition_<pickle_prefix><num_times>.pkl``.
        The cache key does NOT include the feature list or the hotel names, so use a new
        ``pickle_prefix`` whenever either of them changes.

        Args:
            master_prompt (str): Template with three positional ``{}`` slots. The third slot is filled
                with the feature here; the first two are re-emitted as ``{}`` and passed on unchanged.
            pickle_prefix (str): The prefix string used for generating pickle IDs.
            decomposition_features (list): A list of features for which pairwise decomposition of preferences will be calculated.
            hotel_names (list): A list of hotel names.
            num_times (int, optional): The number of times to run the pairwise preference ordering. Defaults to 50.

        Returns:
            dict: Maps each feature to the value returned by ``order_pairwise_preferences`` for it.
        """
        cache_path = self._pickle_path(f'feature_decomposition_{pickle_prefix}{num_times}')

        if os.path.exists(cache_path):
            print("Loading previously generated pairwise decomposition.")
            # NOTE: pickle.load can execute arbitrary code; only load cache files written by this notebook.
            with open(cache_path, 'rb') as file:
                return pickle.load(file)

        decomposition_dict = {}

        for feature in decomposition_features:
            # Fill only the feature slot; the two hotel slots stay as '{}' placeholders.
            prompt_with_feature = master_prompt.format('{}', '{}', feature)
            pickle_id = pickle_prefix + feature + str(num_times)

            decomposition_dict[feature] = self.order_pairwise_preferences(pickle_id, prompt_with_feature, hotel_names, num_times)

        # Cache the full decomposition so later runs can skip the ordering step.
        self._dump_pickle(decomposition_dict, cache_path)

        return decomposition_dict

    # Correctly spelled alias; the original (misspelled) name is kept for existing callers.
    get_pairwise_decomposition = get_pairwise_decompostion

    def get_decomposition_elo_scores(self, pickle_id: str, decomposition_dict: dict, hotel_names: list, valid_alphas = None) -> dict:
        """
        Get the ELO scores for each hotel based on the pairwise decomposition of preferences.

        For every feature the alphas are tried in order and the first one for which
        ``pairwise_ranks_to_params`` does not raise ``RuntimeError`` is used. A feature for which
        no alpha works is reported and left out of the result.

        Args:
            pickle_id (str): The ID used for naming the pickle file.
            decomposition_dict (dict): A dictionary containing the pairwise decomposition of preferences for each feature.
            hotel_names (list): A list of hotel names.
            valid_alphas (list, optional): Alpha values to try, in order. Defaults to
                [0, 1e-10, 1e-8, 1e-6, 1e-4, 1e-2, 1].

        Returns:
            dict: Maps each successfully fitted feature to its fitted parameters sorted ascending
            by their first element (the score).
        """
        cache_path = self._pickle_path(f'elo_scores_{pickle_id}')

        if os.path.exists(cache_path):
            print("Loading Previously Generated ELo scores.")
            # NOTE: pickle.load can execute arbitrary code; only load cache files written by this notebook.
            with open(cache_path, 'rb') as file:
                return pickle.load(file)

        valid_alphas = valid_alphas if valid_alphas is not None else [0, 10**-10, 10**-8, 10**-6, 10**-4, 10**-2, 1]

        elo_score_dict = {}

        for feature, pairwise_preferences in decomposition_dict.items():
            print('Beginning fit for feature: ', feature)

            for alpha in valid_alphas:
                try:
                    fitted_params = self.pairwise_ranks_to_params(pairwise_preferences, hotel_names, alpha=alpha)
                    print(f"Successfully fit for {feature}, when using alpha: ", alpha)
                    elo_score_dict[feature] = sorted(fitted_params, key=lambda x: x[0])
                    break
                except RuntimeError:
                    # RecursionError is a subclass of RuntimeError, so it is caught here too.
                    print(f"Recursion Error for {feature}, when using alpha: ", alpha)
            else:
                # Reached only when every alpha failed (the loop never hit `break`).
                print('No valid alpha found for feature: ', feature)

        # Dump the ELO scores to a pickle file
        self._dump_pickle(elo_score_dict, cache_path)

        return elo_score_dict

    def get_decomposition(self, recommendation_elo_scores: list, feature_elo_scores: dict, hotel_names: list):
        """
        Regress the overall recommendation scores on the per-feature scores.

        Every score list is sorted by hotel name so that row ``i`` of the regressor matrix and
        entry ``i`` of the target refer to the same hotel.

        Args:
            recommendation_elo_scores (list): ``(score, hotel_name)`` pairs for the overall recommendation.
            feature_elo_scores (dict): Maps each feature to a list of ``(score, hotel_name)`` pairs,
                e.g. the output of ``get_decomposition_elo_scores``.
            hotel_names (list): Currently unused; kept so existing callers keep working.

        Returns:
            zip: A one-shot iterator of ``(feature, coefficient)`` pairs, in the iteration order of
            ``feature_elo_scores``.

        Raises:
            ValueError: If ``feature_elo_scores`` is empty, or if a feature's hotels differ from
                those in ``recommendation_elo_scores``.
        """
        if not feature_elo_scores:
            raise ValueError('feature_elo_scores is empty; there is nothing to regress on.')

        def by_hotel_name(pair):
            return pair[1]

        # Getting the target values, ordered alphabetically by hotel name.
        target_pairs = sorted(recommendation_elo_scores, key=by_hotel_name)
        target_names = [pair[1] for pair in target_pairs]
        Y_values = [pair[0] for pair in target_pairs]

        # Preparing one column of scores per feature, in the same hotel order as the target.
        raw_elo_scores = []
        for feature, elo_score_tuples in feature_elo_scores.items():
            # Keep the sorted pairs as a list: wrapping them in dict() would key them by score,
            # so iterating would yield floats and hotels with equal scores would be merged.
            feature_pairs = sorted(elo_score_tuples, key=by_hotel_name)
            feature_names = [pair[1] for pair in feature_pairs]
            if feature_names != target_names:
                raise ValueError(
                    f'Hotels for feature {feature!r} do not match the recommendation scores: '
                    f'{feature_names} != {target_names}'
                )
            raw_elo_scores.append([pair[0] for pair in feature_pairs])

        X_regressors = np.column_stack(raw_elo_scores)

        # Fitting the linear regression model.
        reg = LinearRegression().fit(X_regressors, Y_values)

        # Pair every feature with its fitted coefficient.
        return zip(feature_elo_scores.keys(), reg.coef_)
        

In [8]:
# NOTE(review): `pd` is the conventional alias for pandas; this name would shadow it if
# pandas were ever imported in this notebook.
pd = pairwise_decomposition()

# Template with three positional slots: the two hotels being compared, then the feature.
master_prompt = 'Which hotel do you recommend more {} or {}, on the basis of {}? State the name of the hotel you prefer only and nothing else. '

# Features on which the hotels are compared pairwise (one decomposition per feature).
features = ['value for money', 'luxury', 'location', 'food', 'being close to the beach']

# "The Shard" has been removed because it is the same as "The Shangri".
pairwise_words = ['The Savoy', 'The Langham', 'The Ritz', 'The Ned', 'Claridges', 'The Shangri', 'The Dorchester'] 

In [9]:
# Pairwise orderings per feature with num_times=10. The result is cached on disk keyed by
# the prefix 'london_test_run' and num_times, so change the prefix if the features or
# hotel list change.
decomposition_dict = pd.get_pairwise_decompostion(master_prompt, 'london_test_run', features, pairwise_words, 10)

Undecided: ['Shangri-La']
Undecided: []
Undecided: []
Undecided: []
Undecided: []


In [11]:
# Inspect the raw pairwise result stored for a single feature.
print(decomposition_dict['being close to the beach'])

[['The Savoy', 'The Langham'], ['The Langham', 'The Savoy'], ['The Savoy', 'The Langham'], ['The Savoy', 'The Langham'], ['The Savoy', 'The Langham'], ['The Savoy', 'The Langham'], ['The Savoy', 'The Langham'], ['The Savoy', 'The Langham'], ['The Savoy', 'The Langham'], ['The Savoy', 'The Langham'], ['The Savoy', 'The Ritz'], ['The Savoy', 'The Ritz'], ['The Ritz', 'The Savoy'], ['The Savoy', 'The Ritz'], ['The Savoy', 'The Ritz'], ['The Savoy', 'The Ritz'], ['The Savoy', 'The Ritz'], ['The Savoy', 'The Ritz'], ['The Savoy', 'The Ritz'], ['The Savoy', 'The Ritz'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'The Ned'], ['The Savoy', 'Claridges'], ['The Savoy', 'Claridges'], ['The Savoy', 'Claridges'], ['The Savoy', 'Claridges'], ['The Savoy', 'Claridges'], ['The Savoy', 'Claridges'], [

In [12]:
# Fit per-feature scores. Passing [0] as valid_alphas means only alpha=0 is attempted;
# the result is cached on disk under the pickle id 'london_test_run'.
elo_scores = pd.get_decomposition_elo_scores('london_test_run', decomposition_dict, pairwise_words, [0])

Beginning fit for feature:  value for money
Successfully fit for value for money, when using alpha:  0
Beginning fit for feature:  luxury
Successfully fit for luxury, when using alpha:  0
Beginning fit for feature:  location
Successfully fit for location, when using alpha:  0
Beginning fit for feature:  food
Successfully fit for food, when using alpha:  0
Beginning fit for feature:  being close to the beach
Successfully fit for being close to the beach, when using alpha:  0


In [13]:
from pprint import pprint
# Each feature maps to a list of (score, hotel) tuples sorted ascending by score.
pprint(elo_scores)

{'being close to the beach': [(-1.3785663661168046, 'The Ned'),
                              (-0.6870374514393548, 'The Savoy'),
                              (-0.47553902143139315, 'The Langham'),
                              (-0.40578522205814804, 'Claridges'),
                              (-0.057086213448635736, 'The Dorchester'),
                              (0.6088683345492291, 'The Ritz'),
                              (2.395145939945107, 'The Shangri')],
 'food': [(-2.3621679583451476, 'The Shangri'),
          (-1.4999535408372924, 'The Langham'),
          (-0.23619382530681166, 'The Savoy'),
          (0.2519085958256934, 'The Dorchester'),
          (1.010748753955646, 'The Ritz'),
          (1.261875888544305, 'The Ned'),
          (1.5737820861636074, 'Claridges')],
 'location': [(-2.6830919193402876, 'Claridges'),
              (-1.5572181731857733, 'The Shangri'),
              (0.2770035569704947, 'The Langham'),
              (0.4708957495016102, 'The Dorchester'),