In [3]:
import os
from dotenv import load_dotenv # This allows me to use the .env file to store my API key
from openai import OpenAI

# Load the .env file
load_dotenv()
personal_api_key = os.environ.get('MY_API_KEY')

## Intention

I want to formulate how consitent the reccomendations by non RAG LLM are for a variety of services. 

What you will need:
- Query model, for variety of services. 
- Extract the order. 
- Use stats to do some form of consitency test.

In [4]:
client = OpenAI(api_key=personal_api_key)
model = "gpt-3.5-turbo"

In [5]:
message_content = "Reccomend five hotels in Bristol UK. State the name of the hotel on a new line each time. Dont use a numbered list."
message = [{"role": "user", "content": message_content}]

In [6]:
response = client.chat.completions.create(
            model=model,
            messages=message,
        )

LLM_response = response.choices[0].message.content

In [7]:
print(LLM_response)

Radisson Blu Hotel, Bristol
Bristol Harbour Hotel & Spa
Mercure Bristol Grand Hotel
Bristol Marriott Royal Hotel
The Bristol Hotel


In [8]:
Ordered_list = LLM_response.split("\n")
print(Ordered_list)

['Radisson Blu Hotel, Bristol', 'Bristol Harbour Hotel & Spa', 'Mercure Bristol Grand Hotel', 'Bristol Marriott Royal Hotel', 'The Bristol Hotel']


Now I need to decide how I will test the consistency. I feel like there shoudl be some stats test that I already know.

You could create a dictonary of appearences, and then just add points depending on where they come -> this isnt fair as when they dont appear will look like they come well. 
You could make a dictionary a list in each position and then just look at the frequency.   

It might be more informative to have a list of hotels and then ask to rank although. This is different to the current question but is likley more informative. You will benefit from doing ten runs and then seeing what comes up in each as you can use this to have the hotels for it to rank. 

After Research:
- Jackards Index
- 

In [9]:
import re

def order_set(message_content: str, times: int = 1):
    ''' Generate multiple rank sets. '''
    
    message = [{"role": "user", "content": message_content}]
    
    response = client.chat.completions.create(
            model=model,
            messages=message,
            n=times
        )

    # Will remove most of the unwanted aretfacts from the response. By no means optimsed.
    pattern = r"\d+|\s?-\s|\s?\.\s|\s|,"
    ranks = [LLM_response.message.content.split("\n") for LLM_response in response.choices]
    ranks = [[re.sub(pattern, "", item) for item in rank] for rank in ranks]
    # remove any empty strings
    ranks = [[item for item in rank if item] for rank in ranks]
    
    return ranks

In [10]:
ranks = order_set(message_content, 100)

In [20]:
import pickle

# Dump the ranks variable
with open('ranks.pkl', 'wb') as file:
    pickle.dump(ranks, file)

In [11]:
# Going to go simple for analysis.
num_ranks = 5
frequency_dict = {x: {} for x in range(num_ranks)}

# Count the number of times each hotel appears in each rank
for rank_hotels in ranks:
    for i, hotel in enumerate(rank_hotels):
        if i == num_ranks:
            break
        number_of_occurences = frequency_dict[i].get(hotel, 0)
        frequency_dict[i][hotel] = number_of_occurences + 1

In [12]:
import pprint
pprint.pprint(frequency_dict)

{0: {'AvonGorgebyHotelDuVin': 1,
     'BerwickLodge': 1,
     'BristolHarborHotel': 3,
     'BristolHarbourHotel&Spa': 20,
     'BristolMarriottHotelCityCentre': 2,
     'BristolMarriottRoyalHotel': 3,
     'HotelduVin': 1,
     'HotelduVinBristol': 2,
     'MercureBristolGrandHotel': 1,
     'RadissonBluHotel': 2,
     'RadissonBluHotelBristol': 16,
     'TheBristolHarborHotel': 1,
     'TheBristolHotel': 46,
     'TheGainsboroughBathSpa': 1},
 1: {'BerkeleySuites': 2,
     'BerwickLodge': 3,
     'BristolHarbourHotel&Spa': 5,
     'BristolMarriottRoyalHotel': 10,
     'FutureInnBristol': 1,
     'HamptonbyHiltonBristolCityCentre': 1,
     'HotelduVin&BistroBristol': 6,
     'MercureBristolGrandHotel': 16,
     'MercureBristolHollandHouseHotelandSpa': 1,
     'RadissonBluHotel': 1,
     'RadissonBluHotelBristol': 22,
     'TheBristol': 7,
     'TheBristolHotel': 22,
     'TheBristolMarriottRoyalHotel': 1,
     'TheGainsboroughBathSpa': 1,
     'TheRadissonBluHotelBristol': 1},
 2: {'A

In [13]:
for key, value in frequency_dict.items():
    print(key)
    print('ratio of largest in dict to total:', max(value.values())/sum(value.values()))
    print('Number of unique hotels:', len(value))

0
ratio of largest in dict to total: 0.46
Number of unique hotels: 14
1
ratio of largest in dict to total: 0.22
Number of unique hotels: 16
2
ratio of largest in dict to total: 0.22
Number of unique hotels: 27
3
ratio of largest in dict to total: 0.24
Number of unique hotels: 25
4
ratio of largest in dict to total: 0.21
Number of unique hotels: 32


Would also be useful to know the relative frequency of those in a given row in all the other rows.


In [14]:
def frequency_given_row(row):
    hotels_in_row = frequency_dict[row]
    rows_to_compare = [x for x in frequency_dict.keys() if x != row]
    
    for hotel in hotels_in_row:
        print('\n' + hotel)
        print(f"occurence in row {row}: {round(hotels_in_row[hotel]/sum(hotels_in_row.values()), 2)}")
        for comparison_row in rows_to_compare:
            freq_in_comparison = frequency_dict[comparison_row].get(hotel, 0)
            if freq_in_comparison:
                print(f"occurence in row {comparison_row}: {round(freq_in_comparison/sum(frequency_dict[comparison_row].values()), 2)}")
        

In [15]:
frequency_given_row(0)


TheBristolHotel
occurence in row 0: 0.46
occurence in row 1: 0.22
occurence in row 2: 0.08
occurence in row 3: 0.07
occurence in row 4: 0.05

BristolHarbourHotel&Spa
occurence in row 0: 0.2
occurence in row 1: 0.05
occurence in row 2: 0.09
occurence in row 3: 0.12
occurence in row 4: 0.04

HotelduVinBristol
occurence in row 0: 0.02
occurence in row 2: 0.01

BerwickLodge
occurence in row 0: 0.01
occurence in row 1: 0.03
occurence in row 2: 0.03
occurence in row 4: 0.01

BristolMarriottRoyalHotel
occurence in row 0: 0.03
occurence in row 1: 0.1
occurence in row 2: 0.12
occurence in row 3: 0.09
occurence in row 4: 0.04

BristolMarriottHotelCityCentre
occurence in row 0: 0.02
occurence in row 2: 0.02
occurence in row 4: 0.01

RadissonBluHotelBristol
occurence in row 0: 0.16
occurence in row 1: 0.22
occurence in row 2: 0.17
occurence in row 3: 0.2
occurence in row 4: 0.08

TheBristolHarborHotel
occurence in row 0: 0.01

TheGainsboroughBathSpa
occurence in row 0: 0.01
occurence in row 1: 0.

These funcitons arent useless but it is bogged down by the many ways there is to say the same hotel so it isnt perfect. I think a better way to compare is to give list of hotels and ask to rank. ALthough again this is a differnt question.

Could easily right something which matches hotels to numbers and then you can just make it easier.

In [16]:
# Get all of the hotels.
unpacked_ranks = [hotel for rank in ranks for hotel in rank]
all_hotels = set(unpacked_ranks)

In [74]:
target_words = ['Marriott','Gainsborough','Hilton','Mercure', 'arbour' ,'Radisson', 'Avon', 'DoubleTree', 'Berkeley', 'ibis', 'Future','ztec', 'gabel',  ]

In [75]:
grouped_hotels = {}

all_hotels_copy = all_hotels.copy()
for word in target_words:
    relevant = [hotel for hotel in all_hotels if word in hotel]
    grouped_hotels[word] = relevant
    all_hotels_copy = all_hotels_copy.difference(relevant)

for word in all_hotels_copy:
    grouped_hotels[word] = [word]

In [76]:
def frequency_given_row(frequency_dict, row):
    hotels_in_row = frequency_dict[row]
    rows_to_compare = [x for x in frequency_dict.keys() if x != row]
    
    for hotel in hotels_in_row:
        print('\n' + hotel)
        print(f"occurence in row {row}: {round(hotels_in_row[hotel]/sum(hotels_in_row.values()), 2)}")
        for comparison_row in rows_to_compare:
            freq_in_comparison = frequency_dict[comparison_row].get(hotel, 0)
            if freq_in_comparison:
                print(f"occurence in row {comparison_row}: {round(freq_in_comparison/sum(frequency_dict[comparison_row].values()), 2)}")


def frequency_in_row(ranks, grouped_hotels):
    frequency_dict = {x: {} for x in range(num_ranks)}

    for rank_instance in ranks:
        for i, hotel in enumerate(rank_instance):
            for word, hotel_list in grouped_hotels.items():
                if hotel in hotel_list:
                    num_occurences = frequency_dict[i].get(word, 0)
                    frequency_dict[i][word] = num_occurences + 1
    
    return frequency_dict

        

In [77]:
from pprint import pprint
pprint(frequency_in_row(ranks, grouped_hotels))

{0: {'Avon': 1,
     'BerwickLodge': 1,
     'BristolHarborHotel': 3,
     'Gainsborough': 1,
     'HotelduVin': 1,
     'HotelduVinBristol': 2,
     'Marriott': 5,
     'Mercure': 1,
     'Radisson': 18,
     'TheBristolHarborHotel': 1,
     'TheBristolHotel': 46,
     'arbour': 20},
 1: {'Berkeley': 2,
     'BerwickLodge': 3,
     'Future': 1,
     'Gainsborough': 1,
     'Hilton': 1,
     'HotelduVin&BistroBristol': 6,
     'Marriott': 11,
     'Mercure': 17,
     'Radisson': 24,
     'TheBristol': 7,
     'TheBristolHotel': 22,
     'arbour': 5},
 2: {'Avon': 2,
     'Berkeley': 4,
     'BerwickLodge': 3,
     'BrooksGuesthouseBristol': 1,
     'DoubleTree': 1,
     'Future': 2,
     'Gainsborough': 1,
     'Hilton': 1,
     'HotelduVin&BistroBristol': 1,
     'HotelduVinBristol': 1,
     'IbisBristolCentre': 1,
     'Marriott': 14,
     'Mercure': 22,
     'NovotelBristolCentre': 1,
     'Radisson': 18,
     'SACOBristolWestIndiaHouse': 1,
     'TheBristol': 1,
     'TheBristolHot

In [78]:
frequency_given_row(frequency_in_row(ranks, grouped_hotels), 0)


TheBristolHotel
occurence in row 0: 0.46
occurence in row 1: 0.22
occurence in row 2: 0.08
occurence in row 3: 0.07
occurence in row 4: 0.05

arbour
occurence in row 0: 0.2
occurence in row 1: 0.05
occurence in row 2: 0.11
occurence in row 3: 0.12
occurence in row 4: 0.05

HotelduVinBristol
occurence in row 0: 0.02
occurence in row 2: 0.01

BerwickLodge
occurence in row 0: 0.01
occurence in row 1: 0.03
occurence in row 2: 0.03
occurence in row 4: 0.01

Marriott
occurence in row 0: 0.05
occurence in row 1: 0.11
occurence in row 2: 0.14
occurence in row 3: 0.09
occurence in row 4: 0.05

Radisson
occurence in row 0: 0.18
occurence in row 1: 0.24
occurence in row 2: 0.18
occurence in row 3: 0.2
occurence in row 4: 0.07

TheBristolHarborHotel
occurence in row 0: 0.01

Gainsborough
occurence in row 0: 0.01
occurence in row 1: 0.01
occurence in row 2: 0.01
occurence in row 4: 0.03

Avon
occurence in row 0: 0.01
occurence in row 2: 0.02
occurence in row 3: 0.01
occurence in row 4: 0.03

Hotel

You now have a decent frequency of each hotel. You now want to investigate how much of a proper ranking it is, you first need to define what this means and then write a script which will tell you.

Defining a proper ranking:
- Hotels frequencys should be unimodal and concave
- The most common hotel in each row should be consistent.
- 

How consistent the ranking are on differnt models would be intresting, if they correlated it would suggest that it is determinstic and that you can prob game it. If they arent it will be largerly random and hard to game.

tkinter solutin is certainly possible but you dont know much about it and it seems hard, you could just do a text solution. Whereby you but into text file and sort in there with different tags.