In [2]:
from neo4j import GraphDatabase
from langchain.vectorstores import FAISS 
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
from langchain_ollama import OllamaEmbeddings
from langchain.llms import Ollama
from tqdm import tqdm
from langchain_ollama import ChatOllama
from langchain_community.graphs.neo4j_graph import Neo4jGraph
import random
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field
from langchain.output_parsers.fix import OutputFixingParser
from langchain.output_parsers.retry import RetryOutputParser
import collections
import pandas as pd
import numpy as np
import ast
import os
from enum import Enum
from langchain_core.output_parsers import JsonOutputParser
from langchain.output_parsers.retry import RetryOutputParser, OutputParserException
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain_core.prompt_values import StringPromptValue


In [4]:
####################
total = 250
num_members = [2,4,8]


num_items = 50  #### [5,10,25,50,100]
##################
## Datasets are named after num_members, num_items and total groups


file = f'groups/groups_{num_members}members_{num_items}items_totalgroups{total}.csv'

df = pd.read_csv(file)
df

Unnamed: 0,user_id,item_1,item_2,item_3,item_4,item_5,item_6,item_7,item_8,item_9,...,item_42,item_43,item_44,item_45,item_46,item_47,item_48,item_49,item_50,groupId
0,user_4856,3,1,2,1,5,1,2,4,8,...,6,5,4,2,5,5,2,2,4,751
1,user_58959,7,1,1,8,2,7,2,3,7,...,9,6,9,9,5,10,10,7,2,751
2,user_55428,7,5,3,9,7,7,1,3,7,...,7,3,7,4,2,2,10,9,10,751
3,user_76123,7,3,4,7,10,8,3,3,2,...,1,2,4,9,5,10,4,3,3,751
4,user_63417,1,6,10,3,7,4,1,4,2,...,2,1,4,4,9,4,8,7,6,752
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1155,user_70989,4,6,4,1,4,1,5,8,8,...,6,5,4,1,4,5,10,10,4,999
1156,user_10003,6,4,1,10,2,2,10,4,9,...,3,5,8,9,1,6,3,8,7,999
1157,user_15895,7,5,3,7,4,9,3,3,4,...,3,3,6,5,6,5,9,10,10,999
1158,user_60908,6,3,4,5,1,9,1,7,6,...,8,6,9,9,5,2,5,9,1,1000


In [105]:
## Helper functions:

def transform_df(df):
    df_long = df.melt(id_vars=['groupId', 'user_id'], var_name='item', value_name='rating')
    return df_long

def listmaker(value):
    if isinstance(value, str):
        try:
            value = [value]
        except (ValueError, SyntaxError):
            print(f"ERROR MAKING A LIST?!")
            return []  
    return value if isinstance(value, list) else []  


## Social choice-based aggregation strategies:

def MAJ(df):
    counts = df.groupby(['groupId', 'item']).size().reset_index(name='count')
    return list(counts.loc[counts['count'].values ==counts['count'].values.max()].item)

def ADD(df):
    counts = df.groupby(['groupId', 'item'])['rating'].sum().reset_index(name='sum_rating')
    return list(counts.loc[counts['sum_rating'].values ==counts['sum_rating'].values.max()].item)
    
def APP(df, threshold=6):
    above_threshold = df[df['rating'] > threshold]
    counts = above_threshold.groupby(['groupId', 'item']).size().reset_index(name='count_above_threshold')
    return list(counts.loc[counts['count_above_threshold'].values ==counts['count_above_threshold'].values.max()].item)

def LMS(df):
    counts = df.groupby(['groupId', 'item'])['rating'].min().reset_index(name='min_rating')
    return list(counts.loc[counts['min_rating'].values ==counts['min_rating'].values.max()].item)

def MPL(df):
    counts = df.groupby(['groupId', 'item'])['rating'].max().reset_index(name='max_rating')
    
    return list(counts.loc[counts['max_rating'].values ==counts['max_rating'].values.max()].item)

In [14]:
## Descriptions taken from earlier work by Francesco + 2 item example

strat_list = [
'ADD: ADD sums all ratings per item and recommend the item with the highest sum (Senot et al. 2010). For example, the first item has a 4 rating and 5 rating (sum=9). The second item has a 6 and 7 rating (sum=13). Recommend the second item because its sum is higher than the sum of the first item. Use ADD to refer to this strategy.',
'APP: APP is a majority-based strategy. A predefined threshold is set at 6. For each item, you count the number of times it has been rated above 6. Recommend the item which has been rated above the threshold the most. For example, the first item has a rating of 7 and 8. The second item has a rating of 9 and 5. Recommend the first item because it has more ratings above 6. Use APP to refer to this strategy.',
'LMS: LMS recommends the item which has the highest rating if you only take the lowest rating per item into account (Senot et al. 2010). For example, the first item has a rating of 5 and 6. The second item has a rating of 2 and 9. Recommend the first item because its lowest rating (5) is higher than the lowest rating of the second item (2). Use LMS to refer to this strategy.',
'MPL: MPL recommends the item with the highest single rating across all relevant individuals (Senot et al. 2010). For example, the first item has a rating of 5 and 6. The second item has a rating of 2 and 9. Recommend the second item because 9 is the highest rating across all items. Use MPL to refer to this strategy.']

In [11]:
#### FULL INITIALIZATION OF ALL LLM CHAINS
## A llm chain consists of three parts 1) prompt 2) model 3) parser


#### 1) Prompt + parser
class Recommendation(BaseModel):
    strategy: str = Field(description="The aggregation strategy that was used")
    recommendation: str = Field(description="python list of the final group recommendation")

parser = JsonOutputParser(pydantic_object=Recommendation)

prompt = PromptTemplate(
    template="""
    Only respond with the required json format. 
    The only response is a json dictionary with the strategy and recommendation keys. Follow these formatting instructions:
    \n
    {format_instructions}\n 

    #######
    
    You are an expert in making group recommendations based on a table of ratings presented below. 
    That information includes users (user_ids) and information on which items they like (item_x). The rating is a scale from 0 to 10. 
    You recommend an item to the group. For the recommendation, you simply mention the item name. 

    The table is found below:
    
    ## begin group table ##
    {desc}
    ## end group table ##
    
    To obtain a group recommendation, you follow a social choice-based aggregation strategy. The strategy is explained (alongside a simple example) in the following excerpt:
    
    ## begin strategy excerpt ##
    {strat}
    ## end strategy excerpt ##
   
    Based on the described group composition, you strictly apply the procedure of the aggregation strategy on the group table to obtain a group recommendation. Do not write python code.
    
    If multiple items have the same end score, recommend them both in the form of a list. If there is a tie, recommend the set of items in the tie. Refer to items using their name (item_value)
    

    
     If you do not know the answer, respond with an empty list in the recommendation key. Follow the instructions:
    ## begin instructions ##
    \n
    {format_instructions}\n 
    ## end instructions ##

    Only respond with the requested JSON dictionary which includes the recommendation and strategy keys.
    """ ,
    input_variables=["desc", "strat"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)







#### 2) MODELS
llm_llama = ChatOllama(model='llama3.1:8b-instruct-q8_0', temperature=0, max_tokens=1000,seed=1234) 
llm_mistral = ChatOllama(model='mistral:instruct', temperature=0, max_tokens=1000,seed=1234) 
llm_gemma = ChatOllama(model='gemma2', temperature=0, max_tokens=1000,seed=1234) 
llm_phi = ChatOllama(model='phi4', temperature=0, max_tokens=1000,seed=1234) 



## full chains

chain_llama = prompt | llm_llama | parser
chain_mistral = prompt | llm_mistral | parser
chain_gemma = prompt | llm_gemma | parser
chain_phi = prompt | llm_phi| parser

In [None]:
random.seed(123456) 
np.random.seed(123456)


result_file = f'YOUR FILE NAME'
result_exists = os.path.isfile(result_file)

###############################

if result_exists == True:
    done = pd.read_csv(result_file)
    counter = done['groupId'].max() +1
    max_counter = df['groupId'].max()

else:
    counter = df['groupId'].min()
    max_counter = df['groupId'].max()
    #max_counter = df['groupId'].min() + 3
###############################

print(counter)

strategy_map = {
    'ADD': ADD,
    'APP': APP,
    'LMS': LMS,
    'MPL': MPL
}



while counter <= max_counter:
    df_members = df.loc[df['groupId'] == counter]

    chosen_strat = random.choice(strat_list)
    try:
        
        responses = {
            "llama": chain_llama.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='list'),'strat':chosen_strat}),
            "mistral": chain_mistral.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='list'),'strat':chosen_strat}),
            "gemma": chain_gemma.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='list'),'strat':chosen_strat}),
            "deepseek": chain_deepseek.invoke({"desc": df_members,'strat':chosen_strat}),
            "phi": chain_phi.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='list'),'strat':chosen_strat}),
        }

        if responses['phi']['strategy'] in strategy_map:
            final_strat = responses['phi']['strategy']
            gold_label = strategy_map.get(responses['phi']['strategy'])(transform_df(df_members))
        elif responses['mistral']['strategy'] in strategy_map:
            final_strat = responses['mistral']['strategy']
            gold_label = strategy_map.get(responses['mistral']['strategy'])(transform_df(df_members)) ### sometimes a formatting error can occur.
        else:
            counter+=1
            print('fail', counter)
            continue

        recs = {key: listmaker(res["recommendation"]) for key, res in responses.items()}

        df_temp = pd.DataFrame([{
                "groupId": counter,
                "group_size":len(df_members),
                "num_items": int(len(list(df_members))-2), ## num columns - user_id column - groupid column
                "strategy": final_strat,
                "gold_label": gold_label,
                **{f"{key}": vec for key, vec in recs.items()} 
            }])

        df_temp.to_csv(result_file, mode='a', index=False, header=not result_exists)
        result_exists = True


    except Exception as e:
        print(f"Error processing group {counter}: {e}")
    counter +=1


    #break


In [54]:
### EXPLANATIONS ( LOOP ABOVE WAS USED TO RUN CODE) #########
### Few shot uses own loop ###


In [6]:
#######################
##### PROMPT WITH EXPLANATIONS
#############


#### 1) Prompt + parser
class Recommendation(BaseModel):
    strategy: str = Field(description="The aggregation strategy that was used")
    recommendation: str = Field(description="python list of the final group recommendation")
    explanation: str = Field(description="a short explanation detailing the recommendation procedure")

parser = JsonOutputParser(pydantic_object=Recommendation)

prompt = PromptTemplate(
    template="""
    Only respond with the required json format. 
    The only response is a json dictionary with the strategy, recommendation and explanation keys. Follow these formatting instructions:
    \n
    {format_instructions}\n 

    #######
    
    You are an expert in making and explaining group recommendations based on a table of ratings presented below. 
    That information includes users (user_ids) and information on which items they like (item_x). The rating is a scale from 0 to 10. 
 You recommend an item to the group. For the recommendation, you simply mention the item name. 

    The table is found below:
    
    ## begin group table ##
    {desc}
    ## end group table ##
    
    To obtain a group recommendation, you follow a social choice-based aggregation strategy. The strategy is explained (alongside a simple example) in the following excerpt:
    
    ## begin strategy excerpt ##
    {strat}
    ## end strategy excerpt ##
  

    Based on the described group composition, you strictly apply the procedure of the aggregation strategy on the group table to obtain a group recommendation. Do not write python code.
    
    If multiple items have the same end score, recommend them both in the form of a list. If there is a tie, recommend the set of items in the tie. Refer to items using their name (item_value)
    Provide a short explanation detailing how you derived the recommendation. Explain to the group how the strategy works and why the output is being recommended to them.
    
    

   Only respond with the requested JSON dictionary which includes the recommendation, strategy and explanaiton keys.
    ## begin instructions ##
    \n
    {format_instructions}\n 
    ## end instructions ##
    """ ,
    input_variables=["desc", "strat"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)





#### 2) MODELS
llm_llama = ChatOllama(model='llama3.1:8b-instruct-q8_0', temperature=0, max_tokens=1000,seed=1234) 
llm_mistral = ChatOllama(model='mistral:instruct', temperature=0, max_tokens=1000,seed=1234) 
llm_gemma = ChatOllama(model='gemma2', temperature=0, max_tokens=1000,seed=1234) 
llm_phi = ChatOllama(model='phi4', temperature=0, max_tokens=1000,seed=1234) 



## full chains

chain_llama = prompt | llm_llama | parser
chain_mistral = prompt | llm_mistral | parser
chain_gemma = prompt | llm_gemma | parser
chain_phi = prompt | llm_phi| parser

In [33]:
chain_phi.invoke({"desc": df[df.groupId==501], 'strat': strat_list[0]})

{'strategy': 'ADD',
 'recommendation': ['item_7'],
 'explanation': "The ADD strategy sums all ratings per item and recommends the item with the highest sum. In this case, 'item_7' has the highest total score of 30 across all users in the group, making it the top recommendation."}

In [None]:
##### FEW SHOT #########

In [21]:
fs= pd.read_csv('group_data/fewshot-examples.csv')
fs_list = list(set(fs.groupId))
fs_list

[1501, 1502, 1503]

In [29]:
#### FULL INITIALIZATION OF ALL LLM CHAINS
## A llm chain consists of three parts 1) prompt 2) model 3) parser


#### 1) Prompt + parser
class Recommendation(BaseModel):
    strategy: str = Field(description="The aggregation strategy that was used")
    recommendation: str = Field(description="python list of the final group recommendation")

parser = JsonOutputParser(pydantic_object=Recommendation)

prompt = PromptTemplate(
    template="""
    Only respond with the required json format. 
    The only response is a json dictionary with the strategy and recommendation keys. Follow these formatting instructions:
    \n
    {format_instructions}\n 

    #######
    
    You are an expert in making group recommendations based on a table of ratings presented below. 
    That information includes users (user_ids) and information on which items they like (item_x). The rating is a scale from 0 to 10. 
    You recommend an item to the group. For the recommendation, you simply mention the item name. 

    To obtain a group recommendation, you follow a social choice-based aggregation strategy. 
   
    
     
    To showcase how to apply the strategy, you are provided with these examples.
    If the input would be {example1in}, the recommendation would be {example1out}.
    If the input would be {example2in}, the recommendation would be {example2out}.
    If the input would be {example3in}, the recommendation would be {example3out}.

    The strategy is explained (alongside another simple example) in the following excerpt:
    
    ## begin strategy excerpt ##
    {strat}
    ## end strategy excerpt ##
    Based on the described group composition, you strictly apply the procedure of the aggregation strategy on the group table to obtain a group recommendation. Do not write python code.
    If multiple items have the same end score, recommend them both in the form of a list. If there is a tie, recommend the set of items in the tie. Refer to items using their name (item_value)
    Apply the strategy described in the strategy excerpt and showcases in the three examples on the following group table.
    
    ## begin group table ##
    {desc}
    ## end group table ##
    

    Do not write python code. Only respond with the requested JSON dictionary which includes the recommendation (items) and strategy keys.
    ## begin instructions ##
    \n
    {format_instructions}\n 
    ## end instructions ##
    """ ,
    input_variables=["desc", "strat", 'example1in', 'example1out', 'example2in', 'example2out', 'example3in', 'example3out'],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)





llm_phi = ChatOllama(model='phi4', temperature=0, max_tokens=1000,seed=1234) 

## full chains
chain_phi = prompt | llm_phi| parser


In [30]:
random_index = random.choice([2])
chosen_strat = strat_list[random_index]
strat_func = [ADD, APP, LMS, MPL]


selected_func = strat_func[random_index]

chain_phi.invoke({
    "desc": df[df.groupId == 850].iloc[:, :-1].to_dict(orient='list'),
    "strat": chosen_strat,
    "example1in": fs[fs.groupId == fs_list[0]].iloc[:, :-1].to_dict(orient='list'),
    "example1out": selected_func(transform_df(fs[fs.groupId == fs_list[0]])),
    "example2in": fs[fs.groupId == fs_list[1]].iloc[:, :-1].to_dict(orient='list'),
    "example2out": selected_func(transform_df(fs[fs.groupId == fs_list[1]])),
    "example3in": fs[fs.groupId == fs_list[2]].iloc[:, :-1].to_dict(orient='list'),
    "example3out": selected_func(transform_df(fs[fs.groupId == fs_list[2]]))
})

{'strategy': 'Average Score Strategy',
 'recommendation': ['item_37', 'item_42', 'item_47', 'item_46', 'item_40']}

In [28]:
LMS(transform_df(df[df.groupId==850]))

['item_14', 'item_33', 'item_38', 'item_42']

In [None]:
random.seed(123456) 
np.random.seed(123456)


result_file = f'results/all_results-fs-mistral.csv'
result_exists = os.path.isfile(result_file)

###############################

if result_exists == True:
    done = pd.read_csv(result_file)
    counter = done['groupId'].max() +1
    max_counter = df['groupId'].max()

else:
    counter = df['groupId'].min()
    max_counter = df['groupId'].max()
    #max_counter = df['groupId'].min() + 3
###############################

print(counter)

strategy_map = {
    'ADD': ADD,
    'APP': APP,
    'LMS': LMS,
    'MPL': MPL
}



while counter <= max_counter:
    df_members = df.loc[df['groupId'] == counter]

    random_index = random.choice([0,1,2,3])
    chosen_strat = strat_list[random_index]
    strat_func = [ADD, APP, LMS, MPL]
    strat_str = ['ADD', 'APP', 'LMS', 'MPL']
    selected_func = strat_func[random_index]


    try:
        
        responses = {'phi':chain_phi.invoke({
                        "desc":df_members.iloc[:, :-1].to_dict(orient='list'),
                        "strat": chosen_strat,
                        "example1in": fs[fs.groupId == fs_list[0]].iloc[:, :-1].to_dict(orient='list'),
                        "example1out": selected_func(transform_df(fs[fs.groupId == fs_list[0]])),
                        "example2in": fs[fs.groupId == fs_list[1]].iloc[:, :-1].to_dict(orient='list'),
                        "example2out": selected_func(transform_df(fs[fs.groupId == fs_list[1]])),
                        "example3in": fs[fs.groupId == fs_list[2]].iloc[:, :-1].to_dict(orient='list'),
                        "example3out": selected_func(transform_df(fs[fs.groupId == fs_list[2]]))
        })
        }

       
        final_strat = strat_str[random_index]
        gold_label = selected_func(transform_df(df_members))
        #else:
            #final_strat = responses['phi']['strategy']
            #gold_label = strategy_map.get(responses['phi']['strategy'])(transform_df(df_members)) ### sometimes a formatting error can occur.
      

        recs = {key: listmaker(res["recommendation"]) for key, res in responses.items()}

        df_temp = pd.DataFrame([{
                "groupId": counter,
                "group_size":len(df_members),
                "num_items": int(len(list(df_members))-2), ## num columns - user_id column - groupid column
                "strategy": final_strat,
                "gold_label": gold_label,
                **{f"{key}": vec for key, vec in recs.items()} 
            }])

        df_temp.to_csv(result_file, mode='a', index=False, header=not result_exists)
        result_exists = True


    except Exception as e:
        print(f"Error processing group {counter}: {e}")
    counter +=1


    #break


In [41]:
df_members = df.loc[df['groupId'] == 751]

random_index = random.choice([0,1,2,3])
chosen_strat = strat_list[random_index]
strat_func = [ADD, APP, LMS, MPL]
strat_str = ['ADD', 'APP', 'LMS', 'MPL']
selected_func = strat_func[random_index]


responses = {
            'phi':chain_phi.invoke({
                        "desc":df_members.iloc[:, :-1].to_dict(orient='list'),
                        "strat": chosen_strat,
                        "example1in": fs[fs.groupId == fs_list[0]].iloc[:, :-1].to_dict(orient='list'),
                        "example1out": selected_func(transform_df(fs[fs.groupId == fs_list[0]])),
                        "example2in": fs[fs.groupId == fs_list[1]].iloc[:, :-1].to_dict(orient='list'),
                        "example2out": selected_func(transform_df(fs[fs.groupId == fs_list[1]])),
                        "example3in": fs[fs.groupId == fs_list[2]].iloc[:, :-1].to_dict(orient='list'),
                        "example3out": selected_func(transform_df(fs[fs.groupId == fs_list[2]]))
        })
}


In [None]:
#### BASELINE #####

In [25]:
#### FULL INITIALIZATION OF ALL LLM CHAINS
## A llm chain consists of three parts 1) prompt 2) model 3) parser


#### 1) Prompt + parser
class Recommendation(BaseModel):
    strategy: str = Field(description="The aggregation strategy that was used")
    recommendation: str = Field(description="python list of the final group recommendation")

parser = JsonOutputParser(pydantic_object=Recommendation)

prompt = PromptTemplate(
    template="""
    Only respond with the required json format. 
    The only response is a json dictionary with the strategy and recommendation keys. Follow these formatting instructions:
    \n
    {format_instructions}\n 

    #######
    
    You are an expert in making group recommendations based on a table of ratings presented below. 
    That information includes users (user_ids) and information on which items they like (item_x). The rating is a scale from 0 to 10. 
    You recommend an item to the group. For the recommendation, you simply mention the item name. 

    The table is found below:
    
    ## begin group table ##
    {desc}
    ## end group table ##
    
    To obtain a group recommendation, you follow a social choice-based aggregation strategy. The strategy is explained (alongside a simple example) in the following excerpt:
    
    ## begin strategy excerpt ##
    {strat}
    ## end strategy excerpt ##
   
    Based on the described group composition, you strictly apply the procedure of the aggregation strategy on the group table to obtain a group recommendation. Do not write python code.
    
    If multiple items have the same end score, recommend them both in the form of a list. If there is a tie, recommend the set of items in the tie. Refer to items using their name (item_value)
    

    
     If you do not know the answer, respond with an empty list in the recommendation key. 
         
    Follow the instructions:
    ## begin instructions ##
    \n
    {format_instructions}\n 
    ## end instructions ##
    Only respond with the requested JSON dictionary which includes the recommendation and strategy keys.
    """ ,
    input_variables=["desc", "strat"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)



llm_phi = ChatOllama(model='phi4', temperature=0, max_tokens=1000,seed=1234) 
chain_phi = prompt | llm_phi| parser

In [None]:
####### BASELINE RUN FOR ONLY PART 2 ####
random.seed(123456) 
np.random.seed(123456)


result_file = f'results/all_results-baseline.csv'
result_exists = os.path.isfile(result_file)

###############################

if result_exists == True:
    done = pd.read_csv(result_file)
    counter = done['groupId'].max() +1
    max_counter = df['groupId'].max()

else:
    counter = df['groupId'].min()
    max_counter = df['groupId'].max()
    #max_counter = df['groupId'].min() + 3
###############################

print(counter)

strategy_map = {
    'ADD': ADD,
    'APP': APP,
    'LMS': LMS,
    'MPL': MPL
}



while counter <= max_counter:
    df_members = df.loc[df['groupId'] == counter]

    random_index = random.choice([0,1,2,3])
    chosen_strat = strat_list[random_index]
    strat_func = [ADD, APP, LMS, MPL]
    strat_str = ['ADD', 'APP', 'LMS', 'MPL']
    selected_func = strat_func[random_index]
    try:
        
        responses = {

            "phi": chain_phi.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='list'),'strat':chosen_strat}),
        }
        final_strat = strat_str[random_index]
        gold_label = selected_func(transform_df(df_members))

        recs = {key: listmaker(res["recommendation"]) for key, res in responses.items()}

        df_temp = pd.DataFrame([{
                "groupId": counter,
                "group_size":len(df_members),
                "num_items": int(len(list(df_members))-2), ## num columns - user_id column - groupid column
                "strategy": final_strat,
                "gold_label": gold_label,
                **{f"{key}": vec for key, vec in recs.items()} 
            }])

        df_temp.to_csv(result_file, mode='a', index=False, header=not result_exists)
        result_exists = True


    except Exception as e:
        print(f"Error processing group {counter}: {e}")
    counter +=1


    #break


In [None]:
####### WITH CONTEXT #######

In [13]:
####### CONTEXT ###########
movies = pd.read_csv('group_data/movies.csv')
movies = list(movies.title)
titles = random.sample(movies,50)
len(titles)

50

In [14]:
#### FULL INITIALIZATION OF ALL LLM CHAINS
## A llm chain consists of three parts 1) prompt 2) model 3) parser


#### 1) Prompt + parser
class Recommendation(BaseModel):
    strategy: str = Field(description="The aggregation strategy that was used")
    recommendation: str = Field(description="python list of the final group recommendation")

parser = JsonOutputParser(pydantic_object=Recommendation)

prompt = PromptTemplate(
    template="""
    Only respond with the required json format. 
    The only response is a json dictionary with the strategy and recommendation keys. Follow these formatting instructions:
    \n
    {format_instructions}\n 

    #######
    
    You are an expert in making group recommendations based on a table of ratings presented below. 
    That information includes users (user_ids) and information on which movies they like (strings with movie titles). The rating is a scale from 0 to 10. 
    You recommend a movie to the group. For the recommendation, you simply mention the movie title. 

    The table is found below:
    
    ## begin group table ##
    {desc}
    ## end group table ##
    
    To obtain a group recommendation, you follow a social choice-based aggregation strategy. The strategy is explained (alongside a simple example) in the following excerpt:
    
    ## begin strategy excerpt ##
    {strat}
    ## end strategy excerpt ##
   
    Based on the described group composition, you strictly apply the procedure of the aggregation strategy on the group table to obtain a group recommendation. Do not write python code.
    
    If multiple movies have the same end score, recommend them both in the form of a list. If there is a tie, recommend the set of movies in the tie. Refer to movies using their full title.
    

    
     If you do not know the answer, respond with an empty list in the recommendation key. 
         
    Follow the instructions:
    ## begin instructions ##
    \n
    {format_instructions}\n 
    ## end instructions ##
    Only respond with the requested JSON dictionary which includes the recommendation and strategy keys.
    """ ,
    input_variables=["desc", "strat"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)



llm_phi = ChatOllama(model='phi4', temperature=0, max_tokens=1000,seed=1234) 
chain_phi = prompt | llm_phi| parser

In [23]:
cols = ['user_id']
cols.extend(random.sample(titles, int(len(list(df_members))-2)))
cols.extend(['groupId'])
df_members.columns = cols


In [None]:
####### BASELINE RUN FOR ONLY PART 2 ####
random.seed(123456) 
np.random.seed(123456)


result_file = f'results/all_results-context.csv'
result_exists = os.path.isfile(result_file)

###############################

if result_exists == True:
    done = pd.read_csv(result_file)
    counter = done['groupId'].max() +1
    max_counter = df['groupId'].max()

else:
    counter = df['groupId'].min()
    max_counter = df['groupId'].max()
    #max_counter = df['groupId'].min() + 3
###############################

print(counter)

strategy_map = {
    'ADD': ADD,
    'APP': APP,
    'LMS': LMS,
    'MPL': MPL
}



while counter <= max_counter:
    df_members = df.loc[df['groupId'] == counter]
    
    # setting movie titles as columns instead of item_x
    cols = ['user_id']
    cols.extend(random.sample(titles, int(len(list(df_members))-2)))
    cols.extend(['groupId'])
    df_members.columns = cols


    random_index = random.choice([0,1,2,3])
    chosen_strat = strat_list[random_index]
    strat_func = [ADD, APP, LMS, MPL]
    strat_str = ['ADD', 'APP', 'LMS', 'MPL']
    selected_func = strat_func[random_index]
    try:
        
        responses = {
            "phi": chain_phi.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='list'),'strat':chosen_strat}),
        }
        final_strat = strat_str[random_index]
        gold_label = selected_func(transform_df(df_members))

        recs = {key: listmaker(res["recommendation"]) for key, res in responses.items()}

        df_temp = pd.DataFrame([{
                "groupId": counter,
                "group_size":len(df_members),
                "num_items": int(len(list(df_members))-2), ## num columns - user_id column - groupid column
                "strategy": final_strat,
                "gold_label": gold_label,
                **{f"{key}": vec for key, vec in recs.items()} 
            }])

        df_temp.to_csv(result_file, mode='a', index=False, header=not result_exists)
        result_exists = True


    except Exception as e:
        print(f"Error processing group {counter}: {e}")
    counter +=1


    #break


In [32]:
#### TEST CASE: DATA FORMATTING

#### FULL INITIALIZATION OF ALL LLM CHAINS
## A llm chain consists of three parts 1) prompt 2) model 3) parser


#### 1) Prompt + parser
class Recommendation(BaseModel):
    strategy: str = Field(description="The aggregation strategy that was used")
    recommendation: str = Field(description="python list of the final group recommendation")

parser = JsonOutputParser(pydantic_object=Recommendation)

prompt = PromptTemplate(
    template="""
    Only respond with the required json format. 
    The only response is a json dictionary with the strategy and recommendation keys. Follow these formatting instructions:
    \n
    {format_instructions}\n 

    #######
    
    You are an expert in making group recommendations based on a table of ratings presented below. 
    That information includes users (user_ids) and information on which items they like (item_x). The rating is a scale from 0 to 10. 
    You recommend an item to the group. For the recommendation, you simply mention the item name. 

    The table is found below:
    
    ## begin group table ##
    {desc}
    ## end group table ##
    
    To obtain a group recommendation, you follow a social choice-based aggregation strategy. The strategy is explained (alongside a simple example) in the following excerpt:
    
    ## begin strategy excerpt ##
    {strat}
    ## end strategy excerpt ##
   
    Based on the described group composition, you strictly apply the procedure of the aggregation strategy on the group table to obtain a group recommendation. Do not write python code.
    
    If multiple items have the same end score, recommend them both in the form of a list. If there is a tie, recommend the set of items in the tie. Refer to items using their name (item_value)
    

    
     If you do not know the answer, respond with an empty list in the recommendation key. Follow the instructions:
    ## begin instructions ##
    \n
    {format_instructions}\n 
    ## end instructions ##

    Only respond with the requested JSON dictionary which includes the recommendation and strategy keys.
    """ ,
    input_variables=["desc", "strat"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)







#### 2) MODELS

llm_phi = ChatOllama(model='phi4', temperature=0, max_tokens=1000,seed=1234) 



## full chains

chain_phi = prompt | llm_phi| parser

In [56]:
## Adjusted functions for ranking

def ADD(df):
    counts = df.groupby(['groupId', 'item'])['rating'].sum().reset_index(name='sum_rating')
    return list(counts.sort_values(by='sum_rating', ascending=False)['item'].head(10))
    

def APP(df, threshold=6):
    above_threshold = df[df['rating'] > threshold]
    counts = above_threshold.groupby(['groupId', 'item']).size().reset_index(name='count_above_threshold')
    
    all_items = df['item'].unique()
    all_groups = df['groupId'].unique()
    full_index = pd.MultiIndex.from_product([all_groups, all_items], names=['groupId', 'item'])
    
    counts = counts.set_index(['groupId', 'item']).reindex(full_index, fill_value=0).reset_index()
    
    return list(counts.sort_values(by='count_above_threshold', ascending=False)['item'].head(10))

def LMS(df):
    counts = df.groupby(['groupId', 'item'])['rating'].min().reset_index(name='min_rating')
    return list(counts.sort_values(by='min_rating', ascending=False)['item'].head(10))

def MPL(df):
    counts = df.groupby(['groupId', 'item'])['rating'].max().reset_index(name='max_rating')
    
    return list(counts.sort_values(by='max_rating', ascending=False)['item'].head(10))

In [None]:
random.seed(123456) 
np.random.seed(123456)


result_file = f'results/all_results-asrecords.csv'
result_exists = os.path.isfile(result_file)

###############################

if result_exists == True:
    done = pd.read_csv(result_file)
    counter = done['groupId'].max() +1
    max_counter = df['groupId'].max()

else:
    counter = df['groupId'].min()
    max_counter = df['groupId'].max()
    #max_counter = df['groupId'].min() + 3
###############################

print(counter)

strategy_map = {
    'ADD': ADD,
    'APP': APP,
    'LMS': LMS,
    'MPL': MPL
}



while counter <= max_counter:
    df_members = df.loc[df['groupId'] == counter]

    random_index = random.choice([0,1,2,3])
    chosen_strat = strat_list[random_index]
    strat_func = [ADD, APP, LMS, MPL]
    strat_str = ['ADD', 'APP', 'LMS', 'MPL']
    selected_func = strat_func[random_index]
    try:
        
        responses = {
            "phi": chain_phi.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='records'),'strat':chosen_strat}),
        }

        if responses['phi']['strategy'] in strategy_map:
            final_strat = responses['phi']['strategy']
            gold_label = strategy_map.get(responses['phi']['strategy'])(transform_df(df_members))
        #else:
            #final_strat = responses['phi']['strategy']
            #gold_label = strategy_map.get(responses['phi']['strategy'])(transform_df(df_members)) ### sometimes a formatting error can occur.
        else:
            counter+=1
            print('fail', counter)
            continue

        recs = {key: listmaker(res["recommendation"]) for key, res in responses.items()}

        df_temp = pd.DataFrame([{
                "groupId": counter,
                "group_size":len(df_members),
                "num_items": int(len(list(df_members))-2), ## num columns - user_id column - groupid column
                "strategy": final_strat,
                "gold_label": gold_label,
                **{f"{key}": vec for key, vec in recs.items()} 
            }])

        df_temp.to_csv(result_file, mode='a', index=False, header=not result_exists)
        result_exists = True


    except Exception as e:
        print(f"Error processing group {counter}: {e}")
    counter +=1


    #break


In [55]:
#### RANKING #######

In [91]:
#### FULL INITIALIZATION OF ALL LLM CHAINS
## A llm chain consists of three parts 1) prompt 2) model 3) parser


#### 1) Prompt + parser
class Recommendation(BaseModel):
    recommendation: list = Field(description="python list of the final group recommendation")
    strategy: str = Field(description="The aggregation strategy that was used")


parser = JsonOutputParser(pydantic_object=Recommendation)

prompt = PromptTemplate(
    template="""
    Only respond with the required json format. Only respond with the recommendation and strategy keys.
    \n
    {format_instructions}\n 

    #######
    
    You are an expert in making group recommendations based on a table of ratings presented below. 
    That information includes users (user_ids) and information on which items they like (item_x). The rating is a scale from 0 to 10. 
    You recommend 10 items to the group. 

    The table is found below:
    
    ## begin group table ##
    {desc}
    ## end group table ##
    
    To obtain a group recommendation, you follow a social choice-based aggregation strategy. The strategy is explained (alongside a simple example) in the following excerpt:
    
    ## begin strategy excerpt ##
    {strat}
    ## end strategy excerpt ##
   
    Based on the described group composition, you strictly apply the procedure of the aggregation strategy. Do not write python code.
    
    You make a recommendation to this group of users by providing the top 10 items. You reply with the top 10. 
    Your recommendation contains exactly 10 items and is formatted as a python list containing strings. Refer to items using their name (item_value)
    
    Provide your answer strictly as a JSON object with the following format:
{{
  "recommendation": {{["item","item","item","item","item","item","item","item","item","item"]}},
  "strategy": "{{the strategy that was used}}"
}}

    """ ,
    input_variables=["desc", "strat"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)



llm_phi = ChatOllama(model='phi4', temperature=0, max_tokens=1000,seed=1234) 
chain_phi = prompt | llm_phi| parser
chain_mistral = prompt | llm_mistral | parser

In [None]:
random.seed(123456) 
np.random.seed(123456)


result_file = f'results/all_results-ranking.csv'
result_exists = os.path.isfile(result_file)

###############################

if result_exists == True:
    done = pd.read_csv(result_file)
    counter = done['groupId'].max() +1
    max_counter = df['groupId'].max()

else:
    counter = df['groupId'].min()
    max_counter = df['groupId'].max()
    #max_counter = df['groupId'].min() + 3
###############################

print(counter)

strategy_map = {
    'ADD': ADD,
    'APP': APP,
    'LMS': LMS,
    'MPL': MPL
}



while counter <= max_counter:
    df_members = df.loc[df['groupId'] == counter]

    random_index = random.choice([0,1,2,3])
    chosen_strat = strat_list[random_index]
    strat_func = [ADD, APP, LMS, MPL]
    strat_str = ['ADD', 'APP', 'LMS', 'MPL']
    selected_func = strat_func[random_index]
    try:
        
        responses = {
            "mistral": chain_mistral.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='list'),'strat':chosen_strat}),
            "phi": chain_phi.invoke({"desc": df_members.iloc[:, :-1].to_dict(orient='list'),'strat':chosen_strat}),
        }

        final_strat = strat_str[random_index]
        gold_label = selected_func(transform_df(df_members))

        recs = {key: listmaker(res["recommendation"]) for key, res in responses.items()}

        df_temp = pd.DataFrame([{
                "groupId": counter,
                "group_size":len(df_members),
                "num_items": int(len(list(df_members))-2), ## num columns - user_id column - groupid column
                "strategy": final_strat,
                "gold_label": gold_label,
                **{f"{key}": vec for key, vec in recs.items()} 
            }])

        df_temp.to_csv(result_file, mode='a', index=False, header=not result_exists)
        result_exists = True


    except Exception as e:
        print(f"Error processing group {counter}: {e}")
    counter +=1


    #break
