In [1]:
import json
from collections import defaultdict
import itertools
import re
import pandas as pd
from typing import List, Dict, Any

In [2]:
def read_file(file_name: str) -> Any:
    """
    Load a JSON file and return its parsed content.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's
        top-level JSON value.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: without an encoding argument open() falls
    # back to the platform's locale encoding, which can mis-decode non-ASCII
    # text on some systems.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)

In [3]:
# Load the Llama result file and print how many entries it contains.
llama_results_file = "../../../../data/conv-trs/llm-results/Llama3Point2Vision90B_generated_parsed_queries.json"
llama_results = read_file(llama_results_file)
print(len(llama_results))

2302


In [4]:
# Load the Gemini result file and print how many entries it contains.
gemini_results_file = "../../../../data/conv-trs/llm-results/Gemini1Point5Pro_generated_queries.json"
gemini_results = read_file(gemini_results_file)
print(len(gemini_results))

2302


In [5]:
from collections import defaultdict
import itertools
import re
import pandas as pd
from typing import List, Dict


def organize_configs(configs: List[Dict]) -> Dict[str, List[Dict]]:
    """
    Group config entries by the persona id stored under config['config']['p_id'].

    The returned mapping is a defaultdict(list); entries keep their input
    order within each group.
    """
    by_persona = defaultdict(list)
    for entry in configs:
        by_persona[entry['config']['p_id']].append(entry)
    return by_persona

def get_difficulty_index(config_id: str) -> str:
    """
    Return the difficulty label of a config_id, i.e. the text after its
    last underscore (the whole string when it has no underscore).
    """
    _head, _sep, difficulty = config_id.rpartition('_')
    return difficulty

def get_popularity_level(config_id: str) -> str:
    """
    Return the popularity label of a config_id: the underscore-separated
    token that directly follows the first 'pop' token.
    """
    tokens = config_id.split('_')
    marker_position = tokens.index('pop')
    return tokens[marker_position + 1]

def get_round_robin_configs(configs: List[Dict], n_samples: int = 200) -> pd.DataFrame:
    """
    Selects configs in a round-robin fashion and returns a DataFrame.

    Three cycles advance in lock-step on every iteration: the persona ids
    (sorted by the first integer found in each id), the difficulty labels
    and the popularity labels. For each (p_id, difficulty, popularity)
    triple the first config of that persona whose config_id carries both
    labels is selected; triples without a matching config are skipped.

    Args:
        configs: Config entries. Each entry is read through the keys
            'config_id', 'config' (with 'p_id', 'persona', 'filters'),
            'context', 'city', 'query_v', 'query_p0' and 'query_p1'.
            Every p_id must contain at least one digit, otherwise the
            sort key raises AttributeError.
        n_samples: Number of rows to select (default 200).

    Returns:
        A DataFrame with one row per selected config and the columns
        config_id, persona_id, persona, filters, context, city, query_v,
        query_p0, query_p1. An empty DataFrame is returned when there are
        no configs or when no triple of the rotation has a matching config.
        Because the rotation repeats, a config can be selected more than
        once if fewer than ``n_samples`` distinct matches exist.
    """
    # Organize configs by p_id
    organized_configs = organize_configs(configs)

    # Get all unique p_ids in natural (numeric) order
    p_ids = sorted(organized_configs.keys(), key=lambda x: int(re.search(r'\d+', x).group()))
    if not p_ids:
        return pd.DataFrame()

    # Filter labels, in the order they are rotated through
    difficulties = ['easy', 'medium', 'hard', 'sustainable']
    popularities = ['low', 'medium', 'high']

    # The combined rotation repeats after at most this many steps (the product
    # is a multiple of every cycle length). If a full period yields no match,
    # no later step can either, so looping on would never terminate.
    full_period = len(p_ids) * len(difficulties) * len(popularities)

    result = []
    rotation = zip(
        itertools.cycle(p_ids),
        itertools.cycle(difficulties),
        itertools.cycle(popularities),
    )
    for attempt, (p_id, difficulty, popularity) in enumerate(rotation):
        if len(result) >= n_samples:
            break
        if attempt >= full_period and not result:
            # Nothing matched in a whole period: give up instead of hanging.
            break

        # Find a matching config for (p_id, difficulty, popularity)
        matching_config = next(
            (cfg for cfg in organized_configs[p_id]
             if get_difficulty_index(cfg['config_id']) == difficulty
             and get_popularity_level(cfg['config_id']) == popularity),
            None
        )

        if matching_config is not None:
            result.append(matching_config)

    # Flatten the selected configs into one record per row
    df_data = [
        {
            'config_id': config['config_id'],
            'persona_id': config['config']['p_id'],
            'persona': config['config']['persona'],
            'filters': config['config']["filters"],
            'context': config["context"],
            'city': config["city"],
            'query_v': config["query_v"],
            'query_p0': config["query_p0"],
            'query_p1': config["query_p1"],
        }
        for config in result
    ]

    return pd.DataFrame(df_data)

In [6]:
# Build the round-robin subset of the Llama results and write it to CSV
# (index=False keeps the row index out of the file).
llama_df = get_round_robin_configs(llama_results)
llama_df.to_csv("../../../../data/conv-trs/eval/human-eval/llama/llama_results_subset.csv", index=False)

In [7]:
# Build the round-robin subset of the Gemini results and write it to CSV
# (index=False keeps the row index out of the file).
gemini_df = get_round_robin_configs(gemini_results)
gemini_df.to_csv("../../../../data/conv-trs/eval/human-eval/gemini/gemini_results_subset.csv", index=False)

In [8]:
# Display the Gemini subset for a visual check.
gemini_df

Unnamed: 0,config_id,persona_id,persona,filters,context,city,query_v,query_p0,query_p1
0,c_p_0_pop_low_easy,p_0,A top-scoring player in the local league who i...,"{'popularity': 'low', 'month': 'February'}",Adana has low popularity. Adana has low season...,"[Adana, Adiyaman, Agri, Arad, Arkhangelsk, Bac...",Cheap European city break in February.\n,"European city break in February, less crowded ...",Best European cities for intense physical trai...
1,c_p_1_pop_medium_medium,p_1,A former DJ at WSUM who is now working as a mu...,"{'popularity': 'medium', 'budget': 'medium', '...",Coimbra has medium popularity and medium budge...,"[Coimbra, Brno, Braga]",Medium budget European city break with parks a...,"European city break, medium budget, good parks...","Best European cities for live music, especiall..."
2,c_p_2_pop_high_hard,p_2,A fellow agent-turned-author who shares the sa...,"{'popularity': 'high', 'budget': 'low', 'inter...",Zagreb has high popularity and low budget. Zag...,"[Zagreb, Volgograd, Tirana, Tbilisi, Sofia, Sa...",Cheap January city break in Europe with museum...,European city break in January. Budget friendl...,Where can I find inspiring European cities wit...
3,c_p_3_pop_low_sustainable,p_3,a film critic who dislikes storylines involvin...,"{'popularity': 'low', 'interests': 'Outdoors &...",Van has low popularity. Van has low season in ...,"[Van, Uzhhorod, Trabzon, Thessaloniki, Sivas, ...",Cheap European city break in January with inte...,European city break in January. Low-budget des...,"Best European cities for unique, artistic expe..."
4,c_p_4_pop_medium_easy,p_4,A biology major conducting research on equine ...,"{'popularity': 'medium', 'budget': 'high'}",Aalborg has medium popularity and high budget....,"[Aalborg, Astrakhan, Bari, Bremen, Cheboksary,...",Suggest some moderately popular European citie...,High-budget European city with equestrian attr...,European cities with renowned veterinary or ag...
...,...,...,...,...,...,...,...,...,...
195,c_p_8_pop_medium_easy,p_8,A junior Flash developer seeking guidance on c...,"{'popularity': 'medium', 'interests': 'Outdoor...",Zaragoza has medium popularity. In Zaragoza yo...,"[Zaragoza, Varna, Turku, Tampere, Szczecin, St...",Good places to swim outdoors in Europe\n,Good places for a Flash developer to visit in ...,Best European cities for tech meetups & cowork...
196,c_p_9_pop_high_medium,p_9,An avid gamer who has played Sins of a Solar E...,"{'popularity': 'high', 'month': 'April', 'budg...",Sarajevo has high popularity and medium budget...,"[Sarajevo, Tallinn, Vilnius, Belgrade, Bratisl...",Medium budget European city break in April wit...,Budget-friendly European city break in April w...,"European cities with a grand, futuristic feel ..."
197,c_p_10_pop_low_hard,p_10,"An atheist, philosophy lecturer who encourages...","{'popularity': 'low', 'interests': 'Outdoors &...",Malatya has low popularity and high budget. Ma...,"[Malatya, Ioannina]",High-budget European city trip in February wit...,European city break in February. Low season & ...,Where can I find European cities rich in histo...
198,c_p_11_pop_medium_sustainable,p_11,A young apprentice fascinated by the technolog...,"{'popularity': 'medium', 'budget': 'low', 'mon...","Chelyabinsk has medium popularity , low budget...","[Chelyabinsk, Kirov, Podgorica, Rijeka, Sibiu,...","Suggest a low-budget, walkable European city b...","Budget-friendly European city break in April, ...",Which European cities offer glimpses into the ...


In [9]:
# Show only the Gemini rows whose config_id contains "pop_medium".
gemini_df.loc[gemini_df['config_id'].str.contains("pop_medium")]

Unnamed: 0,config_id,persona_id,persona,filters,context,city,query_v,query_p0,query_p1
1,c_p_1_pop_medium_medium,p_1,A former DJ at WSUM who is now working as a mu...,"{'popularity': 'medium', 'budget': 'medium', '...",Coimbra has medium popularity and medium budge...,"[Coimbra, Brno, Braga]",Medium budget European city break with parks a...,"European city break, medium budget, good parks...","Best European cities for live music, especiall..."
4,c_p_4_pop_medium_easy,p_4,A biology major conducting research on equine ...,"{'popularity': 'medium', 'budget': 'high'}",Aalborg has medium popularity and high budget....,"[Aalborg, Astrakhan, Bari, Bremen, Cheboksary,...",Suggest some moderately popular European citie...,High-budget European city with equestrian attr...,European cities with renowned veterinary or ag...
7,c_p_7_pop_medium_sustainable,p_7,A dog owner who wants the best medical treatme...,"{'popularity': 'medium', 'interests': 'Nightli...","Tampere has medium popularity , high budget, a...","[Tampere, Stavanger, Santander, Rouen, Pamplon...",High budget European city with good air qualit...,"European city break, high budget, excellent ai...",European cities with top veterinary clinics an...
10,c_p_10_pop_medium_hard,p_10,"An atheist, philosophy lecturer who encourages...","{'popularity': 'medium', 'budget': 'medium', '...",Brno has medium popularity and medium budget. ...,[Brno],Recommend a moderately priced Central European...,European city break in November. Mid-range bu...,Where can I find European cities rich in histo...
13,c_p_13_pop_medium_medium,p_13,A travel blogger who provides insider tips on ...,"{'popularity': 'medium', 'interests': 'Food', ...",Zaragoza has medium popularity. Zaragoza has l...,"[Zaragoza, Turku, Tampere, Szczecin, Stavanger...",Looking for a medium-popularity European city ...,Best European cities for digital nomads with g...,Best European cities for digital nomads with e...
...,...,...,...,...,...,...,...,...,...
186,c_p_199_pop_medium_sustainable,p_199,An individual wrongfully denied the right to v...,"{'popularity': 'medium', 'month': 'April', 'bu...","Rijeka has medium popularity , low budget, and...","[Rijeka, Szczecin, Bursa, Lviv, Plovdiv]","Looking for a less-crowded, budget-friendly ci...",Where can I find an affordable off-season Euro...,"Where can I find accessible, inclusive Europea..."
189,c_p_2_pop_medium_hard,p_2,A fellow agent-turned-author who shares the sa...,"{'popularity': 'medium', 'month': 'July', 'bud...",Turku has medium popularity and high budget. T...,"[Turku, Tampere, Stavanger, Santander, Rouen, ...",High budget European city break in July with i...,European city trip in July with interesting mu...,Where can I find inspiring European cities wit...
192,c_p_5_pop_medium_medium,p_5,an average Ukrainian citizen who is indifferen...,"{'popularity': 'medium', 'interests': 'Nightli...",Brno has medium popularity and medium budget. ...,[Brno],Recommend a moderately priced Eastern European...,Recommend an affordable Central European city ...,Affordable and safe European cities with good ...
195,c_p_8_pop_medium_easy,p_8,A junior Flash developer seeking guidance on c...,"{'popularity': 'medium', 'interests': 'Outdoor...",Zaragoza has medium popularity. In Zaragoza yo...,"[Zaragoza, Varna, Turku, Tampere, Szczecin, St...",Good places to swim outdoors in Europe\n,Good places for a Flash developer to visit in ...,Best European cities for tech meetups & cowork...


In [10]:
# Show only the Llama rows whose config_id contains "pop_medium".
llama_df.loc[llama_df['config_id'].str.contains("pop_medium")]

Unnamed: 0,config_id,persona_id,persona,filters,context,city,query_v,query_p0,query_p1
1,c_p_1_pop_medium_medium,p_1,A former DJ at WSUM who is now working as a mu...,"{'popularity': 'medium', 'budget': 'medium', '...",Coimbra has medium popularity and medium budge...,"[Coimbra, Brno, Braga]",'medium budget European city breaks with parks...,"""Medium budget European cities with parks and ...",Which European cities offer a mix of music fes...
4,c_p_4_pop_medium_easy,p_4,A biology major conducting research on equine ...,"{'popularity': 'medium', 'budget': 'high'}",Aalborg has medium popularity and high budget....,"[Aalborg, Astrakhan, Bari, Bremen, Cheboksary,...","""European cities for a luxurious trip.""","""European cities with horse riding trails and ...",Which European cities have horse riding school...
7,c_p_7_pop_medium_sustainable,p_7,A dog owner who wants the best medical treatme...,"{'popularity': 'medium', 'interests': 'Nightli...","Tampere has medium popularity , high budget, a...","[Tampere, Stavanger, Santander, Rouen, Pamplon...","""European cities with great air quality, high-...","""European cities with high-quality veterinary ...",Which European cities have excellent veterinar...
10,c_p_10_pop_medium_hard,p_10,"An atheist, philosophy lecturer who encourages...","{'popularity': 'medium', 'budget': 'medium', '...",Brno has medium popularity and medium budget. ...,[Brno],'medium budget European city breaks in Novembe...,"""European city breaks for a philosophy lecture...","Which European cities offer coworking spaces, ..."
13,c_p_13_pop_medium_medium,p_13,A travel blogger who provides insider tips on ...,"{'popularity': 'medium', 'interests': 'Food', ...",Zaragoza has medium popularity. Zaragoza has l...,"[Zaragoza, Turku, Tampere, Szczecin, Stavanger...","""European cities with traditional cuisine and ...","""European cities with good food and Wi-Fi opti...",Which European cities offer strong 5G connecti...
...,...,...,...,...,...,...,...,...,...
186,c_p_199_pop_medium_sustainable,p_199,An individual wrongfully denied the right to v...,"{'popularity': 'medium', 'month': 'April', 'bu...","Rijeka has medium popularity , low budget, and...","[Rijeka, Szczecin, Bursa, Lviv, Plovdiv]","""European cities with low budget and great air...","""Affordable European cities to visit in April ...",European cities with landmarks and museums sho...
189,c_p_2_pop_medium_hard,p_2,A fellow agent-turned-author who shares the sa...,"{'popularity': 'medium', 'month': 'July', 'bud...",Turku has medium popularity and high budget. T...,"[Turku, Tampere, Stavanger, Santander, Rouen, ...","""European cities to visit in July with high-en...","""European cities with high-end nightlife and c...",Which European cities offer a mix of historica...
192,c_p_5_pop_medium_medium,p_5,an average Ukrainian citizen who is indifferen...,"{'popularity': 'medium', 'interests': 'Nightli...",Brno has medium popularity and medium budget. ...,[Brno],'medium budget city break with museums and coc...,"""Mid-budget European city breaks with museums,...",Which European cities offer a mix of historica...
195,c_p_8_pop_medium_easy,p_8,A junior Flash developer seeking guidance on c...,"{'popularity': 'medium', 'interests': 'Outdoor...",Zaragoza has medium popularity. In Zaragoza yo...,"[Zaragoza, Varna, Turku, Tampere, Szczecin, St...","""European cities with medium popularity for ou...",A junior Flash developer seeking guidance on c...,Which European cities offer a mix of cultural ...


In [11]:
# True when at least one row has a different config_id in the two subsets;
# False means both subsets list the same config_ids in the same order.
not (llama_df['config_id'] == gemini_df['config_id']).all()

False