In [9]:
import os
import glob

In [10]:
def run_mcp_client(query: str):
    """Run the MCP client script (``main.py``) against the MCP server for one query.

    The client is started as ``python main.py ../server/mcp_server.py <query>``
    relative to the current working directory, and this call blocks until the
    child process exits.

    The query is handed to the child as exactly ONE command-line argument and is
    never interpreted by a shell, so characters such as ``"``, ``;``, ``&``,
    ``|`` or ``%VAR%`` inside a question cannot break or alter the command.

    Args:
        query: The question text. For backward compatibility with callers that
            pre-wrap the question in double quotes (as was needed when the
            command was built as a shell string), one pair of surrounding
            double quotes is removed before the query is passed on.

    Returns:
        The client's exit code (0 conventionally means success). A non-zero
        exit code is NOT raised as an exception; callers may inspect the
        returned value if they care.
    """
    import subprocess  # local import so this cell does not depend on the import cell

    # Strip the shell-style wrapping quotes added by existing callers; without a
    # shell they would otherwise reach main.py as literal characters.
    if len(query) >= 2 and query[0] == '"' and query[-1] == '"':
        query = query[1:-1]

    completed = subprocess.run(
        ["python", "main.py", "../server/mcp_server.py", query],
        check=False,  # keep the old best-effort behaviour: never raise on failure
    )
    return completed.returncode

In [11]:
def run_query(query: str, id: str = None):
    """Run one query through the MCP client and archive its output files.

    After the client returns, every path in the current working directory
    matching ``r_result_*`` and, if present, ``logs.txt`` is moved into
    ``logs/<id>/`` (created on demand).

    Args:
        query: The question text, forwarded unchanged to ``run_mcp_client``.
        id: Name of the sub-directory under ``logs/`` that receives the files.
            NOTE: when left at its default of ``None`` the files end up in
            ``logs/None/``; callers should pass an explicit id.

    Raises:
        OSError: if the target directory cannot be created or a file cannot be
            moved. Errors are not caught here so the caller can react.
    """
    run_mcp_client(query)

    # Archive the artifacts found in the working directory under logs/<id>/.
    # NOTE(review): presumably these files are written by main.py - confirm.
    run_dir = f"logs/{id}/"
    os.makedirs(run_dir, exist_ok=True)

    for result_file in glob.glob("r_result_*"):
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename raises FileExistsError on Windows when the same id
        # is run a second time.
        os.replace(result_file, f"{run_dir}{result_file}")

    if os.path.exists("logs.txt"):
        os.replace("logs.txt", f"{run_dir}logs.txt")

In [12]:
# Collect every question file. glob.glob gives no ordering guarantee (it follows
# the directory listing order of the OS/filesystem), so sort the paths to make
# the processing order reproducible across machines and re-runs.
question_files = sorted(glob.glob("../questions/*.txt"))
question_files[:5]

['../questions\\anthropic_onehop.txt',
 '../questions\\anthropic_twothreehop.txt',
 '../questions\\chatgpt.txt',
 '../questions\\chatgpt_obs.txt']

In [13]:
# Build one record per question line across all question files.
# Each record holds the question wrapped in double quotes and an id of the form
# "<file name up to its first '.'>_<zero-based line index>".
collated_question_df_list = []
for question_file in question_files:
    file_stem = os.path.basename(question_file).split('.')[0]
    # Explicit encoding: the default for open() depends on the platform locale,
    # which would garble non-ASCII text in UTF-8 files. "utf-8-sig" also drops
    # a leading byte-order mark if the file has one.
    # NOTE(review): assumes the question files are UTF-8 encoded - confirm.
    with open(question_file, "r", encoding="utf-8-sig") as f:
        lines = f.readlines()
    for idx, line in enumerate(lines):
        question = line.strip()
        if not question:
            # A blank line is not a question. The index still advances, so the
            # ids of the remaining lines are the same as before.
            continue
        collated_question_df_list.append({
            "question": f"\"{question}\"",
            "id": f"{file_stem}_{idx}"
        })
collated_question_df_list[:5]

[{'question': '"What are the current Premier League standings for matchday 30 in the 2024-2025 season?"',
  'id': 'anthropic_onehop_0'},
 {'question': '"Show me the list of players with expiring contracts in Spain for 2025."',
  'id': 'anthropic_onehop_1'},
 {'question': '"Who are all the injured players in the German Bundesliga right now?"',
  'id': 'anthropic_onehop_2'},
 {'question': '"Can you get me the transfer history of Erling Haaland?"',
  'id': 'anthropic_onehop_3'},
 {'question': '"What are the current market values of players in the Italian Serie A for the 2024-2025 season?"',
  'id': 'anthropic_onehop_4'}]

In [14]:
import pandas as pd

# Tabular view of the collated records; the columns come from the record keys.
collated_question_df = pd.DataFrame(data=collated_question_df_list)
collated_question_df.head()

Unnamed: 0,question,id
0,"""What are the current Premier League standings...",anthropic_onehop_0
1,"""Show me the list of players with expiring con...",anthropic_onehop_1
2,"""Who are all the injured players in the German...",anthropic_onehop_2
3,"""Can you get me the transfer history of Erling...",anthropic_onehop_3
4,"""What are the current market values of players...",anthropic_onehop_4


In [15]:
# Persist the collated questions as a tab-separated file, without the index column.
collated_question_df.to_csv(
    path_or_buf="collated_questions.tsv",
    sep="\t",
    index=False,
)

In [16]:
# Run only the first collated question on its own.
first_row = collated_question_df.iloc[0]
run_query(query=first_row["question"], id=first_row["id"])

In [17]:
# Process every question except the one at index label 0, which is run on its
# own via `first_row`.
for idx, row in collated_question_df.iterrows():
    if idx != 0:
        try:
            run_query(row["question"], row["id"])
        except Exception as e:
            print(f"Error processing question {idx}: {e}")
            # Remove whatever the failed run left behind in the working
            # directory so it cannot be picked up by the next question.
            for stale_file in glob.glob("r_result_*"):
                os.remove(stale_file)
            if os.path.exists("logs.txt"):
                os.remove("logs.txt")