# Create the category-based truncated multi-input agenda for 67 meetings

In [5]:
import os 
from src.utils import utils
import pandas as pd 
from langchain_core.tracers.context import tracing_v2_enabled
import json

In [6]:
from src.utils.generate import Generation 
from src.utils import utils
from src.utils.llm_models import get_llm_model
import os
from dotenv import load_dotenv
load_dotenv()

# Load the project configuration. The top-level file config holds, under its
# "llm_env" section, the paths of the model-config and prompting files.
file_config = utils.load_config("src/config/file_config.yml")
llm_env_section = file_config["llm_env"]
model_config = utils.load_config(llm_env_section["model_config_file"])
prompt_config = utils.load_config(llm_env_section["prompting_file"])

# The generation helper is built from the prompting configuration.
generate = Generation(prompt_config)

# Settings for the chat model used throughout this notebook.
llm_parameters = {
    'temperature': 0.2,
    'top_p': 0.95,
    'max_retries': 2,
}
config = {
    'openai_api_key': os.getenv("OPENAI_API_KEY"),  # None when the variable is unset
    'llm_choice': 'OpenAI',  # author's note: Ollama
    'model_choice': 'gpt-4o-mini',  # author's note: gpt-4o
    'parameters': llm_parameters,
}

# Instantiate the model from the provider, model name and sampling parameters.
# NOTE(review): 'openai_api_key' is stored in `config` but not passed here.
llm = get_llm_model(
    chatmodel=config['llm_choice'],
    model_name=config['model_choice'],
    param=config['parameters'],
)

In [8]:
# Generate one category-conditioned agenda per meeting file and save each
# result (inputs + generated agenda text) as a standalone JSON file.

# Destination directory for the generated agendas (one JSON file per meeting).
OUTPUT_DIR = '/datadrive/CuongHV/project/DATA/mm_agenda_generation_research_output/generate_category_truncated_multi_input_agenda'
# Token budget handed to utils.truncate_shared_docs.
MAX_SHARED_DOC_TOKENS = 90000

# Create the destination up front so a missing directory fails before any
# LLM call is made, not after the first agenda has been generated.
os.makedirs(OUTPUT_DIR, exist_ok=True)

df_shared_docs = utils.load_data_with_shared_doc_path()
for file in df_shared_docs['file'].values:
    print(file)
    category, description = utils.extract_category(file)
    sample_jsondict = utils.extract_data_from_file(file)
    shared_docs = sample_jsondict['shared-doc']
    # Truncate the shared documents to the token budget before prompting.
    truncate_shared_docs = utils.truncate_shared_docs(
        shared_docs, max_tokens=MAX_SHARED_DOC_TOKENS
    )
    # Generate the agenda; the call is traced under a dedicated project name.
    with tracing_v2_enabled(project_name="generate_category_truncated_multi_input_agenda"):
        agenda = generate.generate_category_truncated_multi_input_agenda(
            llm=llm,
            shared_docs=truncate_shared_docs,
            category=category,
            description=description,
        )
    data = {
        'truncate_shared_docs': truncate_shared_docs,
        'category': category,
        'description': description,
        'agenda': agenda['text'],
    }
    # Write with 'w', not 'a': appending a second json.dump to an existing
    # file on a re-run would leave two concatenated JSON documents in it,
    # which json.load cannot parse.
    with open(f'{OUTPUT_DIR}/{file}', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"Finish {file}")


ES2004d.json
Finish ES2004d.json
ES2008b.json
Finish ES2008b.json
IS1004d.json
Finish IS1004d.json
IS1008a.json
Finish IS1008a.json
IS1004a.json
Finish IS1004a.json
IS1005b.json
Finish IS1005b.json
ES2008c.json
Finish ES2008c.json
ES2008d.json
Finish ES2008d.json
IS1006d.json
Finish IS1006d.json
IS1006c.json
Finish IS1006c.json
IS1005a.json
Finish IS1005a.json
ES2006d.json
Finish ES2006d.json
ES2004b.json
Finish ES2004b.json
ES2009d.json
Finish ES2009d.json
IS1005c.json
Finish IS1005c.json
ES2005a.json
Finish ES2005a.json
ES2006b.json
Finish ES2006b.json
IS1001c.json
Finish IS1001c.json
ES2003a.json
Finish ES2003a.json
IS1001d.json
Finish IS1001d.json
IS1001a.json
Finish IS1001a.json
ES2002b.json
Finish ES2002b.json
ES2004c.json
Finish ES2004c.json
ES2002a.json
Finish ES2002a.json
ES2003c.json
Finish ES2003c.json
ES2008a.json
Finish ES2008a.json
ES2006c.json
Finish ES2006c.json
ES2007d.json
Finish ES2007d.json
IS1009d.json
Finish IS1009d.json
IS1009b.json
Finish IS1009b.json
ES2005d.js