In [1]:
import os 
import json 
import random
from src.utils.generate import Generation
from src.utils import utils

In [2]:
def load_1_shared_docs(root: str = '/datadrive/CuongHV/project/DATA/AMI_MS_Cleaned'):
    """
    Load the shared documents of one randomly chosen meeting file.

    Args:
        root: Directory that holds the meeting ``.json`` files.

    Returns:
        The value stored under the ``'shared-doc'`` key of the chosen file.

    Raises:
        IndexError: If ``root`` contains no ``.json`` file (raised by
            ``random.choice`` on an empty sequence).
        KeyError: If the chosen file has no ``'shared-doc'`` key.
    """
    # Only regular .json files are candidates, so a stray sub-directory or
    # non-JSON file is never picked. Sorting makes the choice reproducible
    # under a fixed random seed, since os.listdir order is arbitrary.
    candidates = sorted(
        name for name in os.listdir(root)
        if name.endswith('.json') and os.path.isfile(os.path.join(root, name))
    )
    file = random.choice(candidates)
    # Explicit UTF-8 so the result does not depend on the platform's default
    # encoding (the other loaders in this notebook also read as UTF-8).
    with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data['shared-doc']

# Generate one truncated multi-input agenda

In [3]:
# Pick one meeting file via load_1_shared_docs() and pass its shared docs
# through utils.truncate_shared_docs with max_tokens=90000.
shared_docs = load_1_shared_docs()
truncated_shared_docs = utils.truncate_shared_docs(
    shared_docs,
    max_tokens=90000,
)

In [4]:
from src.utils.generate import Generation 
from src.utils import utils
# The top-level file config names the model-config and prompting files under
# its "llm_env" section; load both (model config first, then prompts).
file_config = utils.load_config("src/config/file_config.yml")
model_config, prompt_config = (
    utils.load_config(file_config["llm_env"][key])
    for key in ("model_config_file", "prompting_file")
)
# The generator object is built from the prompt configuration only.
generate = Generation(prompt_config)

In [5]:
from src.utils.llm_models import get_llm_model
import os
from dotenv import load_dotenv
# Load variables from a .env file (if any) so os.getenv can see them.
load_dotenv()

# NOTE(review): 'openai_api_key' is stored here but is not passed to
# get_llm_model below -- presumably the client reads OPENAI_API_KEY from the
# environment itself; confirm.
config = {
    'openai_api_key': os.getenv("OPENAI_API_KEY"),
    'llm_choice': 'OpenAI',  # or: Ollama
    'model_choice': 'gpt-4o-mini',  # or: gpt-4o
    'parameters': {'temperature': 0.2, 'top_p': 0.95, 'max_retries': 2},
}

# Build the chat model from the backend, model name and parameters above.
llm = get_llm_model(
    chatmodel=config['llm_choice'],
    model_name=config['model_choice'],
    param=config['parameters'],
)

In [6]:
from langchain_core.tracers.context import tracing_v2_enabled

# Run the generation inside the tracing context, using the project name
# "generate_truncated_multi_input".
with tracing_v2_enabled(project_name="generate_truncated_multi_input"):
    agenda = generate.generate_truncated_multi_input_agenda(
        llm=llm,
        shared_docs=truncated_shared_docs,
    )
# Bare expression so the notebook displays the returned object.
agenda

{'text': '```\n**Meeting Agenda: Remote Control Design Project Update**  \n\n1. **Introductions and Meeting Overview (5 minutes)**  \n  + **Meeting Objectives (2 minutes)**  \n    + Confirm the purpose of the meeting: to discuss design updates and decisions on the remote control project.  \n    + Outline focus areas: design components, user interface, and marketing strategies.  \n  + **Team Introductions (3 minutes)**  \n    + Brief introductions of team members and their roles in the project.  \n    + Overview of the project’s current status.\n\n2. **Design Presentations (10 minutes)**  \n  + **Components Design Update (5 minutes)**  \n    + Presentation by Christine on the conceptual specification of components.  \n    + Discussion on materials and findings from past evaluations.  \n  + **User Interface Design Update (5 minutes)**  \n    + Presentation by Agnes on the user interface functionalities and design considerations.  \n    + Review of feedback on the first draft of the user 

In [7]:
# Print the 'text' field so its newlines render, instead of the escaped
# "\n" sequences shown in the dict repr.
print(agenda["text"])

```
**Meeting Agenda: Remote Control Design Project Update**  

1. **Introductions and Meeting Overview (5 minutes)**  
  + **Meeting Objectives (2 minutes)**  
    + Confirm the purpose of the meeting: to discuss design updates and decisions on the remote control project.  
    + Outline focus areas: design components, user interface, and marketing strategies.  
  + **Team Introductions (3 minutes)**  
    + Brief introductions of team members and their roles in the project.  
    + Overview of the project’s current status.

2. **Design Presentations (10 minutes)**  
  + **Components Design Update (5 minutes)**  
    + Presentation by Christine on the conceptual specification of components.  
    + Discussion on materials and findings from past evaluations.  
  + **User Interface Design Update (5 minutes)**  
    + Presentation by Agnes on the user interface functionalities and design considerations.  
    + Review of feedback on the first draft of the user interface.

3. **Discussion

# Generate truncated multi-input agendas for 67 meetings

In [8]:
import os 
from src.utils import utils
import pandas as pd 

In [9]:
def load_data_path(path: str = "EDA/token_data.csv") -> pd.DataFrame:
    """
    Read the token-statistics CSV and keep the rows that have shared documents.

    Args:
        path: Location of the CSV file; it must contain a
            ``num_tokens_shared_doc`` column.

    Returns:
        The rows whose ``num_tokens_shared_doc`` is greater than zero, with
        their original index labels.
    """
    token_stats = pd.read_csv(path)
    has_shared_doc = token_stats["num_tokens_shared_doc"] > 0
    return token_stats.loc[has_shared_doc]
def extract_data_from_file(file: str, root: str = '/datadrive/CuongHV/project/DATA/AMI_MS_Cleaned') -> dict:
    """
    Load one meeting JSON file and return its parsed content.

    Args:
        file: File name, resolved relative to ``root``.
        root: Directory that holds the meeting JSON files.

    Returns:
        The parsed JSON content. Callers in this notebook index the result
        by key (``'shared-doc'``), so a JSON object (``dict``) is expected,
        not a DataFrame.
    """
    path = f'{root}/{file}'
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [10]:
from src.utils.generate import Generation 
from src.utils import utils
from src.utils.llm_models import get_llm_model
import os
from dotenv import load_dotenv
# Load variables from a .env file (if any) before anything reads the environment.
load_dotenv()

# The top-level file config names the model-config and prompting files under
# its "llm_env" section; load both (model config first, then prompts).
file_config = utils.load_config("src/config/file_config.yml")
model_config, prompt_config = (
    utils.load_config(file_config["llm_env"][key])
    for key in ("model_config_file", "prompting_file")
)
# The generator object is built from the prompt configuration only.
generate = Generation(prompt_config)

# Settings for the chat model used by the batch run.
# NOTE(review): 'openai_api_key' is stored here but is not passed to
# get_llm_model below -- presumably the client reads OPENAI_API_KEY from the
# environment itself; confirm.
config = {
    'openai_api_key': os.getenv("OPENAI_API_KEY"),
    'llm_choice': 'OpenAI',  # or: Ollama
    'model_choice': 'gpt-4o-mini',  # or: gpt-4o
    'parameters': {'temperature': 0.2, 'top_p': 0.95, 'max_retries': 2},
}

# Build the chat model from the backend, model name and parameters above.
llm = get_llm_model(
    chatmodel=config['llm_choice'],
    model_name=config['model_choice'],
    param=config['parameters'],
)

In [11]:
# Generate one agenda per meeting that has shared documents and save each
# result as <output_dir>/<meeting file name>.
output_dir = '/datadrive/CuongHV/project/DATA/mm_agenda_generation_research_output/truncated_multi_input_agenda'
# Make sure the target directory exists up front, so a missing directory
# cannot fail the write after an LLM call has already been made.
os.makedirs(output_dir, exist_ok=True)

df_shared_docs = load_data_path()
for file in df_shared_docs['file'].values:
    print(file)
    sample_jsondict = extract_data_from_file(file)
    shared_docs = sample_jsondict['shared-doc']
    # Pass the shared docs through the truncation helper (max_tokens=90000)
    truncate_shared_docs = utils.truncate_shared_docs(shared_docs, max_tokens=90000)
    # Generate the agenda inside the tracing context
    with tracing_v2_enabled(project_name="generate_truncated_multi_input"):
        agenda = generate.generate_truncated_multi_input_agenda(llm=llm, shared_docs=truncate_shared_docs)
    data = {
        'truncate_shared_docs': truncate_shared_docs,
        'agenda': agenda['text'], 
    }
    # Open with 'w', not 'a': appending on a re-run would put a second JSON
    # document in the same file, which json.load can no longer parse.
    with open(f'{output_dir}/{file}', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"Finish {file}")


ES2004d.json
Finish ES2004d.json
ES2008b.json
Finish ES2008b.json
IS1004d.json
Finish IS1004d.json
IS1008a.json
Finish IS1008a.json
IS1004a.json
Finish IS1004a.json
IS1005b.json
Finish IS1005b.json
ES2008c.json
Finish ES2008c.json
ES2008d.json
Finish ES2008d.json
IS1006d.json
Finish IS1006d.json
IS1006c.json
Finish IS1006c.json
IS1005a.json
Finish IS1005a.json
ES2006d.json
Finish ES2006d.json
ES2004b.json
Finish ES2004b.json
ES2009d.json
Finish ES2009d.json
IS1005c.json
Finish IS1005c.json
ES2005a.json
Finish ES2005a.json
ES2006b.json
Finish ES2006b.json
IS1001c.json
Finish IS1001c.json
ES2003a.json
Finish ES2003a.json
IS1001d.json
Finish IS1001d.json
IS1001a.json
Finish IS1001a.json
ES2002b.json
Finish ES2002b.json
ES2004c.json
Finish ES2004c.json
ES2002a.json
Finish ES2002a.json
ES2003c.json
Finish ES2003c.json
ES2008a.json
Finish ES2008a.json
ES2006c.json
Finish ES2006c.json
ES2007d.json
Finish ES2007d.json
IS1009d.json
Finish IS1009d.json
IS1009b.json
Finish IS1009b.json
ES2005d.js

# Compare with Recap Agenda

In [31]:
# Directories holding the two sets of saved agendas to compare, and the
# meeting file to look at in both.
recap_root = '/datadrive/CuongHV/project/DATA/mm_agenda_generation_research_output/recap_agenda_title' 
# NOTE(review): this points at the *single*-input output directory, while the
# batch loop in this notebook writes to truncated_multi_input_agenda --
# confirm which directory is meant. The name is also misspelled
# ("truncatied"); it is kept because a later cell refers to it.
truncatied_s_root = '/datadrive/CuongHV/project/DATA/mm_agenda_generation_research_output/truncated_single_input_agenda'
file = 'ES2002a.json'
def load_agenda(file, root):
    """
    Read a saved result file and return its agenda.

    Args:
        file: File name inside ``root``.
        root: Directory that holds the saved result files.

    Returns:
        The value stored under the ``'agenda'`` key of the JSON file.
    """
    with open(f'{root}/{file}', 'r', encoding='utf-8') as handle:
        return json.load(handle)['agenda']

In [32]:
# Load the agenda saved under recap_root for the selected file and print it.
recap_agenda = load_agenda(file=file, root=recap_root)
print(recap_agenda)

**Meeting Agenda: Remote Control Design Project Kick-off**  
1. **Introductions and Icebreaker (5 minutes)**  
  + **Icebreaker Activity** (2 minutes)  
    + Participants draw their favorite animal and describe its key characteristics.  
  + **Overview of Participants** (3 minutes)  
    + Brief introductions of team members and their roles in the project.  

2. **Project Overview (10 minutes)**  
  + **Project Goals Presentation** (5 minutes)  
    + Outline of project objectives: design a trendy, original, and user-friendly remote control.  
    + Financial guidelines: Selling price set at €25, with a production cost cap of €12.50.  
  + **Discussion on Market Considerations** (5 minutes)  
    + Explore international market implications and regional pricing strategies.  

3. **Feature Brainstorming (10 minutes)**  
  + **Core Functionalities Discussion** (5 minutes)  
    + Identify essential features: multi-device compatibility, user-friendly interface, and additional functionalit

In [24]:
# Load the agenda saved under truncatied_s_root for the same file and print it.
truncatied_s_agenda = load_agenda(file=file, root=truncatied_s_root)
print(truncatied_s_agenda)

**Meeting Agenda: Mando Remote Control Project Update**  

1. **Opening and Introductions (5 minutes)**  
   + **Welcome and Overview** (2 minutes)  
     + Brief introduction of the project and its objectives.  
     + Overview of the meeting agenda.  
   + **Participant Introductions** (3 minutes)  
     + Each participant shares their role in the project.  

2. **Project Status Updates (10 minutes)**  
   + **Functional Design Review** (5 minutes)  
     + Summary of decisions made regarding the remote control's functionality.  
     + Discussion on the exclusion of speech recognition and LCD features.  
   + **Conceptual Design Insights** (5 minutes)  
     + Presentation of the banana design concept and its implications.  
     + Review of user interface options: scroll wheels and buttons.  

3. **Cost Evaluation and Budget Discussion (10 minutes)**  
   + **Production Cost Analysis** (5 minutes)  
     + Presentation of the current cost analysis and budget constraints.  
     + D