### UTILS

In [1]:
import base64
import os
import json
from datetime import datetime
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, AIMessage
import requests
import re
import json
from typing import Dict, Any

def load_markdown_to_str(file_path):
    """
    Read a Markdown file and return its whole content as a single string.

    Args:
    file_path (str): Path to the Markdown file; it is decoded as UTF-8.

    Returns:
    str: The complete text of the file.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        return source.read()

def load_latest_sprint_status(base_path):
    """
    Return the text of project-sprint-status.md from the highest-numbered sprint folder.

    A sprint folder is a sub-directory of base_path whose name is 'sprint'
    followed only by digits (e.g. 'sprint3'); the one with the largest number
    is considered the latest.

    Args:
    base_path (str): Path to the directory containing sprint folders.

    Returns:
    str: The file content on success. Otherwise a human-readable message:
         no sprint folder exists, the status file is missing, or an exception
         occurred (the exception text is embedded in the message).
    """
    try:
        # Collect sub-directories named sprint<digits>
        candidates = []
        for entry in os.listdir(base_path):
            if not os.path.isdir(os.path.join(base_path, entry)):
                continue
            if entry.startswith('sprint') and entry[6:].isdigit():
                candidates.append(entry)

        if not candidates:
            return "No sprint directories found."

        # Highest sprint number wins
        latest_sprint = max(candidates, key=lambda name: int(name[6:]))
        status_file = os.path.join(base_path, latest_sprint, 'project-sprint-status.md')

        if not os.path.exists(status_file):
            return f"project-sprint-status.md not found in {latest_sprint}."

        with open(status_file, 'r', encoding='utf-8') as handle:
            return handle.read()

    except Exception as err:
        # Errors are reported through the return value rather than raised
        return f"An error occurred: {str(err)}"
    
    
def load_latest_sprint_backlog(base_path):
    """
    Return the parsed project-sprint-backlog.json from the highest-numbered sprint folder.

    A sprint folder is a sub-directory of base_path whose name is 'sprint'
    followed only by digits (e.g. 'sprint3'); the one with the largest number
    is considered the latest.

    Args:
    base_path (str): Path to the directory containing sprint folders.

    Returns:
    dict: The decoded JSON content on success. Otherwise a dict with a single
          "error" key describing the problem: no sprint folder exists, the
          backlog file is missing, or an exception occurred.
    """
    try:
        # Collect sub-directories named sprint<digits>
        candidates = []
        for entry in os.listdir(base_path):
            if not os.path.isdir(os.path.join(base_path, entry)):
                continue
            if entry.startswith('sprint') and entry[6:].isdigit():
                candidates.append(entry)

        if not candidates:
            return {"error": "No sprint directories found."}

        # Highest sprint number wins
        latest_sprint = max(candidates, key=lambda name: int(name[6:]))
        backlog_file = os.path.join(base_path, latest_sprint, 'project-sprint-backlog.json')

        if not os.path.exists(backlog_file):
            return {"error": f"project-sprint-backlog.json not found in {latest_sprint}."}

        with open(backlog_file, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    except Exception as err:
        # Errors are reported through the return value rather than raised
        return {"error": f"An error occurred: {str(err)}"}
    
def export_transcript(state, folder_path):
    """
    Write the generated transcript to a new numbered .txt file in folder_path.

    The file name is the meeting type (spaces replaced by underscores)
    followed by a running number: the count of regular files already in the
    folder plus one.

    Args:
    state (dict): Must contain "transcript" (text to write) and "meeting_type".
    folder_path (str): Target folder; created if it does not exist.

    Returns:
    None
    """
    transcript = state["transcript"]
    meeting_type = state["meeting_type"]

    # The folder must exist BEFORE it is listed, otherwise the first export
    # into a fresh project fails with FileNotFoundError.
    os.makedirs(folder_path, exist_ok=True)

    existing_files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
    file_number = len(existing_files) + 1

    filename = meeting_type.replace(" ", "_") + str(file_number) + ".txt"
    file_path = os.path.join(folder_path, filename)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(transcript)

def export_state(state, folder_path, filename):
    """
    Dump the state dictionary to a new numbered JSON file in folder_path.

    The file is named "<filename><n>.json", where n is the count of regular
    files already in the folder plus one.

    Args:
    state (dict): JSON-serialisable state to store.
    folder_path (str): Target folder; created if it does not exist.
    filename (str): Base name of the file, without number or extension.

    Returns:
    None
    """
    # The folder must exist BEFORE it is listed, otherwise the first export
    # into a fresh project fails with FileNotFoundError.
    os.makedirs(folder_path, exist_ok=True)

    existing_files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
    file_number = len(existing_files) + 1

    file_path = os.path.join(folder_path, filename + str(file_number) + ".json")

    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(state, f, indent=4)

def load_txt_to_str(file_path):
    """
    Read a plain-text file and return its whole content as a single string.

    Args:
    file_path (str): Path to the text file; it is decoded as UTF-8.

    Returns:
    str: The complete text of the file.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        return source.read()

def load_from_json(file_path):
    """
    Load data from a JSON file, falling back to an empty dict on failure.

    Args:
    file_path (str): Path to the JSON file (read as UTF-8).

    Returns:
    The decoded JSON value (whatever type the file's top-level value has).
    An empty dict is returned, and a message printed, when the file does not
    exist or does not contain valid JSON.
    """
    try:
        # Explicit UTF-8 keeps this consistent with the other loaders in this
        # file and independent of the platform's default locale encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return {}
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in file {file_path}")
        return {}
    
def render_mermaid_diagram(diagram_code: str, timeout: float = 10.0) -> str:
    """
    Ask the mermaid.ink service to render a diagram and return the image URL.

    Args:
    diagram_code (str): Mermaid source of the diagram.
    timeout (float): Seconds to wait for the rendering service before giving up.

    Returns:
    str: The mermaid.ink image URL when the service answers with HTTP 200.
         Otherwise (non-200 answer, timeout or any other request failure) the
         original diagram wrapped in a ```mermaid fenced code block.
    """
    fallback = f"```mermaid\n{diagram_code}\n```"

    # The service expects the diagram source base64-encoded in the URL path
    encoded_diagram = base64.b64encode(diagram_code.encode('utf-8')).decode('utf-8')
    url = f"https://mermaid.ink/img/{encoded_diagram}"

    try:
        # Without a timeout a stalled connection would block the caller forever
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network problems are treated like a failed rendering
        return fallback

    if response.status_code == 200:
        return url
    return fallback
    
def format_mermaid(input_string):
    """
    Clean up Mermaid source that arrives wrapped in Markdown fences.

    All "```mermaid" and "```" markers are removed, literal backslash-n
    sequences are turned into real newlines, and surrounding whitespace is
    stripped.

    Args:
    input_string (str): Raw Mermaid text, possibly fenced and with escaped newlines.

    Returns:
    str: The cleaned Mermaid source.
    """
    text = input_string
    # The longer marker must go first so that no stray "mermaid" is left behind
    for fence in ('```mermaid', '```'):
        text = text.replace(fence, '')

    # '\\n' is the two-character sequence backslash + n
    return text.replace('\\n', '\n').strip()


def export_meeting_history(state, output_file='meeting_history.json'):
    """
    Export the meeting history to a JSON file.

    Args:
    state (dict): The state dictionary containing the meeting history under
                  the "meeting_history" key (an empty list is used if absent).
    output_file (str): The name of the output file. Defaults to 'meeting_history.json'.

    Returns:
    dict: The unchanged state, so the function can be chained in a workflow.
          A write failure is reported on stdout and does not raise.
    """
    meeting_history = state.get("meeting_history", [])

    # Ensure meeting_history is a list
    if not isinstance(meeting_history, list):
        meeting_history = [meeting_history]

    try:
        # Explicit UTF-8 so the file reads back identically on every platform
        with open(output_file, 'w', encoding='utf-8') as file:
            json.dump(meeting_history, file, indent=2)
        print(f"Meeting history exported successfully to {output_file}")
    except OSError:
        print(f"Error: Unable to write to file {output_file}")

    return state


def manage_sprint_folders(state, project_folder):
    """
    Manages sprint folders based on the meeting type.
    Creates a new sprint folder if necessary and adds required files.

    A new folder "sprint<N+1>" is created only when the meeting type contains
    "plan" (case-insensitive), where N is the highest number found in the
    existing folders whose name starts with "sprint" (1 if there are none).
    The folder is seeded with a placeholder status file and an empty backlog.

    :param state: The current state dictionary
    :param project_folder: Path to the project folder
    :return: Updated state with new sprint information (the same dict, with
             current_sprint_number, current_sprint_folder, sprint_status_file
             and sprint_backlog_file set when a folder was created)
    """
    # "planning" already contains "plan", so one substring test is enough.
    # `or ""` tolerates a state where meeting_type is present but None.
    meeting_type = (state.get("meeting_type") or "").lower()
    if "plan" not in meeting_type:
        print("Meeting type does not indicate a planning session. No new sprint folder created.")
        return state

    # Collect the first number of every existing sprint directory
    sprint_numbers = []
    for entry in os.listdir(project_folder):
        if not entry.startswith("sprint"):
            continue
        if not os.path.isdir(os.path.join(project_folder, entry)):
            continue
        number_match = re.search(r'\d+', entry)
        if number_match:
            sprint_numbers.append(int(number_match.group()))

    new_sprint_number = max(sprint_numbers) + 1 if sprint_numbers else 1

    # Create new sprint folder
    new_sprint_folder = os.path.join(project_folder, f"sprint{new_sprint_number}")
    os.makedirs(new_sprint_folder, exist_ok=True)

    # NOTE(review): these file names use underscores, while the readers in this
    # file look for hyphenated names ('project-sprint-status.md',
    # 'project-sprint-backlog.json', 'sprint-state.md', 'sprint-backlog.md').
    # Names are kept unchanged here; confirm which convention is intended.
    status_file_path = os.path.join(new_sprint_folder, "project_sprint_status.md")
    with open(status_file_path, 'w', encoding='utf-8') as status_file:
        status_file.write(f"# Sprint {new_sprint_number} Status\n\nStatus details will be updated here.")

    backlog_file_path = os.path.join(new_sprint_folder, "project_sprint_backlog.json")
    with open(backlog_file_path, 'w', encoding='utf-8') as backlog_file:
        json.dump({}, backlog_file, indent=2)

    # Update state with new sprint information
    state["current_sprint_number"] = new_sprint_number
    state["current_sprint_folder"] = new_sprint_folder
    state["sprint_status_file"] = status_file_path
    state["sprint_backlog_file"] = backlog_file_path

    print(f"Created new sprint folder: {new_sprint_folder}")
    return state


def load_json(file_path: str) -> Dict[str, Any]:
    """
    Parse a UTF-8 encoded JSON file and hand back the decoded data.

    Progress and failures are reported on stdout; every failure is re-raised
    unchanged after it has been reported.

    :param file_path: The path to the JSON file to be read
    :return: The data decoded from the JSON file
    :raises FileNotFoundError: If the specified file does not exist
    :raises json.JSONDecodeError: If the file is not valid JSON
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            parsed = json.load(handle)
        print(f"Successfully loaded JSON from {file_path}")
        return parsed
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
        raise
    except json.JSONDecodeError as decode_error:
        print(f"Error: The file {file_path} is not valid JSON. Error: {str(decode_error)}")
        raise
    except Exception as unexpected:
        # Anything else (permissions, decoding, ...) is reported and propagated
        print(f"An unexpected error occurred while reading {file_path}: {str(unexpected)}")
        raise


def get_latest_sprint_folder(project_folder: str) -> str:
    """
    Find the sprint folder with the highest number inside a project folder.

    Only directories whose name begins with "sprint" followed by digits
    (matched case-insensitively) are considered. The result is also printed.

    :param project_folder: Path to the project folder (e.g., 'project1/')
    :return: Name of the latest sprint folder (e.g., 'sprint5'), or None if no sprint folders are found
    """
    sprint_pattern = re.compile(r'sprint(\d+)', re.IGNORECASE)

    best_name = None
    best_number = None
    for entry in os.listdir(project_folder):
        if not os.path.isdir(os.path.join(project_folder, entry)):
            continue
        found = sprint_pattern.match(entry)
        if found is None:
            continue
        number = int(found.group(1))
        # Strict comparison keeps the first folder seen when numbers tie
        if best_number is None or number > best_number:
            best_name, best_number = entry, number

    if best_name is None:
        print("No sprint folders found.")
        return None

    print(f"Latest sprint folder found: {best_name}")
    return best_name

### Prompts

In [2]:
# System prompt for the first pipeline step: the model acts as Scrum Master and
# describes the purpose of the next meeting.
# Placeholders filled with str.format: company_data, employee_profiles,
# project_general, project_requirements, project_state, project_backlog,
# project_sprint_state, project_sprint_backlog, meeting_history.
# The sprint-backlog section interpolates {project_sprint_backlog}; it must not
# repeat {project_backlog}, which already appears in the project section.
MEETING_PURPOSE_GENERATOR_PROMPT ="""
You are the Scrum Master in a company. Your work is passed to pipeline where the goal is to generate realistic meeting transcripts by simulating a small Tech company.
Your resbonsibility is to make a brief description about the next meeting.
Here you will find information about the company where you are giving advice as a Scrum Master: \n #### \n {company_data} \n #### \n
Here you will find information about the employees and their detailed profile: \n #### \n {employee_profiles} \n #### \n
The company currently working on this project: \n #### \n {project_general} \n #### \n
Here you will find the requirements that needs to be fufilled: \n #### \n {project_requirements} \n #### \n

Here you will find information about the project status overall: \n #### \n {project_state} \n #### \n
Here you will find information about the project backlog: \n #### \n {project_backlog} \n #### \n

To give you the context to decide what the next meeting should be. 
The company organizes the information into project and if there is a sprint going on you find information about it in the sprint state with a sprint backlog.

If there is a sprint in progress you will find information about it here: \n #### \n {project_sprint_state} \n #### \n
If there is a sprint you will find information about the sprint backlog here: \n #### \n {project_sprint_backlog} \n #### \n


Here you will find the past meetings that happened. \n #### \n {meeting_history} \n #### \n


As you are an experineced srum master your task is to help moving the project toward. To achive this you need to decide what should the team discuss in their next meeting.

Generate a brief description of the purpose for a meeting. 
Based on the company and project information and the current project state.
Since the company uses agile methodology the meeting type could be 
[Sprint planning meeting, 
Daily Scrum meeting, 
Backlog Refinement, 
Sprint review meeting, 
Sprint retrospective meeting,
Technical Debt Meetings, 
Design or Architecture Sessions]
Refer back to project state and to meeting history to see where the project is standing right now.
You have the make the description so the project can move foward.
If there is any techical condiseration that needs to be address then contain that in your description.
Set the name and the date with start to end when the meeting take place.
"""

# System prompt for the meeting-type step: the model picks exactly one meeting
# type from the listed options, based on the previously generated purpose.
# Placeholder filled with str.format: meeting_purpose.
# NOTE: the constant name contains a typo ("TPYE"); it is referenced under this
# exact name by meeting_type_node, so it must not be renamed in isolation.
MEETING_TPYE_SELECTOR ="""
You are the Meeting Type Selector in a pipeline for generating realistic meeting transcripts.
Your responsibility is to determine the most appropriate type of meeting based on the given input AND the time of the meeting.
You will be provided with a brief description of the meeting's purpose.

Description:
####
{meeting_purpose}
####
Choose from the following meeting types:
- Sprint planning meeting: Technical considerations are discussed when planning tasks for the upcoming sprint, including potential challenges and solutions.
- Daily Scrum meeting: While this is primarily for quick updates, team members can briefly mention technical challenges they're facing. More in-depth discussions are typically taken offline.
- Backlog Refinement: The team discusses and clarifies user stories, which often involves addressing technical aspects and potential solutions.
- Sprint review meeting: The team demonstrates completed work, which can lead to technical discussions about implementation details.
- Sprint retrospective meeting: Team members can bring up technical issues that affected the sprint and discuss ways to improve.
- Technical Debt Meetings: Some teams hold separate meetings to address technical debt and architectural concerns.
- Design or Architecture Sessions: These are ad-hoc meetings focused on solving specific technical problems or planning system architecture.
Respond only with the selected meeting type.
"""


# System prompt for the outline step: the model produces the flow of topics
# for the meeting.
# Placeholders filled with str.format: meeting_type, meeting_purpose,
# company_data, employee_profiles, project_general, project_state,
# project_backlog, project_sprint_state, project_sprint_backlog.
# The sprint-backlog section interpolates {project_sprint_backlog}; it must not
# repeat {project_backlog}, which already appears in the project section.
TOPIC_OUTLINER_PROMPT = """
You are the Topic Outliner in a meeting transcript generation pipeline. Your task is to create a structured flow or outline of the meeting. 
Take into consideration that the meeting will be a {meeting_type}.
The purpose of the meeting is the following: \n####\n {meeting_purpose} \n####\n

You can find information about the company you are giving advice as a Scrum Master: \n####\n {company_data} \n####\n
You can find information about the employees and their detailed profile: \n####\n {employee_profiles} \n####\n 
You can find information about the current project that the team is working on this contains general information: \n####\n {project_general} \n####\n

To give you the context to decide what the next meeting should be about. 
The company organizes the information into project and if there is a sprint going on you find information about it in the sprint state with a sprint backlog.

Here you will find information about the project status overall: \n #### \n {project_state} \n #### \n
Here you will find information about the project backlog: \n #### \n {project_backlog} \n #### \n

If there is a sprint in progress you will find information about it here: \n #### \n {project_sprint_state} \n #### \n
If there is a sprint you will find information about the sprint backlog here: \n #### \n {project_sprint_backlog} \n #### \n


Provide the outline and the flow of the main topics and ideas or technical problems with a small description, that needs to be spoken about in the meeting.
"""


# System prompt for the participant step: the model lists who has to attend,
# with names, roles and responsibilities relevant to the meeting.
# Placeholders filled with str.format: meeting_type, meeting_purpose,
# meeting_outline, employee_profiles.
PARTICIPANT_DEFINER_PROMPT = """
You are the Participant Definer in a meeting transcript generation pipeline.
Your role is to determine the necessary participants for the meeting based on the meeting type and topic outline.

The meeting type: {meeting_type}
The meeting purpose: {meeting_purpose}
The meeting outline: {meeting_outline}

When deciding who needs to be there in the given meeting, take the employee profiles to consideration.

Here you can find the detailed description of the employees that can be participant:
####
{employee_profiles}
####
Provide a list the participants, including their names, roles, and key responsibilities relevant to the meeting topics.
"""


# System prompt for the length-estimation step: the model estimates how long
# the meeting should be and is told to answer "MORE TURNS NEEDED" when the
# transcript would not fit into a single 8192-token reply.
# Placeholders filled with str.format: meeting_type, meeting_purpose,
# meeting_outline.
MEETING_LENGTH_ESTIMATOR = """
You are the Meeting Length Estimator in a meeting transcript generation pipeline.
Your job is to estimate an appropriate length for the meeting.
Take into consideration that the meeting will be a {meeting_type}.
The purpose of the meeting is the following: {meeting_purpose}.
This will be the outline of the meeting: {meeting_outline}

Note as the final output will be generated by a Large Language model it only can respand with 8192 token which is ≈ 5461 to 6301 words.
An average person speaks at a rate of about 125-150 words per minute in normal conversation. 
If we assume about 70 percent of meeting time involves active speaking, then the average pace: ~85-105 words per minute.
You can calculate the needed minutes: by total_length_in_words/average_pace
You can calculate the needed tokens with: total_length*average_pace*1.5
When you decide how long the meeting will be make sure if it will be able to fit the 8192 response output size. 
If not then response with: "MORE TURNS NEEDED"
"""

# System prompt for the final generation step: the model writes the meeting
# transcript and is told to end with the word "FINISHED", which
# generate_transcript_node uses to decide whether to ask for a continuation.
# Placeholders filled with str.format: company_data, project_general,
# employee_profiles, project_state, project_backlog, project_sprint_state,
# project_sprint_backlog, meeting_history, meeting_type, meeting_purpose,
# meeting_outline, meeting_participants, meeting_length.
TRANSCRIPT_GENERATOR_PROMPT = """
You are the Conversation Generator, the final node in a meeting transcript generation pipeline.
Your task is to create a realistic meeting transcript based on all previous inputs.
You will receive the meeting type, topic outline, list of participants, and estimated meeting length.

Here are some the necessarily information, use these as a context. Each section will be separeted with four hashtag like ####. Use this as a delimiter. 
Information about the company: \n####\n {company_data} \n####\n
Information about the current project that the team is working on this contains general information about the project: \n####\n {project_general} \n####\n
Information about the employees and their detailed profile: \n####\n {employee_profiles} \n####\n

Here you will find information about the project status overall: \n #### \n {project_state} \n #### \n
Here you will find information about the project backlog: \n #### \n {project_backlog} \n #### \n

If there is a sprint in progress you will find information about it here: \n #### \n {project_sprint_state} \n #### \n
If there is a sprint you will find information about the sprint backlog here: \n #### \n {project_sprint_backlog} \n #### \n

Information about the past meetings that happened: \n####\n {meeting_history} \n####\n

Here are the necessarily information about the transcript. Use this to generate the final transcript. 
Meeting type: {meeting_type}\n
Meetinf purpose: \n{meeting_purpose}\n
Meeting outline: \n {meeting_outline}\n
Meeting participants - how actually takes part in the meeting: \n {meeting_participants} \n
Meeting estimated length: \n {meeting_length} \n


Generate a transcript that follows the topic outline, includes contributions from all participants according to their roles. 
Note: Only inculede those participants how have been listed as actual participants.
The transcript should be in a format where each speaker's name is in square brackets, followed by their dialogue. 
Ensure the conversation flows naturally and covers all outlined topics while maintaining realism and relevance to a software development project.
IMPORTANT: 
Be very verbose.
Only response with the transcript.
If you finished say "FINISHED". This is very important! It will cost you a lot if you dont say that! 
"""

# System prompt for the history step: the model summarises the meeting as one
# JSON object that is merged into the meeting history.
# Placeholders filled with str.format: meeting_purpose, meeting_type,
# meeting_outline.
# No separate date value is passed to this prompt, so the model is told to take
# the date from the meeting purpose. Interpolating the meeting type there would
# produce a wrong 'date', which the history update uses to detect duplicates.
UPDATE_MEETING_HISTORY_PROMPT = """
You are the Meeting History Updater in a meeting transcript generation pipeline.
Your task is to update the meeting history JSON file with the new meeting information.
Summarize the following meeting information into a concise, well-structured JSON format:

    Meeting Purpose: {meeting_purpose}
    Meeting Type: {meeting_type}
    Meeting Outline: {meeting_outline}
    Date: use the meeting date stated in the Meeting Purpose above

The JSON should include fields for 'date', 'type', 'summary' 'key_decisions'.
Limit the summary to 2-3 sentences and include up to 3 key decisions or action items.

IMPORTANT: Your response has to be a valid JSON object.
"""


### State

In [3]:
from typing import TypedDict
class DataGenerationState(TypedDict):
    """State dictionary handed from node to node in the meeting-generation graph.

    The first group of keys is loaded from the project folder before the graph
    runs; the remaining keys start as empty strings and are filled in by the
    node functions.
    """
    company_data: str	# init: loaded from company-general.md
    project_general: str	# init: loaded from project-general.md
    project_requirements: str		# init + need to be updated - this could be get from project state
    project_state : str  # init: loaded from project-state.md
    project_sprint_state : str  # init: loaded from the latest sprint folder
    project_backlog : str  # NOTE(review): loaded with load_from_json, so this holds parsed JSON rather than a str
    project_sprint_backlog : str  # init: loaded from the latest sprint folder
    employee_profiles: str	# init: loaded from employee-profiles.md
    # init + need to be updated - this could be get from project state
    # NOTE(review): loaded with load_from_json and replaced by a list in update_meeting_history_node
    meeting_history: str
    meeting_purpose: str	# [optional] written by meeting_purpose_node
    meeting_type: str		# written by meeting_type_node
    meeting_outline: str		# written by topic_outliner_node
    meeting_participants: str		# written by participant_definer_node
    meeting_length: str  # written by meeting_length_estimator_node
    transcript: str  # written by generate_transcript_node


In [4]:
from abc import ABC, abstractmethod

class Graph(ABC):
    """Abstract interface shared by the pipeline graphs in this notebook."""

    @abstractmethod
    def create_graph(self):
        """Build the graph; every concrete subclass has to implement this."""

    @abstractmethod
    def run_graph(self, project_folder):
        """Run the graph for the given project folder; every concrete subclass has to implement this."""

### Node functions

In [5]:
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, AIMessage
from langchain_anthropic import ChatAnthropic
from  dotenv import load_dotenv
import os

# Read environment variables via python-dotenv, then fetch the Anthropic API key
# from the environment (os.getenv returns None when the variable is not set).
load_dotenv()
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
# Alternative model, currently disabled:
#model = ChatAnthropic(model="claude-3-haiku-20240307", anthropic_api_key=anthropic_api_key)
# Module-level chat model invoked by the node functions in this cell; max_tokens is set to 8192.
model = ChatAnthropic(model="claude-3-5-sonnet-20240620", anthropic_api_key=anthropic_api_key, max_tokens= 8192)



def meeting_purpose_node(state: DataGenerationState) -> DataGenerationState:
    """Generate the purpose of the next meeting and store it in the state.

    MEETING_PURPOSE_GENERATOR_PROMPT is filled from the state and sent to the
    module-level chat model as the system message. The reply is written to
    state["meeting_purpose"], printed, and the same state dict is returned.
    """
    prompt_fields = (
        "company_data",
        "employee_profiles",
        "project_general",
        "project_requirements",
        "project_state",
        "project_sprint_state",
        "project_backlog",
        "project_sprint_backlog",
        "meeting_history",
    )
    # Missing keys are passed as None, exactly like state.get() would yield
    system_text = MEETING_PURPOSE_GENERATOR_PROMPT.format(
        **{field: state.get(field) for field in prompt_fields}
    )
    reply = model.invoke([
        SystemMessage(content=system_text),
        HumanMessage(content="Generate a meeting purpose based on the provided information."),
    ])
    state["meeting_purpose"] = reply.content
    print(state["meeting_purpose"])
    return state

def meeting_type_node(state: DataGenerationState) -> DataGenerationState:
    """Let the model choose the meeting type and store it in the state.

    MEETING_TPYE_SELECTOR is filled with the meeting purpose and sent to the
    module-level chat model as the system message. The reply is written to
    state["meeting_type"], printed, and the same state dict is returned.
    """
    system_text = MEETING_TPYE_SELECTOR.format(meeting_purpose=state.get("meeting_purpose"))
    reply = model.invoke([
        SystemMessage(content=system_text),
        HumanMessage(content="Generate a meeting type based on the provided information."),
    ])
    state["meeting_type"] = reply.content
    print(state["meeting_type"])
    return state


def topic_outliner_node(state: DataGenerationState) -> DataGenerationState:
    """Generate the topic outline of the meeting and store it in the state.

    TOPIC_OUTLINER_PROMPT is filled from the state and sent to the module-level
    chat model as the system message. The reply is written to
    state["meeting_outline"], printed, and the same state dict is returned.
    """
    prompt_fields = (
        "meeting_type",
        "meeting_purpose",
        "company_data",
        "employee_profiles",
        "project_general",
        "project_state",
        "project_sprint_state",
        "project_backlog",
        "project_sprint_backlog",
    )
    # Missing keys are passed as None, exactly like state.get() would yield
    system_text = TOPIC_OUTLINER_PROMPT.format(
        **{field: state.get(field) for field in prompt_fields}
    )
    reply = model.invoke([
        SystemMessage(content=system_text),
        HumanMessage(content="Generate the meeting topics and outline based on the provided information."),
    ])
    state["meeting_outline"] = reply.content
    print(state["meeting_outline"])
    return state


def meeting_length_estimator_node(state: DataGenerationState) -> DataGenerationState:
    """Estimate the meeting length and store the model's answer in the state.

    MEETING_LENGTH_ESTIMATOR is filled with type, purpose and outline and sent
    to the module-level chat model as the system message. The reply is written
    to state["meeting_length"], printed, and the same state dict is returned.
    """
    prompt_fields = ("meeting_type", "meeting_purpose", "meeting_outline")
    system_text = MEETING_LENGTH_ESTIMATOR.format(
        **{field: state.get(field) for field in prompt_fields}
    )
    reply = model.invoke([
        SystemMessage(content=system_text),
        HumanMessage(content="Generate the meeting length based on the provided information."),
    ])
    state["meeting_length"] = reply.content
    print(state["meeting_length"])
    return state


def participant_definer_node(state: DataGenerationState) -> DataGenerationState:
    """Determine the meeting participants and store them in the state.

    PARTICIPANT_DEFINER_PROMPT is filled with type, purpose, outline and the
    employee profiles and sent to the module-level chat model as the system
    message. The reply is written to state["meeting_participants"], printed,
    and the same state dict is returned.
    """
    prompt_fields = ("meeting_type", "meeting_purpose", "meeting_outline", "employee_profiles")
    system_text = PARTICIPANT_DEFINER_PROMPT.format(
        **{field: state.get(field) for field in prompt_fields}
    )
    reply = model.invoke([
        SystemMessage(content=system_text),
        HumanMessage(content="List the participants who need to be present at the meeting based on the provided information."),
    ])
    state["meeting_participants"] = reply.content
    print(state["meeting_participants"])
    return state


def generate_transcript_node(state: DataGenerationState) -> DataGenerationState:
    """Generate the meeting transcript, possibly over several model turns.

    TRANSCRIPT_GENERATOR_PROMPT is filled from the state and sent to the
    module-level chat model. The prompt asks the model to end with the word
    "FINISHED"; as long as a reply does not contain it, the reply is appended
    to the conversation and the model is asked to continue, for at most five
    turns. The turns are concatenated into state["transcript"], with the
    trailing "FINISHED" marker removed so that the control word does not end
    up in the stored transcript. The transcript is printed and the same state
    dict is returned.
    """
    formatted_prompt = TRANSCRIPT_GENERATOR_PROMPT.format(
        company_data=state.get("company_data"),
        project_general=state.get("project_general"),
        employee_profiles=state.get("employee_profiles"),
        project_state=state.get("project_state"),
        project_sprint_state=state.get("project_sprint_state"),
        project_backlog=state.get("project_backlog"),
        project_sprint_backlog=state.get("project_sprint_backlog"),
        meeting_history=state.get("meeting_history"),
        meeting_type=state.get("meeting_type"),
        meeting_purpose=state.get("meeting_purpose"),
        meeting_outline=state.get("meeting_outline"),
        meeting_participants=state.get("meeting_participants"),
        meeting_length=state.get("meeting_length"),
    )
    messages = [
        SystemMessage(content=formatted_prompt),
        HumanMessage(content="Generate the transcript based on the provided information.")
    ]

    finish_marker = "FINISHED"
    # Marker at the very end of a reply, optionally quoted or punctuated
    trailing_marker = re.compile(r'\s*["\']?' + finish_marker + r'["\']?[.!]?\s*$')
    max_turns = 5
    transcript_parts = []

    for turn_count in range(1, max_turns + 1):
        response = model.invoke(messages)
        turn_transcript = response.content

        finished = finish_marker in turn_transcript
        if finished:
            # Keep the dialogue, drop the control word
            transcript_parts.append(trailing_marker.sub("", turn_transcript))
        else:
            transcript_parts.append(turn_transcript)
            # Feed the partial transcript back so the model can pick up where it stopped
            messages.append(AIMessage(content=turn_transcript))
            messages.append(HumanMessage(content="Continue the transcript from where you left off."))

        print(turn_count)
        if finished:
            break

    state["transcript"] = "".join(transcript_parts)
    print(state["transcript"])
    return state

def update_meeting_history_node(state: DataGenerationState) -> DataGenerationState:
    """Summarise the meeting as JSON and merge it into state["meeting_history"].

    UPDATE_MEETING_HISTORY_PROMPT is filled with type, purpose and outline and
    sent to the module-level chat model. The reply is parsed as a JSON object.
    If the history already holds an entry with the same 'date', that entry is
    updated in place; otherwise the new entry is inserted at the front. Empty
    entries are dropped and the resulting list is stored back in the state.

    Raises:
    json.JSONDecodeError: If the model reply does not contain valid JSON.
    """
    formatted_prompt = UPDATE_MEETING_HISTORY_PROMPT.format(
        meeting_type=state.get("meeting_type"),
        meeting_purpose=state.get("meeting_purpose"),
        meeting_outline=state.get("meeting_outline"),
    )
    messages = [
        SystemMessage(content=formatted_prompt),
        HumanMessage(content="Update the meeting history with the provided information. Your response has to be a valid JSON object.")
    ]
    response = model.invoke(messages)
    print(response.content)

    # Models often wrap JSON in ``` fences or add a sentence around it; parse
    # only the outermost {...} span when one is present.
    raw_reply = response.content.strip()
    first_brace = raw_reply.find("{")
    last_brace = raw_reply.rfind("}")
    if first_brace != -1 and last_brace > first_brace:
        raw_reply = raw_reply[first_brace:last_brace + 1]
    new_meeting_entry = json.loads(raw_reply)

    # Get the current meeting history
    current_history = state.get("meeting_history", [])
    print(current_history)
    # Ensure current_history is a list
    if not isinstance(current_history, list):
        current_history = [current_history] if current_history else []

    # Check if an entry for this date already exists. An entry without a date
    # can never match, and non-dict history items are skipped instead of
    # crashing on .get().
    new_date = new_meeting_entry.get('date')
    existing_entry = None
    if new_date is not None:
        existing_entry = next(
            (entry for entry in current_history
             if isinstance(entry, dict) and entry.get('date') == new_date),
            None,
        )

    if existing_entry:
        # Update the existing entry
        existing_entry.update(new_meeting_entry)
    else:
        # Add the new entry to the front of the list
        current_history.insert(0, new_meeting_entry)

    # Remove any empty dictionaries
    current_history = [entry for entry in current_history if entry]

    # Update the state with the new meeting history
    state["meeting_history"] = current_history

    return state

### Graph

In [16]:
from langgraph.graph import StateGraph, END
class GenerateMeeting(Graph):
    """Linear pipeline that prepares (and is meant to generate) one simulated meeting.

    The graph chains the node functions in a fixed order: purpose -> type ->
    outline -> length -> participants -> transcript -> history update.
    """

    def __init__(self):
        # Compiled workflow; stays None until create_graph() has been called
        self.workflow = None

    def create_graph(self):
        """Register all nodes, connect them in sequence and compile the workflow."""
        workflow = StateGraph(DataGenerationState)

        workflow.add_node("meeting_purpose_node", meeting_purpose_node)
        workflow.add_node("meeting_type_node", meeting_type_node)
        workflow.add_node("topic_outliner_node", topic_outliner_node)
        workflow.add_node("meeting_length_estimator_node", meeting_length_estimator_node)
        workflow.add_node("participant_definer_node", participant_definer_node)
        workflow.add_node("generate_transcript_node", generate_transcript_node)
        workflow.add_node("update_meeting_history_node", update_meeting_history_node)

        workflow.add_edge("meeting_purpose_node", "meeting_type_node")
        workflow.add_edge("meeting_type_node", "topic_outliner_node")
        workflow.add_edge("topic_outliner_node", "meeting_length_estimator_node")
        workflow.add_edge("meeting_length_estimator_node", "participant_definer_node")
        workflow.add_edge("participant_definer_node", "generate_transcript_node")
        workflow.add_edge("generate_transcript_node", "update_meeting_history_node")
        workflow.add_edge("update_meeting_history_node", END)

        workflow.set_entry_point("meeting_purpose_node")

        self.workflow = workflow.compile()

    def run_graph(self, project_folder) -> DataGenerationState:
        """Load the project files into an initial state and return it.

        When the project has no sprint folder yet, the sprint state and sprint
        backlog are left as empty strings instead of failing, so that a first
        meeting can be prepared for a fresh project.

        NOTE: the call that actually runs the compiled workflow is currently
        commented out, so the returned state contains only the loaded inputs.

        :param project_folder: Path to the project folder
        :return: The initial state built from the project files
        :raises ValueError: If create_graph() has not been called yet
        """
        # TODO : make folder management and file handling

        if self.workflow is None:
            raise ValueError("Graph has not been created. Call create_graph() first.")
        current_sprint = get_latest_sprint_folder(project_folder=project_folder)

        company_data = load_markdown_to_str(file_path=os.path.join(project_folder, "company-general.md"))
        project_general = load_markdown_to_str(file_path=os.path.join(project_folder, "project-general.md"))
        project_requirements = load_markdown_to_str(file_path=os.path.join(project_folder, "project-requirements.md"))
        employee_profiles = load_markdown_to_str(file_path=os.path.join(project_folder, "employee-profiles.md"))

        project_state = load_markdown_to_str(file_path=os.path.join(project_folder, "project-state.md"))  # TODO
        project_backlog = load_from_json(file_path=os.path.join(project_folder, "project-backlog.json"))  # TODO

        if current_sprint is None:
            # No sprint folder exists yet: joining None into a path would raise
            # TypeError, so the sprint inputs are simply left empty.
            project_sprint_state = ""
            project_sprint_backlog = ""
        else:
            project_sprint_state = load_markdown_to_str(file_path=os.path.join(project_folder, current_sprint, "sprint-state.md"))  # TODO
            project_sprint_backlog = load_markdown_to_str(file_path=os.path.join(project_folder, current_sprint, "sprint-backlog.md"))  # TODO

        meeting_history = load_from_json(file_path=os.path.join(project_folder, "meeting-history.json"))  # TODO

        state = initialize_data_generation_state(
            company_data=company_data,
            project_general=project_general,
            project_requirements=project_requirements,
            employee_profiles=employee_profiles,
            project_state=project_state,
            project_backlog=project_backlog,
            project_sprint_state=project_sprint_state,
            project_sprint_backlog=project_sprint_backlog,
            meeting_history=meeting_history,
        )

        #state = self.workflow.invoke(state)

        return state

    def export_files(self, state: DataGenerationState, project_folder: str):
        """Write transcript, state log and meeting history, then update the sprint folders."""
        export_transcript(state=state, folder_path=os.path.join(project_folder, "transcripts/"))
        export_state(state=state, folder_path=os.path.join(project_folder, "state-logs/"), filename="generate_transcript")
        export_meeting_history(state=state, output_file=os.path.join(project_folder, "meeting-history.json"))
        manage_sprint_folders(state=state, project_folder=project_folder)


def initialize_data_generation_state(
    company_data: str,
    project_general: str,
    project_requirements: str,
    project_state: str,
    project_sprint_state: str,
    project_backlog: str,
    project_sprint_backlog: str,
    employee_profiles: str,
    meeting_history: str,
) -> DataGenerationState:
    """Build the starting ``DataGenerationState`` for a run.

    The nine arguments are passed through unchanged under the same field
    names. The meeting-specific fields (purpose, type, outline, participants,
    length) and the transcript are all initialised to the empty string.

    NOTE(review): ``project_backlog`` and ``meeting_history`` are annotated as
    ``str``, but the visible caller fills them from ``load_from_json`` --
    confirm the intended types.

    Returns:
        A ``DataGenerationState`` holding the supplied inputs plus the blank
        meeting/transcript fields.
    """
    # Fields that start out empty; insertion order matches the order in which
    # they are handed to the constructor.
    blank_fields = dict.fromkeys(
        (
            "meeting_purpose",
            "meeting_type",
            "meeting_outline",
            "meeting_participants",
            "meeting_length",
            "transcript",
        ),
        "",
    )

    return DataGenerationState(
        # Static company / project description.
        company_data=company_data,
        project_general=project_general,
        project_requirements=project_requirements,
        employee_profiles=employee_profiles,
        # Project-level progress.
        project_state=project_state,
        project_backlog=project_backlog,
        # Sprint-level progress.
        project_sprint_state=project_sprint_state,
        project_sprint_backlog=project_sprint_backlog,
        # Meeting data: history is supplied, everything else starts blank.
        meeting_history=meeting_history,
        **blank_fields,
    )


In [None]:
!dir "..\..\..\data_\project1\"

In [21]:
# Instantiate the meeting generator and build its workflow graph.
graph = GenerateMeeting()
graph.create_graph()
# Print the workflow as Mermaid diagram source for a quick visual check.
print(graph.workflow.get_graph().draw_mermaid())
# Run against the project1 data folder (path is relative to the notebook).
state = graph.run_graph(project_folder="../../../data_/project1/")
# Bare expression so the notebook displays the returned state.
state


%%{init: {'flowchart': {'curve': 'linear'}}}%%
graph TD;
	__start__([<p>__start__</p>]):::first
	meeting_purpose_node(meeting_purpose_node)
	meeting_type_node(meeting_type_node)
	topic_outliner_node(topic_outliner_node)
	meeting_length_estimator_node(meeting_length_estimator_node)
	participant_definer_node(participant_definer_node)
	generate_transcript_node(generate_transcript_node)
	update_meeting_history_node(update_meeting_history_node)
	__end__([<p>__end__</p>]):::last
	__start__ --> meeting_purpose_node;
	generate_transcript_node --> update_meeting_history_node;
	meeting_length_estimator_node --> participant_definer_node;
	meeting_purpose_node --> meeting_type_node;
	meeting_type_node --> topic_outliner_node;
	participant_definer_node --> generate_transcript_node;
	topic_outliner_node --> meeting_length_estimator_node;
	update_meeting_history_node --> __end__;
	classDef default fill:#f2f0ff,line-height:1.2
	classDef first fill-opacity:0
	classDef last fill:#bfb6fc

Latest sprint f

{'company_data': '# TechNova Solutions\n\n## Company Overview\nTechNova Solutions is a small, dynamic IT company specializing in web application development. With a team of 6 skilled professionals, they focus on creating innovative, user-friendly web solutions for small to medium-sized businesses.\n\n## Current Project: HealthTrack Pro\nTechNova is developing HealthTrack Pro, a comprehensive web application for personal health management. This application allows users to track their daily activities, nutrition, and health metrics, and provides insights and recommendations for a healthier lifestyle.\n\n## Team Structure\n1. ** Sarah Chen - Project Manager / Scrum Master**\n   - Oversees project progress, manages timelines, and facilitates communication\n   - Has a background in both frontend and backend development\n\n2. ** Alex Rodriguez - Senior Full-Stack Developer**\n   - Leads technical decisions and architecture design\n   - Proficient in both frontend and backend technologies\n\n

In [17]:
# Load state_log_1.json from project1's state-logs folder for inspection.
# NOTE(review): absolute, machine-specific path -- the graph run elsewhere in
# this notebook uses a relative path; confirm whether this should match.
data = load_json("C:/Egyetem/7Felev/Szakdolgozat/summer/data_/project1/state-logs/state_log_1.json")
# Bare expression so the notebook displays the loaded content.
data


Successfully loaded JSON from C:/Egyetem/7Felev/Szakdolgozat/summer/data_/project1/state-logs/state_log_1.json


{'company_data': '# TechNova Solutions\n\n## Company Overview\nTechNova Solutions is a small, dynamic IT company specializing in web application development. With a team of 6 skilled professionals, they focus on creating innovative, user-friendly web solutions for small to medium-sized businesses.\n\n## Current Project: HealthTrack Pro\nTechNova is developing HealthTrack Pro, a comprehensive web application for personal health management. This application allows users to track their daily activities, nutrition, and health metrics, and provides insights and recommendations for a healthier lifestyle.\n\n## Team Structure\n1. ** Sarah Chen - Project Manager / Scrum Master**\n   - Oversees project progress, manages timelines, and facilitates communication\n   - Has a background in both frontend and backend development\n\n2. ** Alex Rodriguez - Senior Full-Stack Developer**\n   - Leads technical decisions and architecture design\n   - Proficient in both frontend and backend technologies\n\n