In [13]:
%load_ext autoreload
%autoreload 2
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

from pydantic import BaseModel
import markdown_to_json
import json
import re


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
def _build_streaming_llm(model_name):
    """Create an Ollama client for `model_name` with its own stdout-streaming callback manager."""
    stdout_streamer = StreamingStdOutCallbackHandler()
    return Ollama(model=model_name, callback_manager=CallbackManager([stdout_streamer]))


# Model that the later cells call to generate the project plans.
llm = _build_streaming_llm("agent_pm")

# Second model; it is configured here but not called in the cells shown in this notebook section.
llm_json = _build_streaming_llm("agent_json_fixer")

In [3]:
class ResponseComponent(BaseModel):
    # Describes one named section that the LLM is asked to include in its markdown answer.

    # Title requested for the section heading; also matched against parsed headings
    # when the answer is read back.
    name: str
    # What the section should describe; interpolated into the format instructions.
    description: str
    # Layout requested for the section content; interpolated into the format instructions.
    output_format: str

def _markdown_list_component(name, description):
    """Build a ResponseComponent whose content is requested as a markdown list."""
    return ResponseComponent(name=name, description=description, output_format="markdown list")


# Sections requested from the planning prompt, one module-level name per section.
tasks = _markdown_list_component("tasks", "the project split into tasks")
timeline = _markdown_list_component("timeline", "the time required for each task")
deliverables = _markdown_list_component("deliverables", "the project deliverables")
team = _markdown_list_component(
    "team",
    "the required team's skills and experience level to deliver the project",
)
risks = _markdown_list_component("risks", "the project risks")
budget = _markdown_list_component("budget", "the budget required to deliver each task")
metrics = _markdown_list_component("metrics", "the project metrics to measure success against")

def structured_output_instructions_from_response_components(response_components: list["ResponseComponent"]):
    """Build the format-instruction text that tells the LLM how to structure its answer.

    Args:
        response_components: the sections to request, in the order they should be
            numbered. Each item must expose `name`, `description` and `output_format`.

    Returns:
        str: a fixed header line followed by one numbered, newline-terminated line
        per component. An empty input yields just the header line.
    """
    header = "The output should be a markdown code snippet formatted in the following schema:\n"
    numbered_lines = [
        f"{position}. A markdown heading (#) with the title {component.name}, "
        f"followed by a description of {component.description} as a {component.output_format}.\n"
        for position, component in enumerate(response_components, start=1)
    ]
    # Join once instead of growing the string with += on every iteration.
    return header + "".join(numbered_lines)

# The list order fixes the numbering of the sections in the generated instructions.
response_components = [tasks, timeline, deliverables, team, risks, budget, metrics]

response_format = structured_output_instructions_from_response_components(
    response_components
)
print(response_format)

The output should be a markdown code snippet formatted in the following schema:
1. A markdown heading (#) with the title tasks, followed by a desciption of the project split into tasks as a markdown list.
2. A markdown heading (#) with the title timeline, followed by a desciption of the time required for each task as a markdown list.
3. A markdown heading (#) with the title deliverables, followed by a desciption of the project deliverables as a markdown list.
4. A markdown heading (#) with the title team, followed by a desciption of the required team's skills and experience level to deliver the project as a markdown list.
5. A markdown heading (#) with the title risks, followed by a desciption of the project risks as a markdown list.
6. A markdown heading (#) with the title budget, followed by a desciption of the budget required to deliver each task as a markdown list.
7. A markdown heading (#) with the title metrics, followed by a desciption of the project metrics to measure success a

In [4]:
# Prompt skeleton for the planning request. The three placeholders are filled in below.
template = """\
You have been given the following project brief. Identify and plan the key project tasks step by step.

{format_instructions}

Project Brief:
{brief}

{emotive_prompt}
"""

# Free-text description of the engagement, inserted at {brief}.
brief = """\
- The project is for a big multinational firm.
- The firm employs about 10000 people.
- The firm is grouped into a hierarchical structure, where each group specialises in a different industry or competency.
- The firm is struggling to forecast its revenue accurately for one of the competencies.
- We are proposing to use machine learning and algorithms coupled with sourcing external market data to improve forecasting.
- We aim to achieve a better accuracy than their current forecasting process.
"""

# Extra sentence appended at the end of the prompt via {emotive_prompt}.
emotive_prompt = "This is very important for my career."

# format_instructions is bound up front from response_format; brief and emotive_prompt
# are supplied when the prompt is formatted.
# NOTE: response_format is rebound by a later cell, so re-running this cell after that
# one would pick up the task-breakdown instructions instead of the planning ones.
prompt = PromptTemplate(
    template=template,
    input_variables=["brief", "emotive_prompt"],
    partial_variables={"format_instructions": response_format}
)

# Render the prompt to plain text and send it to the planning model.
_input = prompt.format_prompt(brief=brief, emotive_prompt=emotive_prompt)
output_plan = llm(_input.to_string())

# Tasks:

1. Define the problem statement and identify the key performance indicators (KPIs) for the forecasting process.
	* Breakdown the problem into smaller, manageable tasks such as gathering historical data, identifying patterns, and determining the root cause of inaccurate forecasts.
2. Collect and cleanse external market data from reliable sources.
	* Identify relevant data sources such as industry reports, market trends, and economic indicators to improve the accuracy of forecasts.
3. Develop a machine learning model for forecasting revenue.
	* Select appropriate algorithms and techniques based on the problem statement and KPIs, and train the model using historical data.
4. Integrate the machine learning model with the firm's existing forecasting process.
	* Ensure seamless integration with the existing system by developing a user-friendly interface and ensuring data consistency.
5. Test and validate the new forecasting model.
	* Use historical data to test the accuracy of the 

In [5]:
# Convert the markdown answer into a dict; extract_content below iterates its keys
# looking for the requested section headings.
output_plan_dict = markdown_to_json.dictify(output_plan)


def find_headings(response_components: list["ResponseComponent"], text: str, max_length_ratio: float = 2):
    """Return the name of the first response component that `text` looks like a heading for.

    `text` counts as a heading for a component when the component name occurs in it
    (case-insensitively) and `text` is at most `max_length_ratio` times as long as the
    name. The length cap keeps long passages that merely mention the name from matching.

    Args:
        response_components: candidates to test, in priority order. Each must expose `name`.
        text: the heading candidate. Anything that is not a string yields None.
        max_length_ratio: maximum allowed `len(text) / len(name)`. Defaults to 2.

    Returns:
        The matching component's `name`, or None when nothing matches.
    """
    if not isinstance(text, str):
        return None
    candidate = text.lower()
    for component in response_components:
        name = component.name
        if name.lower() in candidate and len(candidate) <= len(name) * max_length_ratio:
            return name
    return None

def flatten_list(the_list: list, nr_prefix=''):
    """Flatten a possibly nested list into a flat list of outline-numbered strings.

    Non-list items are numbered 1, 2, 3, ... within their level and rendered as
    "<prefix><n>. <item>". A nested list is flattened recursively, using the number
    of the item that precedes it as an additional prefix ("1.1.", "1.2.", ...). A
    nested list that comes before any plain item is therefore prefixed with "0.".

    A value that is not a list is returned unchanged, wrapped in a one-element list.
    """
    if not isinstance(the_list, list):
        return [the_list]

    flattened = []
    counter = 0
    for entry in the_list:
        if type(entry) is list:
            # Children are numbered beneath the most recent plain item.
            flattened += flatten_list(entry, nr_prefix=f"{nr_prefix}{counter}.")
            continue
        counter += 1
        flattened.append(f"{nr_prefix}{counter}. {entry!s}")
    return flattened

def extract_hidden_list(text:str):
    """Recover items from Python/JSON-style list literals embedded in free text.

    Every single-line span shaped like ['a', 'b'] or ["a", "b"] is located, its
    quoting is normalised to JSON double quotes, and the decoded items of all such
    spans are concatenated in order of appearance.

    Args:
        text: the text to scan.

    Returns:
        list: the decoded items. Empty when no list literal is found.

    Raises:
        json.JSONDecodeError: if a located span cannot be decoded after quote
            normalisation (for example when an item contains a bare double quote).
    """
    # Only quote characters may open or close the list. A '|' inside the character
    # class would be matched literally and make "[|x|]" look like a list.
    candidates = re.findall(r'\[["\'](.*?)["\']\]', text)
    hidden_list = []
    for inner in candidates:
        # Re-wrap the captured interior and turn the 'a', 'b' separators into "a", "b".
        loading_str = f'["{inner}"]'.replace('\n','').replace("',","\",").replace(", '",", \"")
        hidden_list.extend(json.loads(loading_str))
    return hidden_list

def extract_content(input_dict: dict, response_components: list):
    """Pick the requested sections out of a parsed answer and flatten their content.

    Each key of `input_dict` is tested with `find_headings`; when it matches a
    component, the key's value is stored under that component's name. Each stored
    value is then normalised: a string value is scanned with `extract_hidden_list`
    and replaced by the recovered items when any are found, and the result is passed
    through `flatten_list`.

    Returns:
        dict: component name -> flat list of numbered strings.
    """
    matched = {}
    for heading in input_dict:
        component_name = find_headings(response_components, heading)
        if component_name is not None:
            matched[component_name] = input_dict[heading]

    # Only values are replaced below, so iterating over items() while assigning is safe.
    for component_name, content in matched.items():
        recovered = extract_hidden_list(content) if isinstance(content, str) else []
        matched[component_name] = flatten_list(recovered if recovered else content)
    return matched

# Keep only the sections named in response_components, each as a flat numbered list.
result_dict = extract_content(output_plan_dict, response_components)

In [6]:
# Inspect the flattened entries extracted for the "team" section.
result_dict['team']

['1. Data Scientist with experience in machine learning and data analysis.',
 '1.1. Skills required include proficiency in programming languages such as Python or R, knowledge of machine learning algorithms and techniques, and expertise in data visualization and communication.',
 '2. Business Analyst with experience in process improvement and stakeholder management.',
 "2.1. Skills required include understanding of the firm's existing forecasting process, ability to communicate complex technical concepts to non-technical stakeholders, and experience in project management and coordination.",
 '3. Data Engineer with experience in data integration and management.',
 '3.1. Skills required include expertise in data warehousing, ETL (Extract, Transform, Load) processes, and ability to manage large datasets.',
 '4. Technical Writer with experience in creating user documentation and training materials.',
 '4.1. Skills required include proficiency in document management systems, ability to crea

In [7]:
# print(json.dumps(output_plan_dict, indent=2))

In [8]:
# The follow-up prompt requests a single section: the detailed per-task breakdown.
task_breakdown = ResponseComponent(
    name="task breakdown", description="the detailed step by step breakdown of each task", output_format="markdown list"
)
response_components_breakdown = [task_breakdown]

# This rebinds response_format, replacing the planning instructions built earlier.
response_format = structured_output_instructions_from_response_components(
    response_components_breakdown
)
print(response_format)

The output should be a markdown code snippet formatted in the following schema:
1. A markdown heading (#) with the title task breakdown, followed by a desciption of the detailed step by step breakdown of each task as a markdown list.



In [9]:
# Prompt skeleton for the follow-up request that expands every task into detailed steps.
# This rebinds `template`, `prompt` and `_input` from the planning cell.
template = """\
Given the following project brief, and a list of tasks to solve for the brief. Take each task in the list and plan it step by step in detail.

{format_instructions}

Project Brief:
{brief}

Tasks:
{tasks}

{emotive_prompt}
"""

# format_instructions is bound from the current response_format; the remaining
# placeholders are supplied when the prompt is formatted.
prompt = PromptTemplate(
    template=template,
    input_variables=["brief", "tasks", "emotive_prompt"],
    partial_variables={"format_instructions": response_format}
)

# NOTE(review): result_dict['tasks'] is a Python list, so {tasks} is presumably rendered
# as its list repr (brackets and quotes) rather than one task per line; confirm intended.
# This lookup raises KeyError if the planning answer had no recognisable "tasks" heading.
_input = prompt.format_prompt(brief=brief, tasks=result_dict['tasks'], emotive_prompt=emotive_prompt)
task_plan = llm(_input.to_string())

# Task Breakdown: Project Brief

The project brief outlines a plan to improve the revenue forecasting process for a big multinational firm with over 10,000 employees. The proposed solution involves using machine learning and algorithms combined with external market data to enhance accuracy. We will break down each task into manageable steps to achieve this goal:

Task 1: Define the problem statement and identify key performance indicators (KPIs) for the forecasting process.

* Step 1: Gather information on the current forecasting process, including data sources, methods used, and accuracy levels.
* Step 2: Identify the root cause of inaccurate forecasts by analyzing historical data and identifying patterns.
* Step 3: Define the problem statement clearly, highlighting the specific areas where improvement is needed.
* Step 4: Determine KPIs to measure the success of the new forecasting process.

Task 1.1: Break down the problem into smaller tasks.

* Step 1: Identify the key tasks involv

In [10]:
# extract_content expects the heading-keyed dict produced by markdown_to_json.dictify.
# Passing the raw markdown string makes it iterate single characters, so nothing
# matches and the result is always empty.
task_plan_dict = markdown_to_json.dictify(task_plan)
result_dict_breakdown = extract_content(task_plan_dict, response_components_breakdown)
result_dict_breakdown

{}