In [1]:
from llm_as_evaluator_client.client import LLMasEvaluator
from dotenv import load_dotenv
from langchain_core.messages import (
    AIMessage,
    FunctionMessage,
    HumanMessage,
)
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import json
from Models.models import LLMModel
from enum import Enum
from typing import Annotated, List, Sequence, TypedDict
from langchain_core.messages import BaseMessage
from pydantic import BaseModel, Field
import operator

# Load environment variables from a .env file (python-dotenv) before the client is built.
# NOTE(review): presumably the client's credentials come from these variables — confirm.
load_dotenv()

# Two evaluator endpoints are defined; only REMOTE_BASE_URL is passed to the client below.
# LOCAL_BASE_URL is unused in the visible cells (presumably for a locally running service).
LOCAL_BASE_URL = "http://127.0.0.1:8887"
REMOTE_BASE_URL = "https://llm-as-evaluator.turing.com"
# Shared client used by later cells. Per the recorded error output of this cell, the second
# argument is the engagement name and the third is the project name.
# NOTE(review): that same recorded output ends with "Authentication failed." — the user had
# no role on this engagement/project when the cell was last run; verify access before re-running.
llm_as_evaluator = LLMasEvaluator(REMOTE_BASE_URL, "public-engagement", "beta-test")


{"detail":"User is not allowed to create run. UserProjectRole with User Email 'daniel.oluwakayode-c@turing.com', Engagement Name 'public-engagement' and Project Name 'beta-test' not found"}
Authentication failed.


In [12]:
# Notebook to evaluate, given relative to the kernel's working directory.
file = "Data/new_chirag/algorithm_problems_using_python__4__19_03_2024_12_55_37_1.ipynb"

# Read the notebook as one raw string. .ipynb files are UTF-8 encoded JSON, so the
# encoding is pinned rather than left to the platform default (which is not UTF-8 on
# every OS and can fail or garble non-ASCII cell content).
with open(file, "r", encoding="utf-8") as f:
    notebook_contents = f.read()

# parse_notebooks takes a list of raw notebook strings. In the recorded run it returns
# one dict per notebook with 'status', 'metadata' and 'conversation' keys.
parsed_notebooks = llm_as_evaluator.parse_notebooks([notebook_contents])
parsed_notebooks

[{'status': 'OK',
  'metadata': {'metadata': '# Metadata\n\n**Topic** - algorithm problems using python\n\n**Target Number of Turns (User + Assistant) - 3**\n\n**Suggested Conversation :** - How can a Python developer implement a parallel merge sort algorithm using the multiprocessing module to sort a large dataset, and what considerations should be taken into account regarding data partitioning and process synchronization?'},
  'conversation': [{'cell_pos': 1,
    'role': 'User',
    'content': 'I need to sort a large dataset. Implement a parallel merge sort in Python using multiprocessing and explain what considerations should be taken into account regarding data partitioning and process synchronization.',
    'type': 'markdown'},
   {'cell_pos': 2,
    'role': 'Assistant',
    'content': 'A parallel merge sort can be implemented following these steps:\n1. define the number of processes to use. Usually, this number is the same as the CPU core count of the system\n2. split the data in

In [13]:
def extract_conversation(parsed_notebooks):
    """Group each notebook's messages into turns of consecutive same-role messages.

    Args:
        parsed_notebooks: Iterable of dicts. Each may carry a 'conversation'
            list whose items have at least 'role' and 'content' keys; a
            notebook without 'conversation' contributes nothing.

    Returns:
        A flat list of turns across all notebooks, in order. Each turn is a
        list of {'role', 'content'} dicts sharing one role. Turns never span
        two notebooks, even when the roles at the boundary match.
    """
    all_turns = []
    for parsed in parsed_notebooks:
        # Collected per notebook so a turn cannot merge across notebooks.
        notebook_turns = []
        for message in parsed.get('conversation', []):
            entry = {'role': message['role'], 'content': message['content']}
            # The last entry of the last turn is always the previous message,
            # so a role change there marks the start of a new turn.
            if not notebook_turns or entry['role'] != notebook_turns[-1][-1]['role']:
                notebook_turns.append([entry])
            else:
                notebook_turns[-1].append(entry)
        all_turns.extend(notebook_turns)
    return all_turns

# Usage: group the parsed notebook's messages into same-role turns; later cells index into this list.
conversation_turns = extract_conversation(parsed_notebooks)

In [26]:
# Inspect the turn at index 1; in the recorded run this is the Assistant turn (explanation followed by code).
conversation_turns[1]

[{'role': 'Assistant',
  'content': 'A parallel merge sort can be implemented following these steps:\n1. define the number of processes to use. Usually, this number is the same as the CPU core count of the system\n2. split the data into *n* chunks, where *n* is the number of processes.\n3. allocate each chunk to a process for sorting\n4. merge the sorted results\n\n\nHere is how it can be implemented:'},
 {'role': 'Assistant',
  'content': 'import math\nimport multiprocessing\n\n\ndef parallel_merge_sort(input, processes):\n    print(f"Sorting {len(input)} in parallel with {processes} processes")\n    # Divide the input array into n chunks\n    chunk_size = math.ceil(len(input) / processes)\n    chunks = [input[chunk_size * i : chunk_size * (i + 1)] for i in range(processes)]\n\n    # Create a process pool and map each chunk to be sorted\n    pool = multiprocessing.Pool(processes)\n    with pool:\n        mapping = pool.map(merge_sort_exception_handler, chunks)\n\n    if any(part is No

In [5]:
# Schema describing the classification of a single conversation turn: whether it
# contains code, whether that code is complete/testable, the code itself and its
# dependencies. All five fields are declared without defaults, so each one is required.
# NOTE(review): the class docstring and Field descriptions are left untouched on purpose —
# pydantic includes them in the generated JSON schema, so rewording them may change what
# a model consuming this schema sees. Confirm before editing.
class TurnClassification(BaseModel):
    """TurnClassification"""
    has_code: bool = Field(description="Indicates if the turn has code")
    complete_code: bool = Field(description="Indicates if the code is complete and can be executed independently")
    can_be_tested: bool = Field(description="Indicates if the code can be tested")
    # Required even when has_code is False (no default is provided).
    code: str = Field(description="The actual code as a string")
    # Required as well; an empty list is the only way to express "no dependencies".
    dependencies: List[str] = Field(description="List of dependencies required for the code")


In [6]:



#Base Prompt Template   
def get_base_prompt_template(name=None, custom_instructions=None, agents=None):
    """Build the shared system prompt for an agent with all placeholders pre-bound.

    Args:
        name: Value bound to the ``{name}`` placeholder (passed through as-is,
            including ``None``).
        custom_instructions: Role-specific text bound to
            ``{custom_instructions}``; any falsy value is bound as ``""``.
        agents: Value bound to the ``{agents}`` placeholder (passed through
            as-is, including ``None``).

    Returns:
        The prompt template produced by ``partial`` with the three variables bound.
    """
    system_text = (
        "Your name is {name} and you are working along side other agents as listed below "
        "Agents: \n {agents} "
        "{custom_instructions} "
    )
    base_template = ChatPromptTemplate.from_messages([("system", system_text)])

    # Bind all three variables in a single partial call.
    return base_template.partial(
        name=name,
        agents=agents,
        custom_instructions=custom_instructions or "",
    )

In [7]:
#CODE EXTRACTORS
# System prompt for the extraction agent: it is named "code_extractor", told to pull the
# assistant's code out of the conversation unchanged, and told its peer agent is "testers".
code_extractor_prompt_template = get_base_prompt_template(
    name="code_extractor",
    custom_instructions="Your role is to extract the code provided by the AI assistant from the conversation in the notebook. "
                        "Once you've extracted the code, forward it to the agent named testers without making any changes. ",
    agents="testers"
)

# Callable model wrapper configured with the prompt above and the TurnClassification schema.
# NOTE(review): LLMModel is project-local (Models.models), so the exact meaning of
# provider/use_history/output_schema is not visible here. In the recorded run, calling this
# object prints "Validating output schema....." and returns a dict with the five
# TurnClassification keys — presumably output_schema drives that; confirm.
code_extractor = LLMModel(
    provider="openai_api",
    model="gpt-4o",
    use_history=False,
    #use_tool=False,
    output_schema=TurnClassification,
    prompt_template=code_extractor_prompt_template
)

In [27]:
# Serialize the turn at index 1 (a list of role/content dicts) to a JSON string and send it
# to the extractor as a single human message; the call's return value is the cell output.
message = json.dumps(conversation_turns[1])
code_extractor([HumanMessage(message)])

Validating output schema.....


{'has_code': True,
 'complete_code': True,
 'can_be_tested': True,
 'code': 'import math\nimport multiprocessing\n\n\ndef parallel_merge_sort(input, processes):\n    print(f"Sorting {len(input)} in parallel with {processes} processes")\n    # Divide the input array into n chunks\n    chunk_size = math.ceil(len(input) / processes)\n    chunks = [input[chunk_size * i : chunk_size * (i + 1)] for i in range(processes)]\n\n    # Create a process pool and map each chunk to be sorted\n    pool = multiprocessing.Pool(processes)\n    with pool:\n        mapping = pool.map(merge_sort_exception_handler, chunks)\n\n    if any(part is None for part in mapping):\n        raise Exception("Error while sorting")\n\n    # Then merge the sorted arrays\n    return merge_results(mapping)\n\n\ndef merge_sort_exception_handler(input):\n    try:\n        return merge_sort(input)\n    except Exception as e:\n        print(f"Error: {e}")\n        return None\n\n\n# A simple merge sort\ndef merge_sort(input):\n 