In [2]:
from langchain_community.chat_models import BedrockChat
from langchain.chains import ConversationChain
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.ai import AIMessage
from langchain_core.messages.system import SystemMessage

In [10]:
def instantiate_claude_bedrock_model(
    model_id: str,
    temperature: float = 0.0,
    max_tokens: int = 4096,
    region_name: str = "us-east-1",
    credentials_profile_name: str = "default",
) -> BedrockChat:
    """Create a ``BedrockChat`` client for the given Bedrock model.

    Args:
        model_id: Bedrock model identifier passed through to ``BedrockChat``.
        temperature: Sampling temperature forwarded in ``model_kwargs``.
            Annotated as ``float`` because the default is ``0.0``.
        max_tokens: Value forwarded as ``max_tokens`` in ``model_kwargs``.
        region_name: AWS region passed to ``BedrockChat``.
        credentials_profile_name: Name of the AWS credentials profile passed
            to ``BedrockChat``.

    Returns:
        The configured ``BedrockChat`` instance.
    """
    return BedrockChat(
        region_name=region_name,
        credentials_profile_name=credentials_profile_name,
        model_id=model_id,
        model_kwargs={
            "temperature": temperature,
            "max_tokens": max_tokens,
        },
    )

In [11]:
# System prompt for the "specialist" role: it is wrapped in a SystemMessage and
# placed first in the specialist's message history. The trailing
# .replace("\n", " ") turns every line break of the triple-quoted literal into a
# space, so the prompt is sent as one line.
# NOTE(review): "have a meaningful discussion then to return" presumably should
# read "than to return"; left untouched because the text is sent to the model
# verbatim.
specialist_message = """You are a specialist in machine learning, artificial intelligence, programming, algorithms,
engineering, mathematics and natural sciences. You will be given a specific task to solve. Your goal is to provide a solution
that is accurate, clean and correct. Your response will then be read by a critic. The critic's task is to analyse your work and
find any inaccuracies, errors or areas to improve. The critic will be friendly - their role is not to be picky, but rather to cooperate
with you to refine the response to a state where you can both agree it meets the required standards. Remember that <<you do not have to
agree with the critic's remarks if you think they are wrong!>>. This is very important because it is better to disagree and have a
meaningful discussion then to return a suboptimal answer. Also, be sure to return a verbose chain of thought. For example, if you are to
analyse a piece of text or code, highlight specific lines to which you have remarks. If you are prompted to provide a solution to a
mathematical problem, provide a step-by-step reasoning that led you towards the solution.""".replace("\n", " ")

In [12]:
# System prompt for the "critic" role: it is wrapped in a SystemMessage and is
# the only initial entry of the critic's message history. The prompt asks the
# critic to end its message with the <AGREE> token once it fully agrees; the
# conversation loop stops when that token shows up in the critic's reply.
# The trailing .replace("\n", " ") turns every line break into a space, which
# also joins the "- ..." checklist items into the surrounding single line.
# NOTE(review): the literal contains the typos "attemtion" and "carefuly"; left
# untouched because the text is sent to the model verbatim.
critic_message = """You are a specialist in machine learning, artificial intelligence, programming, algorithms,
engineering, mathematics and natural sciences. You will be given a response to a question that was written by another specialist in
the listed fields. Your task is to analyse the response thoroughly and in detail, and detect any potential inconsistencies, errors, or
other flaws. Make sure your analysis is extensive. Do not be afraid to point out errors. Be sure to pay attemtion to things like:
- Did the specialist thoroughly and carefuly carry out the task they were assigned?
- Is the specialist's response correct and factual?
- Is the specialist's reasoning clear, coherent and correct?
- Is the content of the specialist's response substantial and objective?
- Is the specialist's response easy to understand?
It is ok to find areas to improve that you have not noticed in previous messages. It is also ok to ask for changes multiple times
if you think there is still room for improvement in the response. When asking for a follow-up response, please outline your remarks
providing as much details as you can. Please ask for a corrected version even if the things you point out are minor. At the end of your
response, determine if you FULLY agree with the specialist regarding every detail of the response. If you are 100% aligned, and only
then, end your message with the <AGREE> token. If there are even minor remarks or suggestions on your end, do not return <AGREE>.""".replace("\n", " ")

In [28]:
# Task text handed to the specialist as its first human message and echoed at
# the top of the conversation printout. The value below is a placeholder to be
# replaced with the real task; unlike the prompts, line breaks are kept as-is.
task = """
<PROVIDE TASK>
"""

In [29]:
# Single chat model instance; the conversation loop uses it for both the
# specialist and the critic turns.
llm = instantiate_claude_bedrock_model(
    model_id="anthropic.claude-3-5-sonnet-20240620-v1:0",
)

In [30]:
class TextFormatting:
    """ANSI escape sequences used to style the printed conversation."""

    # Text attributes.
    BOLD = "\033[1m"

    # Foreground colours.
    GREEN = "\033[92m"
    RED = "\033[91m"

    # Resets all attributes; emit after a styled span.
    END = "\033[0m"

In [31]:
# Specialist history: its system prompt followed by the task as the first
# human turn.
specialist_memory = [
    SystemMessage(content=specialist_message),
    HumanMessage(content=task),
]

# Critic history: only its system prompt to begin with.
# NOTE(review): the task text is never added to the critic's history - the
# conversation loop only appends the specialist's replies - so the critic
# judges the answer without seeing the question. Confirm this is intended.
critic_memory = [SystemMessage(content=critic_message)]

In [32]:
# Upper bound on specialist/critic exchanges. Every round costs two model
# calls, and the critic prompt encourages repeated change requests, so an
# unbounded loop could run (and bill) indefinitely.
MAX_ROUNDS = 10

print(f"{TextFormatting.BOLD}TASK{TextFormatting.END}\n{task}")
for _round in range(MAX_ROUNDS):
    # Specialist turn: answer the task (or the critic's latest remarks).
    # `.invoke` is used because calling the chat model object directly is
    # deprecated in LangChain.
    specialist_resp = llm.invoke(specialist_memory)
    specialist_memory.append(specialist_resp)
    print(f"{TextFormatting.BOLD}{TextFormatting.GREEN}SPECIALIST{TextFormatting.END}\n{specialist_resp.content}\n")

    # Critic turn: the specialist's answer arrives as a human message.
    critic_memory.append(HumanMessage(specialist_resp.content))
    critic_resp = llm.invoke(critic_memory)
    critic_memory.append(critic_resp)
    print(f"{TextFormatting.BOLD}{TextFormatting.RED}CRITIC{TextFormatting.END}\n{critic_resp.content}")

    # Feed the critique back to the specialist for the next round.
    specialist_memory.append(HumanMessage(critic_resp.content))

    # The critic prompt asks for <AGREE> at the END of the message. Checking
    # only the tail avoids stopping early when the critic merely mentions the
    # token (e.g. "I cannot return <AGREE> yet").
    if critic_resp.content.rstrip().endswith("<AGREE>"):
        break
    print("\n")
else:
    # Reached only when the loop ran out of rounds without a `break`.
    print(f"Stopped after {MAX_ROUNDS} rounds without the critic returning <AGREE>.")

[1mTASK[0m
<code>
# *=*=*=*=*= /home/cupofcoffee/PycharmProjects/multilayer-ai-msc/multilayerai/ml/__init__.py


# *=*=*=*=*= /home/cupofcoffee/PycharmProjects/multilayer-ai-msc/multilayerai/ml/rl/__init__.py


# *=*=*=*=*= /home/cupofcoffee/PycharmProjects/multilayer-ai-msc/multilayerai/ml/rl/action.py
import pydantic


class Action(pydantic.BaseModel):
    material: str


class AddLayerAction(Action):
    thickness_um: float


# *=*=*=*=*= /home/cupofcoffee/PycharmProjects/multilayer-ai-msc/multilayerai/ml/rl/environment.py
import glob
import os
from typing import List

import gymnasium as gym
import numpy as np
import tmm

from multilayerai.entity import Requirement
from multilayerai.entity.material import AIR
from multilayerai.ml.rl.action import AddLayerAction, Action
from multilayerai.utils.parser.refractiveindex_info import RefractiveIndexInfoCsvParser


class TmmFeedbackEnvironment(gym.Env):
    _WAVELENGTH_BOUND_LO = 3
    _WAVELENGTH_BOUND_HI = 14

    def __init__(
       