In [26]:
!pip install openai
!pip install pydantic
!pip install typing-extensions



In [None]:
# Standard library
import base64
import importlib
import os
import sys
from enum import Enum
from getpass import getpass
from typing import Union

# Third-party
import openai
from openai import OpenAI
from pydantic import BaseModel

# The speech helper lives outside the import path, so its directory has to be
# added to sys.path first. Guarded so re-running the cell does not keep
# appending duplicates.
WHISPER_WRAPPER_DIR = 'Speech2Text/pupper_llm/pupper_llm/simple_scripts'
if WHISPER_WRAPPER_DIR not in sys.path:
    sys.path.append(WHISPER_WRAPPER_DIR)
import whisper_ping_wrapper as w

# Forcefully reload the helper so edits to it are picked up without a kernel restart.
importlib.reload(w)

# Never hardcode (or blank out) the key: reuse the value already exported in the
# environment and only prompt, without echoing, when it is missing or empty.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# OpenAI() reads OPENAI_API_KEY from the environment.
client = OpenAI()

In [28]:
# walking related classes
class Backward(BaseModel):
  """Walk backward by the given number of steps."""
  # The class docstring above doubles as the tool description sent to the LLM
  # by openai.pydantic_function_tool, so keep it short and imperative.
  steps: int

class Forward(BaseModel):
  """Walk forward by the given number of steps."""
  # The class docstring above doubles as the tool description sent to the LLM
  # by openai.pydantic_function_tool, so keep it short and imperative.
  steps: int

class TurnLeft(BaseModel):
  """Turn left by the given number of degrees."""
  # The class docstring above doubles as the tool description sent to the LLM
  # by openai.pydantic_function_tool, so keep it short and imperative.
  degrees: int

class TurnRight(BaseModel):
  """Turn right by the given number of degrees."""
  # The class docstring above doubles as the tool description sent to the LLM
  # by openai.pydantic_function_tool, so keep it short and imperative.
  degrees: int

class SitDown(BaseModel):
  """Sit down."""
  # Takes no arguments. The class docstring above doubles as the tool
  # description sent to the LLM by openai.pydantic_function_tool.

class StandUp(BaseModel):
  """Stand up."""
  # Takes no arguments. The class docstring above doubles as the tool
  # description sent to the LLM by openai.pydantic_function_tool.

# Field-less marker model. It is not part of the module-level TOOLS list.
# NOTE(review): presumably meant as the "task finished" signal for the
# walking loop -- confirm the intent before registering it as a tool.
class Complete(BaseModel):
  pass

# System prompts, one per game mode. The list index is the 0-based mode index
# (the "# N" labels below are the 1-based numbers the mode selector answers with).
TASK_PROMPTS = [
   # 1: Finite set of walking instructions
   """
   You are a walking assistant executing a predefined, finite series of instructions, e.g. 'walk 5 steps forward and sit down',
   but not 'walk to the green tennis ball'. \n When given movement instructions, use the TurnRight or TurnLeft tools for rotation commands,
    SitDown or StandUp tools for posture commands, and Forward or Backward tools for movement commands.
   """,
   # 2: General walking instructions
   """
   You are a walking assistant executing a general set of instructions, e.g. 'walk towards the tennis ball',
   but not specific instructions 'walk 5 steps forward and sit down'. \n When given movement instructions, use the TurnRight or TurnLeft tools for rotation commands,
   SitDown or StandUp tools for posture commands, and Forward or Backward tools for movement commands.
   """,
   # 3: Red light green light game
   # (fixed: the original sentence repeated "or the words" and said "StandUp tools")
   """
   You are a robot dog that obeys commands based on hand signals in a game called red light green light. When shown hand signs, use the
   SitDown tool if the hand signal is closed or the words red light are used or the StandUp tool if the hand signal is open or the words green light are used.
   """
]


# Function-tool schemas offered to the LLM, built from the movement and
# posture models in a fixed order.
TOOLS = [
    openai.pydantic_function_tool(tool_model)
    for tool_model in (
        Backward,
        Forward,
        TurnRight,
        TurnLeft,
        SitDown,
        StandUp,
    )
]

# Currently active system prompt and tool list. Both start empty; the game
# handlers read them and the driver loop assigns them before each turn.
SYSTEM_PROMPT, SYSTEM_TOOLS = "", []

In [29]:
class GameModeSelector:
    """Determines which game mode best matches the user's description.

    The classification is delegated to the LLM, which is asked to answer with
    a single 1-based mode number.
    """

    def __init__(self):
        # Mode name -> system prompt, in the same order as TASK_PROMPTS.
        self.task_prompts = {
            "finite_walking": TASK_PROMPTS[0],
            "general_walking": TASK_PROMPTS[1],
            "red_light_green_light": TASK_PROMPTS[2]
        }
    
    def determine_mode(self, user_description):
        """Ask the LLM which mode matches `user_description`.

        Args:
            user_description: Free-form text describing the task or game.

        Returns:
            The 0-based mode index (0 = finite walking, 1 = general walking,
            2 = red light green light).

        Raises:
            ValueError: If the model's reply is empty, not a number, or not
                one of the offered mode numbers.
        """
        selection_prompt = """Given the following game description, determine which mode it matches best:
        1. Finite walking mode: For specific, countable movement instructions
        2. General walking mode: For goal-oriented movement instructions
        3. Red light green light mode: For the red light/green light game with hand signals
        
        Respond with only the number (1, 2, or 3) that best matches.
        
        Description: """
        
        messages = [{"role": "user", "content": selection_prompt + user_description}]
        
        response = client.chat.completions.create(
            model='gpt-4o-2024-08-06',
            messages=messages,
            max_tokens=1  # the answer is a single digit
        )
        
        # `content` can be None; normalise it so the checks below raise a
        # ValueError instead of an AttributeError.
        answer = (response.choices[0].message.content or "").strip()
        try:
            mode_number = int(answer)
        except ValueError:
            raise ValueError(
                f"Mode selector returned a non-numeric answer: {answer!r}"
            ) from None

        # Reject numbers outside the offered range: "0" would otherwise become
        # index -1 and silently select the last prompt, and "4"+ would fail
        # later with an IndexError far from the cause.
        mode_count = len(self.task_prompts)
        if not 1 <= mode_number <= mode_count:
            raise ValueError(
                f"Mode selector returned {mode_number}, expected a number from 1 to {mode_count}"
            )

        return mode_number - 1  # Convert to 0-based index

In [30]:
class Games:
    """Handlers for the game modes.

    Each handler sends the user's request to the LLM and prints the tool calls
    the model answers with. The single-turn handlers read the module-level
    SYSTEM_PROMPT and SYSTEM_TOOLS, which the caller must set beforehand.
    """

    MODEL = 'gpt-4o-2024-08-06'

    # Default cap on LLM round-trips in general_walking_mode, so a model that
    # never reports completion cannot keep the loop running forever.
    MAX_WALKING_STEPS = 10

    # Name of the tool the model calls to signal that the task is finished.
    COMPLETE_TOOL_NAME = "Complete"

    @staticmethod
    def _print_tool_calls(tool_calls):
        """Print every tool call in `tool_calls` (which may be None or empty)."""
        for tool_call in tool_calls or []:
            print(f"Function: {tool_call.function}")

    def _run_single_turn(self, user_prompt):
        """Send one system+user exchange using the active prompt/tools and print the tool calls."""
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]
        response = client.chat.completions.create(
            model=self.MODEL,
            messages=messages,
            tools=SYSTEM_TOOLS
        )
        self._print_tool_calls(response.choices[0].message.tool_calls)

    # GAME 1
    def finite_walking_mode(self, user_prompt):
        """Run one turn of the finite walking mode for `user_prompt`.

        Side effect: resets the module-level SYSTEM_TOOLS and SYSTEM_PROMPT to
        empty afterwards, so the caller has to set them again before the next
        turn.
        """
        global SYSTEM_PROMPT, SYSTEM_TOOLS

        self._run_single_turn(user_prompt)

        SYSTEM_TOOLS = []
        SYSTEM_PROMPT = ""


    # GAME 2
    def red_light_green_light_mode(self, user_prompt):
        """Run one turn of the red light green light mode for `user_prompt`."""
        self._run_single_turn(user_prompt)

    # GAME 3
    def general_walking_mode(self, user_prompt, max_steps=None):
        """Repeatedly ask the LLM for the next move until the task is done.

        Each round sends the current world observation, prints the tool calls
        the model returns, and records them in the conversation so the model
        can see what it already did. The loop stops when the model calls the
        Complete tool, when it returns no tool call at all, or after
        `max_steps` rounds.

        Args:
            user_prompt: The user's goal-oriented instruction.
            max_steps: Maximum number of LLM round-trips; defaults to
                MAX_WALKING_STEPS.
        """
        if max_steps is None:
            max_steps = self.MAX_WALKING_STEPS

        # Offer the active tools plus a Complete tool, so the model has a way
        # to tell us it is finished.
        tools = list(SYSTEM_TOOLS)
        tool_names = [tool["function"]["name"] for tool in tools]
        if self.COMPLETE_TOOL_NAME not in tool_names:
            tools.append(openai.pydantic_function_tool(Complete))
            tool_names.append(self.COMPLETE_TOOL_NAME)
        tool_list = ", ".join(tool_names)

        # f-string: the original plain string sent the placeholders literally,
        # so the model never saw the user's instructions.
        walking_prompt = f"""You are a robot dog and the provided image shows the current state of the world. \n
        You should navigate the world to complete the user's instructions. The user's instructions are: {user_prompt} \n
        You have the following tools at your disposal: {tool_list} \n 
        Use the TurnRight or TurnLeft tools for rotation commands (i.e. to avoid obstacles or a wall),
        and Forward or Backward tools for movement commands.
        Call the Complete tool once the user's instructions have been carried out."""


        #TODO: need some sort of condition to break out of this loop if the user says something

        messages = [{"role": "system", "content": walking_prompt}]
        for _ in range(max_steps):

            #TODO: replace this with a live image input
            image = "Wall is 5 steps ahead"
            messages.append({"role": "user", "content": image})
            response = client.chat.completions.create(
                model=self.MODEL,
                messages=messages,
                tools=tools
            )
            reply = response.choices[0].message
            tool_calls = reply.tool_calls or []
            self._print_tool_calls(tool_calls)

            # No command issued: there is nothing to execute, so stop.
            if not tool_calls:
                break
            if any(call.function.name == self.COMPLETE_TOOL_NAME for call in tool_calls):
                break

            # Record the assistant turn, then answer every tool call: the API
            # requires a "tool" message per tool_call_id before the next turn.
            messages.append({
                "role": "assistant",
                "content": reply.content,
                "tool_calls": [
                    {
                        "id": call.id,
                        "type": "function",
                        "function": {
                            "name": call.function.name,
                            "arguments": call.function.arguments,
                        },
                    }
                    for call in tool_calls
                ],
            })
            for call in tool_calls:
                # No robot feedback is available yet, so just acknowledge.
                messages.append({"role": "tool", "tool_call_id": call.id, "content": "ok"})



In [43]:
mode_selector = GameModeSelector()
game = Games()

# Handler for each 0-based mode index returned by GameModeSelector.determine_mode.
mode_handlers = {
    0: game.finite_walking_mode,
    1: game.general_walking_mode,
    2: game.red_light_green_light_mode,
}


def is_stop_command(text):
    """Return True when `text` is the word "break".

    Case, surrounding whitespace and trailing punctuation are ignored, because
    a speech transcript is unlikely to come back as the exact lowercase word.
    """
    return (text or "").strip().strip(".,!?").lower() == "break"


user_description = input("Enter a task description: ")

# Check for the stop word before classifying, so stopping costs no API call.
while not is_stop_command(user_description):

    try:
        mode_index = mode_selector.determine_mode(user_description)
    except ValueError as err:
        # The classifier reply was not a usable mode number; skip this turn
        # and listen again instead of crashing the loop.
        print(f"Could not determine a task mode: {err}")
        mode_index = None

    handler = mode_handlers.get(mode_index)
    if handler is not None:
        # The handlers read these module-level settings, so set them only
        # once the index is known to be valid.
        SYSTEM_TOOLS = TOOLS
        SYSTEM_PROMPT = TASK_PROMPTS[mode_index]

        print(f"Task mode: {mode_index}")
        handler(user_description)
    elif mode_index is not None:
        print(f"Unknown task mode: {mode_index}")

    # Record the next spoken command and transcribe it.
    audio_data = w.record_audio(duration=5.0)
    wav_io = w.audio_to_wav(audio_data)
    filename = 'test_audio.wav'
    with open(filename, 'wb') as f:
        f.write(wav_io.read())

    user_description = w.transcribe_audio_with_whisper(filename= filename)

Task mode: 2
Function: Function(arguments='{}', name='SitDown')
Function: Function(arguments='{}', name='StandUp')
Recording audio...
Audio recording finished.
Transcribing audio using Whisper model...
Task mode: 2
Function: Function(arguments='{}', name='SitDown')
Recording audio...
Audio recording finished.
Transcribing audio using Whisper model...
Task mode: 2
Function: Function(arguments='{}', name='StandUp')
Recording audio...
Audio recording finished.
Transcribing audio using Whisper model...
Task mode: 1
Function: Function(arguments='{"steps":5}', name='Forward')
Function: Function(arguments='{"steps":5}', name='Forward')


KeyboardInterrupt: 