<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/AGENT_DEMO_CONCEPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install unsloth -q
!pip install transformers -q
!pip install torch -q
!pip install colab-env -q
!pip install nest_asyncio -q

## JFK TO LAX

In [4]:
from IPython import get_ipython
from IPython.display import display
import os
import colab_env
# Ensure these are set early in your notebook if you haven't already
os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = '300'
os.environ['HF_HUB_ETAG_TIMEOUT'] = '300'

from IPython import get_ipython
from IPython.display import display
import json
import time
import random
import asyncio
import torch
from transformers import TextStreamer
from unsloth import FastLanguageModel # Import FastLanguageModel explicitly
from requests.exceptions import ReadTimeout # Import the specific exception
# Import other necessary exceptions if needed, e.g., for asyncio
from asyncio import TimeoutError as AsyncioTimeoutError


# --- Configuration ---
# Passed as `max_seq_length` to FastLanguageModel.from_pretrained.
MAX_SEQ_LENGTH = 2048
# Passed as `dtype` to FastLanguageModel.from_pretrained.
DTYPE = torch.bfloat16
# Passed as `load_in_4bit` to FastLanguageModel.from_pretrained.
LOAD_IN_4BIT = True

# --- Helper Function for Logging ---
def log_phase(log_list, log_type, title, content):
    """Record one agent phase in *log_list* and echo it to stdout.

    The stored entry carries a timestamp-based ``id`` plus the given
    ``type``, ``title`` and ``content``. The console echo is a header line
    (title and a title-cased, de-hyphenated type), the content, and a dashed
    rule ``len(title) + 12`` characters wide.
    """
    entry = dict(id=time.time(), type=log_type, title=title, content=content)
    log_list.append(entry)

    pretty_type = log_type.replace('-', ' ').title()
    print(f"\n--- {title} ({pretty_type}) ---")
    print(content)
    rule_width = len(title) + 12
    print("-" * rule_width)

# --- Simulated APIs (Tools) ---
def simulate_weather_api(location):
    """Pretend to query a weather service for *location*.

    Sleeps one second to mimic latency. Raises ``Exception`` when the
    location name contains "storm" (case-insensitive); otherwise returns a
    success payload, using the word 'destination' when no location is given.
    """
    time.sleep(1)  # Simulate network delay

    storm_reported = bool(location) and "storm" in location.lower()
    if storm_reported:
        raise Exception(f"Severe weather advisory for {location}. Potential delays.")

    report = f"Current weather in {location or 'destination'} is clear with light winds."
    return dict(status='success', data=report)

def simulate_route_api(origin, destination):
    """Pretend to compute a flight route between two airports.

    Sleeps 1.2 seconds to mimic latency, then returns a success payload
    that always reports a five-hour flight time.
    """
    time.sleep(1.2)  # Simulate network delay
    summary = f"Optimal flight path from {origin} to {destination} identified. Estimated flight time: 5 hours."
    return dict(status='success', data=summary)

def simulate_notams_api(location):
    """Pretend to look up NOTAMs (Notices to Airmen) for *location*.

    Sleeps 0.8 seconds to mimic latency and always reports that nothing
    significant was found; the word 'route' stands in for a missing location.
    """
    time.sleep(0.8)  # Simulate network delay
    subject = location or 'route'
    notice = f"No significant NOTAMs or airspace restrictions for {subject}."
    return dict(status='success', data=notice)

def simulate_booking_api(details, attempt=1):
    """Pretend to book the flight described by the *details* mapping.

    Sleeps 1.5 seconds to mimic latency. Only when ``attempt`` is 1, a random
    draw below 0.6 makes the booking fail with ``Exception`` so the caller's
    reflection path gets exercised. Any other attempt number always succeeds.
    """
    time.sleep(1.5)  # Simulate network delay

    is_first_try = attempt == 1
    # The random draw only happens on the first try (short-circuit).
    if is_first_try and random.random() < 0.6:
        raise Exception(f"Booking failed for {details.get('flight', 'unknown flight')}. Seat unavailable or payment error.")

    confirmation = f"Flight {details.get('flight', 'N/A')} from {details.get('origin', 'N/A')} to {details.get('destination', 'N/A')} booked successfully!"
    return dict(status='success', data=confirmation)

# --- LLM Interaction Function with Unsloth ---
# Module-level cache for the loaded model and tokenizer so they are not
# reloaded on every call. Both stay None until initialize_unsloth_model()
# assigns them after a successful load.
_unsloth_model = None
_unsloth_tokenizer = None

async def initialize_unsloth_model(retries=10, delay=10):
    """Load the Unsloth model and tokenizer once, retrying on timeouts.

    The loaded pair is cached in the module globals ``_unsloth_model`` and
    ``_unsloth_tokenizer``; later calls return the cached pair immediately.

    Args:
        retries: Maximum number of load attempts (must be >= 1).
        delay: Seconds to wait before the first retry. The wait is
            multiplied by 1.5 after every timed-out attempt.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: If ``retries`` is less than 1 and nothing is cached.
        ImportError: If loading fails because a dependency is missing.
        Exception: If every attempt timed out, or loading failed for any
            other reason. The original error is chained as ``__cause__``.
    """
    global _unsloth_model, _unsloth_tokenizer
    if _unsloth_model is not None:
        return _unsloth_model, _unsloth_tokenizer

    if retries < 1:
        # Without this guard the loop would not run and (None, None) would be
        # handed back, failing later with an unrelated-looking error.
        raise ValueError("retries must be at least 1")

    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B"
    for attempt in range(1, retries + 1):
        try:
            print(f"\nAttempt {attempt} of {retries}: Loading {model_name} model and tokenizer...")
            _unsloth_model, _unsloth_tokenizer = FastLanguageModel.from_pretrained(
                model_name=model_name,
                max_seq_length=MAX_SEQ_LENGTH,
                dtype=DTYPE,
                load_in_4bit=LOAD_IN_4BIT,
                # token=True # Uncomment if you have logged in and want to use your token
            )
        except (ReadTimeout, AsyncioTimeoutError, TimeoutError) as e:
            if attempt == retries:
                # Out of attempts: report the failure instead of announcing
                # a retry that will never happen.
                raise Exception(
                    f"Failed to load unsloth model after {retries} attempts due to timeout: {e}"
                ) from e
            print(f"Timeout encountered: {e}. Retrying in {delay} seconds...")
            await asyncio.sleep(delay)
            delay *= 1.5  # Gentle exponential backoff
        except ImportError as e:
            raise ImportError(
                "Unsloth and its dependencies (e.g., transformers, torch) are not installed. "
                "Please install them: pip install unsloth[cu121] transformers torch"
            ) from e
        except Exception as e:
            # Non-timeout failures are not retried.
            raise Exception(f"Failed to load unsloth model: {e}") from e
        else:
            print("DeepSeek-R1-Distill-Llama-8B model and tokenizer loaded successfully.")
            return _unsloth_model, _unsloth_tokenizer

    return _unsloth_model, _unsloth_tokenizer


def _extract_json_object(text):
    """Return the first JSON object that can be decoded from *text*.

    The span from the first '{' to the last '}' is tried first. If that span
    is not valid JSON (for example because prose containing braces follows
    the object), every '{' position is tried in order with
    ``JSONDecoder.raw_decode`` and the first result that is a dict wins.

    Raises:
        ValueError: If *text* has no brace-delimited span or nothing in it
            decodes to a JSON object.
    """
    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end <= start:
        raise ValueError("LLM response did not contain a structure resembling JSON.")

    try:
        return json.loads(text[start:end + 1])
    except json.JSONDecodeError as e:
        span_error = e  # keep a reference; `e` is unbound after this block

    decoder = json.JSONDecoder()
    pos = start
    while pos != -1:
        try:
            candidate, _ = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        pos = text.find('{', pos + 1)

    raise ValueError(f"LLM response could not be parsed as JSON: {span_error}") from span_error


async def call_unsloth_llm(prompt, schema=None):
    """Generate a completion for *prompt* with the locally loaded model.

    Args:
        prompt: User message sent to the model as a single-turn chat.
        schema: When truthy, the response is parsed and the decoded JSON
            object is returned. The schema is only used as an on/off switch;
            the response is not validated against it.

    Returns:
        The decoded JSON object when ``schema`` is truthy, otherwise the raw
        generated text.

    Raises:
        ValueError: If ``schema`` is truthy and no JSON object can be
            extracted from the response.
    """
    model, tokenizer = await initialize_unsloth_model()

    messages = [
        {"role": "user", "content": prompt}
    ]

    try:
        input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except AttributeError:
        # Tokenizer has no chat-template support: send the bare prompt.
        input_text = prompt

    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,  # Adjust as needed for plan length
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Drop the echoed prompt tokens so only newly generated text is decoded.
    prompt_length = inputs.input_ids.shape[1]
    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    if not schema:
        return response_text

    try:
        return _extract_json_object(response_text)
    except ValueError as e:
        # Warn once with the raw text, then let the ValueError propagate
        # unchanged so callers see a single, un-nested message.
        print(f"Warning: could not extract JSON from LLM response. Raw text:\n{response_text}\nError: {e}")
        raise


# --- Agent Orchestration Logic ---
def _is_valid_plan(candidate):
    """Return True when *candidate* is a non-empty list made only of dicts."""
    return (
        isinstance(candidate, list)
        and bool(candidate)
        and all(isinstance(step, dict) for step in candidate)
    )


async def plan_flight_agent(flight_request):
    """Run the flight-planning agent end to end for *flight_request*.

    Flow:
      1. Load the model (returns early if that fails or the request is empty).
      2. Ask the LLM for a JSON plan (a list of ``{"action", "params"}`` steps).
      3. Execute the steps with the simulated tools.
      4. If a step fails before the booking has gone through, ask the LLM for
         a revised plan and try again, up to two execution attempts in total.
      5. Print a final SUCCESS / ERROR status line.

    Every phase is recorded through ``log_phase``. The function returns None;
    all results are reported on stdout.
    """
    # Load the model up front so a load failure is reported before planning.
    try:
        await initialize_unsloth_model(retries=10, delay=10)
    except Exception as e:
        print("\n--- Model Initialization Failed ---")
        print(f"Could not initialize the Unsloth model: {e}")
        print("---------------------------------")
        log_phase([], 'failure', 'Model Initialization Failed', f'Could not initialize the Unsloth model: {e}')
        return  # Exit if model fails to load

    if not flight_request:
        print("Error: No flight request provided.")
        return

    agent_phases = []
    is_error = False
    error_message = ''
    booking_successful = False
    plan_steps = []

    try:
        # 1. Initial User Request Perception
        log_phase(agent_phases, 'user', 'User Request', f'"{flight_request}"')

        # 2. Planning Phase - LLM generates initial plan
        log_phase(agent_phases, 'agent-thinking', 'Agent Planning (LLM)', 'Consulting the LLM to generate a flight plan...')

        planning_prompt = f"""You are an AI flight planning agent. Based on the user's request "{flight_request}", generate a step-by-step flight plan. Consider necessary steps like checking weather, finding optimal routes, checking NOTAMs, and booking the flight.
        You MUST output the plan ONLY as a valid JSON object with a single key "plan", which is an array of objects. Each object should have an "action" (string, e.g., "Check weather", "Get flight route", "Check NOTAMs", "Book flight") and "params" (object with relevant key-value pairs, e.g., {{"location": "New York"}}, {{"origin": "JFK", "destination": "LAX", "date": "tomorrow"}}).
        Assume the user is requesting a flight from "New York (JFK)" to "Los Angeles (LAX)" for "tomorrow" if not explicitly specified.
        DO NOT include any introductory text, conversational phrases, or explanations before or after the JSON. The output must be ONLY the valid JSON object.
        Example structure: {{"plan": [{{"action": "Check weather", "params": {{"location": "Los Angeles"}}}}, {{"action": "Get flight route", "params": {{"origin": "JFK", "destination": "LAX", "date": "tomorrow"}}}}, {{"action": "Check NOTAMs", "params": {{"location": "LAX"}}}}, {{"action": "Book flight", "params": {{"origin": "JFK", "destination": "LAX", "date": "tomorrow", "flight": "auto-selected"}}}}]}}
        """

        # Passed to call_unsloth_llm to request JSON parsing of the response.
        plan_schema = {
            "type": "OBJECT",
            "properties": {
                "plan": {
                    "type": "ARRAY",
                    "items": {
                        "type": "OBJECT",
                        "properties": {
                            "action": { "type": "STRING" },
                            "params": { "type": "OBJECT", "additionalProperties": True }
                        },
                        "required": ["action", "params"]
                    }
                }
            },
            "required": ["plan"]
        }

        try:
            llm_plan = await call_unsloth_llm(planning_prompt, plan_schema)
            plan_steps = llm_plan.get("plan", [])
            if not _is_valid_plan(plan_steps):
                # Missing key, empty list, or something that is not a list of step objects.
                raise ValueError("LLM response did not contain a valid 'plan' array.")

            log_phase(agent_phases, 'agent-output', 'LLM Generated Plan', f'Initial plan generated:\n{json.dumps(plan_steps, indent=2)}')

        except ValueError as e:
            # Planning failed: record it and fall through to the final status report.
            error_message = f"Failed to get valid plan from LLM: {e}"
            is_error = True
            log_phase(agent_phases, 'failure', 'LLM Planning Failed', error_message)

        # 3. Execution Phase with Reflection Loop (only if planning was successful)
        if not is_error:
            current_plan = list(plan_steps)
            attempt_count = 0
            MAX_ATTEMPTS = 2  # Allow one re-attempt after initial failure

            while not booking_successful and attempt_count < MAX_ATTEMPTS:
                attempt_count += 1
                reflection_needed = False
                last_failed_action = None
                last_error_message = None

                # Execute steps in the current plan
                for i, step in enumerate(current_plan):
                    log_phase(agent_phases, 'agent-executing', f'Executing Step {i + 1} (Attempt {attempt_count})',
                              f'Performing action: "{step.get("action")}" with params: {json.dumps(step.get("params"))}')

                    try:
                        tool_result = None
                        # `or` guards against an explicit JSON null for either key.
                        action_type = (step.get("action") or "").lower()
                        params = step.get("params") or {}

                        if action_type == 'check weather':
                            tool_result = await asyncio.to_thread(simulate_weather_api, params.get('location'))
                        elif action_type == 'get flight route':
                            tool_result = await asyncio.to_thread(simulate_route_api, params.get('origin'), params.get('destination'))
                        elif action_type == 'check notams':
                            tool_result = await asyncio.to_thread(simulate_notams_api, params.get('location'))
                        elif action_type == 'book flight':
                            tool_result = await asyncio.to_thread(simulate_booking_api, params, attempt_count)
                            booking_successful = True  # Only reached when the booking call did not raise
                        else:
                            raise ValueError(f"Unknown action: {step.get('action')}")

                        log_phase(agent_phases, 'tool-output', f'Tool Output for {step.get("action")}',
                                  tool_result.get('data') or json.dumps(tool_result, indent=2))

                    except Exception as tool_error:
                        # 4. Reflection/Error Handling - LLM re-plans on failure
                        last_failed_action = step.get("action")
                        last_error_message = str(tool_error)
                        log_phase(agent_phases, 'failure', f'Failure during {last_failed_action}',
                                  f'Error: {last_error_message}. Initiating reflection...')
                        reflection_needed = True
                        break  # Stop executing this plan and go reflect

                if not reflection_needed or booking_successful:
                    # Stop here when nothing failed (re-running the same plan
                    # unchanged cannot produce a booking it does not contain),
                    # or when the booking already went through and only a
                    # later step failed (re-planning would risk booking twice).
                    break

                if attempt_count < MAX_ATTEMPTS:
                    log_phase(agent_phases, 'agent-thinking', 'Agent Reflection (LLM)', 'Encountered an issue. Consulting LLM for re-planning...')

                    reflection_prompt = f"""The previous attempt to plan the flight with request "{flight_request}" failed during the action "{last_failed_action}" with the error: "{last_error_message}". The current plan being executed was: {json.dumps(current_plan)}.
                    Please provide a revised plan or a suggestion on how to proceed.
                    Focus on addressing the error encountered. If a specific step failed, suggest an alternative or a retry strategy. If the failure was in booking, try to suggest re-attempting or suggest alternative options.
                    Provide the output ONLY as a valid JSON object with a single key "revisedPlan", which is an array of objects, similar to the initial plan structure. Ensure the JSON is well-formed and nothing else is included in the output.
                    If re-attempting a booking, ensure the "book flight" action is included with the necessary details from the previous attempt if possible.
                    """
                    try:
                        llm_reflection = await call_unsloth_llm(reflection_prompt, plan_schema)
                        # The model may answer under 'revisedPlan' or reuse 'plan';
                        # prefer the former, fall back to the latter.
                        new_plan = llm_reflection.get("revisedPlan") or llm_reflection.get("plan")
                        if not _is_valid_plan(new_plan):
                            raise ValueError("LLM reflection did not provide a valid revised plan array.")
                        current_plan = list(new_plan)

                        log_phase(agent_phases, 'agent-output', 'LLM Reflection & Revised Plan',
                                  f'LLM suggested revised plan:\n{json.dumps(current_plan, indent=2)}')

                    except ValueError as e:
                        error_message = f"Failed to get valid revised plan from LLM during reflection: {e}"
                        is_error = True
                        log_phase(agent_phases, 'failure', 'LLM Reflection Failed', error_message)
                        break  # Exit the while loop if reflection fails
                    except Exception as e:
                        error_message = f"An unexpected error occurred during LLM reflection: {e}"
                        is_error = True
                        log_phase(agent_phases, 'failure', 'LLM Reflection Failed', error_message)
                        break  # Exit the while loop if reflection fails

                else:
                    # A step failed on the last permitted attempt.
                    error_message = "Max re-planning attempts reached. Agent unable to complete the task."
                    is_error = True
                    log_phase(agent_phases, 'failure', 'Max Attempts Reached', error_message)
                    break

            # Check final status after the execution loop
            if booking_successful:
                # 5. Completion Phase
                log_phase(agent_phases, 'success', 'Flight Planning Completed!', 'The flight planning process was successful.')
            elif not is_error:
                # The plan ran cleanly but never booked anything (no "Book flight" step).
                error_message = "Flight planning process finished without successful booking or explicit error."
                is_error = True
                log_phase(agent_phases, 'failure', 'Execution Unfinished', error_message)

    except Exception as err:
        # Unexpected errors in the orchestration itself (not tool failures).
        print("\n--- Orchestration Error ---")
        print(f"Agent orchestration failed: {err}")
        print("--------------------------")
        is_error = True
        error_message = f"Agent orchestration failed: {err}"
        # Avoid logging the same failure twice.
        if not agent_phases or agent_phases[-1].get('content') != error_message:
            log_phase(agent_phases, 'failure', 'System Error', error_message)

    # Final Status Report
    if is_error:
        print(f"\nFINAL STATUS: ERROR - {error_message}")
    elif booking_successful:
        print("\nFINAL STATUS: SUCCESS - Flight planning completed.")
    else:
        # Fallback if neither flag was set.
        print("\nFINAL STATUS: UNKNOWN - Process finished in an unexpected state.")


# --- Main execution block for Jupyter notebook ---
# Define the flight request here for automation
# Request text handed to the agent; edit this string to plan a different trip.
automatic_flight_request = "Plan a flight from New York to Los Angeles for tomorrow."

# Top-level `await`: relies on the notebook cell running inside an event loop.
# NOTE(review): a plain .py script would need asyncio.run(...) instead — confirm if this is ever run outside a notebook.
await plan_flight_agent(automatic_flight_request)


Attempt 1 of 10: Loading unsloth/DeepSeek-R1-Distill-Llama-8B model and tokenizer...
==((====))==  Unsloth 2025.6.5: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
DeepSeek-R1-Distill-Llama-8B model and tokenizer loaded successfully.

--- User Request (User) ---
"Plan a flight from New York to Los Angeles for tomorrow."
------------------------

--- Agent Planning (LLM) (Agent Thinking) ---
Consulting the LLM to generate a flight plan...
--------------------------------

--- LLM Generated Plan (Agent Output) ---
Initial plan generated:
[
  {
    "action": "Check weather",
    "params": {
      "location": "New York"
    

## YUL TO NRT

In [6]:
from IPython import get_ipython
from IPython.display import display
import os
import colab_env
import re # Import the regular expression module
# Ensure these are set early in your notebook if you haven't already
os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = '300'
os.environ['HF_HUB_ETAG_TIMEOUT'] = '300'

from IPython import get_ipython
from IPython.display import display
import json
import time
import random
import asyncio
import torch
from transformers import TextStreamer
from unsloth import FastLanguageModel # Import FastLanguageModel explicitly
from requests.exceptions import ReadTimeout # Import the specific exception
# Import other necessary exceptions if needed, e.g., for asyncio
from asyncio import TimeoutError as AsyncioTimeoutError


# --- Configuration ---
# Passed as `max_seq_length` to FastLanguageModel.from_pretrained.
MAX_SEQ_LENGTH = 2048
# Passed as `dtype` to FastLanguageModel.from_pretrained.
DTYPE = torch.bfloat16
# Passed as `load_in_4bit` to FastLanguageModel.from_pretrained.
LOAD_IN_4BIT = True

# --- Helper Function for Logging ---
def log_phase(log_list, log_type, title, content):
    """Record one agent phase in *log_list* and echo it to stdout.

    The stored entry carries a timestamp-based ``id`` plus the given
    ``type``, ``title`` and ``content``. The console echo is a header line
    (title and a title-cased, de-hyphenated type), the content, and a dashed
    rule ``len(title) + 12`` characters wide.
    """
    entry = dict(id=time.time(), type=log_type, title=title, content=content)
    log_list.append(entry)

    pretty_type = log_type.replace('-', ' ').title()
    print(f"\n--- {title} ({pretty_type}) ---")
    print(content)
    rule_width = len(title) + 12
    print("-" * rule_width)

# --- Simulated APIs (Tools) ---
def simulate_weather_api(location):
    """Pretend to query a weather service for *location*.

    Sleeps one second to mimic latency. Raises ``Exception`` when the
    location name contains "storm" (case-insensitive); otherwise returns a
    success payload, using the word 'destination' when no location is given.
    """
    time.sleep(1)  # Simulate network delay

    storm_reported = bool(location) and "storm" in location.lower()
    if storm_reported:
        raise Exception(f"Severe weather advisory for {location}. Potential delays.")

    report = f"Current weather in {location or 'destination'} is clear with light winds."
    return dict(status='success', data=report)

def simulate_route_api(origin, destination, flight_type="standard"):
    """Pretend to compute a flight route and classify it as long-haul or not.

    A route is long-haul when ``flight_type`` contains "long-haul"
    (case-insensitive), or when one endpoint is in the North American list
    and the other in the overseas list below (either direction).

    Args:
        origin: Departure city or airport code. May be None.
        destination: Arrival city or airport code. May be None.
        flight_type: Optional hint from the plan. May be None.

    Returns:
        A dict with ``status``, a human-readable ``data`` string, and
        ``flight_details`` holding ``estimated_time`` (hours: a random
        8-16 for long-haul, otherwise 5) and ``is_long_haul``.
    """
    time.sleep(1.2)  # Simulate network delay

    north_american = {"montreal", "yul", "jfk", "new york", "lax", "los angeles"}
    overseas = {"tokyo", "nrt", "sydney", "london", "lhr"}

    # Callers pass params.get(...), so any of these may be None; treat a
    # missing value as "no match" instead of crashing on .lower().
    origin_key = (origin or "").lower()
    destination_key = (destination or "").lower()
    requested_long_haul = "long-haul" in (flight_type or "").lower()

    crosses_ocean = (
        (origin_key in north_american and destination_key in overseas)
        or (destination_key in north_american and origin_key in overseas)
    )
    is_long_haul = requested_long_haul or crosses_ocean

    if is_long_haul:
        estimated_time = random.randint(8, 16)
        summary = f"Optimal long-haul flight path from {origin} to {destination} identified. Estimated flight time: {estimated_time} hours. This is a long-haul flight."
    else:
        estimated_time = 5
        summary = f"Optimal flight path from {origin} to {destination} identified. Estimated flight time: {estimated_time} hours."

    return {
        'status': 'success',
        'data': summary,
        'flight_details': {'estimated_time': estimated_time, 'is_long_haul': is_long_haul},
    }


def simulate_notams_api(location):
    """Pretend to look up NOTAMs (Notices to Airmen) for *location*.

    Sleeps 0.8 seconds to mimic latency and always reports that nothing
    significant was found; the word 'route' stands in for a missing location.
    """
    time.sleep(0.8)  # Simulate network delay
    subject = location or 'route'
    notice = f"No significant NOTAMs or airspace restrictions for {subject}."
    return dict(status='success', data=notice)

def simulate_booking_api(details, attempt=1):
    """Pretend to book the flight described by the *details* mapping.

    Sleeps 1.5 seconds to mimic latency. Only when ``attempt`` is 1, a random
    draw below 0.6 makes the booking fail with ``Exception`` so the caller's
    reflection path gets exercised. Any other attempt number always succeeds.
    """
    time.sleep(1.5)  # Simulate network delay

    is_first_try = attempt == 1
    # The random draw only happens on the first try (short-circuit).
    if is_first_try and random.random() < 0.6:
        raise Exception(f"Booking failed for {details.get('flight', 'unknown flight')}. Seat unavailable or payment error.")

    confirmation = f"Flight {details.get('flight', 'N/A')} from {details.get('origin', 'N/A')} to {details.get('destination', 'N/A')} booked successfully!"
    return dict(status='success', data=confirmation)

# --- LLM Interaction Function with Unsloth ---
# Module-level cache for the loaded model and tokenizer so they are not
# reloaded on every call. Both stay None until initialize_unsloth_model()
# assigns them after a successful load.
_unsloth_model = None
_unsloth_tokenizer = None

async def initialize_unsloth_model(retries=10, delay=10):
    """Load the Unsloth model and tokenizer once, retrying on timeouts.

    The loaded pair is cached in the module globals ``_unsloth_model`` and
    ``_unsloth_tokenizer``; later calls return the cached pair immediately.

    Args:
        retries: Maximum number of load attempts (must be >= 1).
        delay: Seconds to wait before the first retry. The wait is
            multiplied by 1.5 after every timed-out attempt.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: If ``retries`` is less than 1 and nothing is cached.
        ImportError: If loading fails because a dependency is missing.
        Exception: If every attempt timed out, or loading failed for any
            other reason. The original error is chained as ``__cause__``.
    """
    global _unsloth_model, _unsloth_tokenizer
    if _unsloth_model is not None:
        return _unsloth_model, _unsloth_tokenizer

    if retries < 1:
        # Without this guard the loop would not run and (None, None) would be
        # handed back, failing later with an unrelated-looking error.
        raise ValueError("retries must be at least 1")

    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B"
    for attempt in range(1, retries + 1):
        try:
            print(f"\nAttempt {attempt} of {retries}: Loading {model_name} model and tokenizer...")
            _unsloth_model, _unsloth_tokenizer = FastLanguageModel.from_pretrained(
                model_name=model_name,
                max_seq_length=MAX_SEQ_LENGTH,
                dtype=DTYPE,
                load_in_4bit=LOAD_IN_4BIT,
                # token=True # Uncomment if you have logged in and want to use your token
            )
        except (ReadTimeout, AsyncioTimeoutError, TimeoutError) as e:
            if attempt == retries:
                # Out of attempts: report the failure instead of announcing
                # a retry that will never happen.
                raise Exception(
                    f"Failed to load unsloth model after {retries} attempts due to timeout: {e}"
                ) from e
            print(f"Timeout encountered: {e}. Retrying in {delay} seconds...")
            await asyncio.sleep(delay)
            delay *= 1.5  # Gentle exponential backoff
        except ImportError as e:
            raise ImportError(
                "Unsloth and its dependencies (e.g., transformers, torch) are not installed. "
                "Please install them: pip install unsloth[cu121] transformers torch"
            ) from e
        except Exception as e:
            # Non-timeout failures are not retried.
            raise Exception(f"Failed to load unsloth model: {e}") from e
        else:
            print("DeepSeek-R1-Distill-Llama-8B model and tokenizer loaded successfully.")
            return _unsloth_model, _unsloth_tokenizer

    return _unsloth_model, _unsloth_tokenizer


def _extract_json_object(text):
    """Return the first JSON object that can be decoded from *text*.

    Candidates are tried in this order:
      1. the contents of a ``` or ```json fenced block, if one is present;
      2. the span from the first '{' to the last '}';
      3. every '{' position in turn, decoded with ``JSONDecoder.raw_decode``
         (this handles prose containing braces after the object).

    Raises:
        ValueError: If *text* has no brace-delimited span or nothing in it
            decodes to a JSON object.
    """
    candidates = []

    fenced = re.search(r"```(?:json)?\s*(\{.*?\}\s*)```", text, re.DOTALL)
    if fenced:
        candidates.append(fenced.group(1).strip())

    start = text.find('{')
    end = text.rfind('}')
    if start != -1 and end > start:
        candidates.append(text[start:end + 1])

    if not candidates:
        raise ValueError("LLM response did not contain a valid JSON structure.")

    first_error = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e  # keep a reference; `e` is unbound after this block

    decoder = json.JSONDecoder()
    pos = start
    while pos != -1:
        try:
            decoded, _ = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(decoded, dict):
                return decoded
        pos = text.find('{', pos + 1)

    raise ValueError(f"LLM response could not be parsed as JSON: {first_error}") from first_error


async def call_unsloth_llm(prompt, schema=None):
    """Generate a completion for *prompt* with the locally loaded model.

    Args:
        prompt: User message sent to the model as a single-turn chat.
        schema: When truthy, the response is parsed and the decoded JSON
            object is returned. The schema is only used as an on/off switch;
            the response is not validated against it.

    Returns:
        The decoded JSON object when ``schema`` is truthy, otherwise the raw
        generated text.

    Raises:
        ValueError: If ``schema`` is truthy and no JSON object can be
            extracted from the response.
    """
    model, tokenizer = await initialize_unsloth_model()

    messages = [
        {"role": "user", "content": prompt}
    ]

    try:
        input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except AttributeError:
        # Tokenizer has no chat-template support: send the bare prompt.
        input_text = prompt

    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,  # Room for a complete plan
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Drop the echoed prompt tokens so only newly generated text is decoded.
    prompt_length = inputs.input_ids.shape[1]
    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    if not schema:
        return response_text

    try:
        return _extract_json_object(response_text)
    except ValueError as e:
        print(f"Warning: could not extract JSON from LLM response. Raw text:\n{response_text}\nError: {e}")
        raise


# --- Agent Orchestration Logic ---
async def plan_flight_agent(flight_request):
    """
    Orchestrate one run of the AI flight planning agent.

    The run goes through these phases, each recorded with ``log_phase``:

    1. Load the Unsloth model (``initialize_unsloth_model``).
    2. Ask the LLM for a JSON plan (``call_unsloth_llm``).
    3. Execute the plan step by step with the simulated tools.
    4. On a tool failure, ask the LLM for a revised plan and retry, up to
       ``MAX_ATTEMPTS`` executions in total.
    5. Print a final ``FINAL STATUS`` line.

    Args:
        flight_request: Natural-language description of the desired flight.
            A falsy value aborts the run before the model is loaded.

    Returns:
        None. All results are reported through ``log_phase`` and ``print``.

    Exceptions derived from ``Exception`` that occur during model loading or
    orchestration are caught and reported instead of being propagated.
    """
    # Validate the cheap precondition first so an empty request never pays
    # for a model load.
    if not flight_request:
        print("Error: No flight request provided.")
        return

    # Created before model initialization so that an initialization failure is
    # recorded in the same phase log as every other failure.
    agent_phases = []

    # Initialize model once at the start of the agent run
    try:
        await initialize_unsloth_model(retries=10, delay=10)
    except Exception as e:
        print("\n--- Model Initialization Failed ---")
        print(f"Could not initialize the Unsloth model: {e}")
        print("---------------------------------")
        log_phase(agent_phases, 'failure', 'Model Initialization Failed', f'Could not initialize the Unsloth model: {e}')
        return  # Exit if model fails to load

    is_error = False
    error_message = ''
    booking_successful = False
    plan_steps = []

    try:
        # 1. Initial User Request Perception
        log_phase(agent_phases, 'user', 'User Request', f'"{flight_request}"')

        # 2. Planning Phase - LLM generates initial plan
        log_phase(agent_phases, 'agent-thinking', 'Agent Planning (LLM)', 'Consulting the LLM to generate a flight plan...')

        # The prompt instructs the LLM about long-haul flights and aircraft
        planning_prompt = f"""You are an AI flight planning agent. Based on the user's request "{flight_request}", generate a step-by-step flight plan. Consider necessary steps like checking weather, finding optimal routes, checking NOTAMs, and booking the flight.
        If the request implies a "long-haul" flight (typically over 7 hours) or specifies an aircraft like "Boeing 777", ensure the "Get flight route" action includes `flight_type: "long-haul"` in its parameters to reflect this. The `aircraft` parameter should also be included if specified by the user.
        You MUST output the plan ONLY as a valid JSON object with a single key "plan", which is an array of objects. Each object should have an "action" (string, e.g., "Check weather", "Get flight route", "Check NOTAMs", "Book flight") and "params" (object with relevant key-value pairs, e.g., {{"location": "New York"}}, {{"origin": "JFK", "destination": "LAX", "date": "tomorrow", "flight_type": "long-haul", "aircraft": "Boeing 777"}}).
        Assume the user is requesting a flight from "New York (JFK)" to "Los Angeles (LAX)" for "tomorrow" if not explicitly specified.
        DO NOT include any introductory text, conversational phrases, or explanations before or after the JSON. The output must be ONLY the valid JSON object.
        Example structure for long-haul/specific aircraft: {{"plan": [{{"action": "Check weather", "params": {{"location": "Montreal"}}}}, {{"action": "Get flight route", "params": {{"origin": "YUL", "destination": "NRT", "date": "tomorrow", "flight_type": "long-haul", "aircraft": "Boeing 777"}}}}, {{"action": "Check NOTAMs", "params": {{"location": "NRT"}}}}, {{"action": "Book flight", "params": {{"origin": "YUL", "destination": "NRT", "date": "tomorrow", "flight": "auto-selected"}}}}]}}
        """

        # Passed to call_unsloth_llm so that it parses the response as JSON
        plan_schema = {
            "type": "OBJECT",
            "properties": {
                "plan": {
                    "type": "ARRAY",
                    "items": {
                        "type": "OBJECT",
                        "properties": {
                            "action": {"type": "STRING"},
                            "params": {"type": "OBJECT", "additionalProperties": True}
                        },
                        "required": ["action", "params"]
                    }
                }
            },
            "required": ["plan"]
        }

        # Call LLM for initial plan, handle potential JSON parsing error specifically
        try:
            llm_plan = await call_unsloth_llm(planning_prompt, plan_schema)
            plan_steps = llm_plan.get("plan", [])
            # Reject a missing, empty, or non-array plan here so the failure is
            # reported as a planning error rather than a later AttributeError.
            if not isinstance(plan_steps, list) or not plan_steps:
                raise ValueError("LLM response did not contain a valid 'plan' array.")

            log_phase(agent_phases, 'agent-output', 'LLM Generated Plan', f'Initial plan generated:\n{json.dumps(plan_steps, indent=2)}')

        except ValueError as e:
            # Not re-raised: the execution phase is skipped via is_error and
            # the run falls through to the final status report.
            error_message = f"Failed to get valid plan from LLM: {e}"
            is_error = True
            log_phase(agent_phases, 'failure', 'LLM Planning Failed', error_message)

        # 3. Execution Phase with Reflection Loop (only if planning was successful)
        if not is_error:
            current_plan = list(plan_steps)
            attempt_count = 0
            MAX_ATTEMPTS = 2  # Allow one re-attempt after initial failure

            while not booking_successful and attempt_count < MAX_ATTEMPTS:
                attempt_count += 1
                reflection_needed = False
                last_failed_action = None
                last_error_message = None

                # Execute steps in the current plan
                for step_number, step in enumerate(current_plan, start=1):
                    log_phase(agent_phases, 'agent-executing', f'Executing Step {step_number} (Attempt {attempt_count})',
                              f'Performing action: "{step.get("action")}" with params: {json.dumps(step.get("params"))}')

                    try:
                        tool_result = None
                        action_type = step.get("action", "").lower()
                        params = step.get("params", {})

                        # The simulated tools block (time.sleep), so run them
                        # in a worker thread to keep the event loop responsive.
                        if action_type == 'check weather':
                            tool_result = await asyncio.to_thread(simulate_weather_api, params.get('location'))
                        elif action_type == 'get flight route':
                            # Note: 'aircraft' is not forwarded to simulate_route_api;
                            # the LLM is still asked to include it in the plan.
                            tool_result = await asyncio.to_thread(simulate_route_api,
                                                                  params.get('origin'),
                                                                  params.get('destination'),
                                                                  params.get('flight_type', 'standard'))
                        elif action_type == 'check notams':
                            tool_result = await asyncio.to_thread(simulate_notams_api, params.get('location'))
                        elif action_type == 'book flight':
                            tool_result = await asyncio.to_thread(simulate_booking_api, params, attempt_count)
                            booking_successful = True  # Mark as successful if booking passes
                        else:
                            raise ValueError(f"Unknown action: {step.get('action')}")

                        log_phase(agent_phases, 'tool-output', f'Tool Output for {step.get("action")}',
                                  tool_result.get('data') or json.dumps(tool_result, indent=2))

                    except Exception as tool_error:
                        # 4. Reflection/Error Handling - LLM re-plans on failure
                        last_failed_action = step.get("action")
                        last_error_message = str(tool_error)
                        if booking_successful:
                            # The booking already went through, so a failure in
                            # a later step must not trigger re-planning or turn
                            # the run into an error.
                            log_phase(agent_phases, 'failure', f'Failure during {last_failed_action}',
                                      f'Error: {last_error_message}. Booking already completed; skipping reflection.')
                        else:
                            log_phase(agent_phases, 'failure', f'Failure during {last_failed_action}',
                                      f'Error: {last_error_message}. Initiating reflection...')
                            reflection_needed = True
                        break  # Stop executing the current plan

                if not reflection_needed:
                    # Either the booking succeeded, or the plan ran cleanly but
                    # contained no booking step. Re-running the same plan would
                    # only repeat identical tool calls, so stop here.
                    break

                if attempt_count < MAX_ATTEMPTS:
                    log_phase(agent_phases, 'agent-thinking', 'Agent Reflection (LLM)', 'Encountered an issue. Consulting LLM for re-planning...')

                    reflection_prompt = f"""The previous attempt to plan the flight with request "{flight_request}" failed during the action "{last_failed_action}" with the error: "{last_error_message}". The current plan being executed was: {json.dumps(current_plan)}.
                    Please provide a revised plan or a suggestion on how to proceed.
                    Focus on addressing the error encountered. If a specific step failed, suggest an alternative or a retry strategy. If the failure was in booking, try to suggest re-attempting or suggest alternative options.
                    Provide the output ONLY as a valid JSON object with a single key "revisedPlan", which is an array of objects, similar to the initial plan structure. Ensure the JSON is well-formed and nothing else is included in the output.
                    If re-attempting a booking, ensure the "book flight" action is included with the necessary details from the previous attempt if possible.
                    If the original request was for a long-haul flight or specified a Boeing 777, ensure the "Get flight route" action in the revised plan still includes `flight_type: "long-haul"` and `aircraft: "Boeing 777"` if applicable.
                    """
                    try:
                        # plan_schema is reused to request JSON parsing of the reply
                        llm_reflection = await call_unsloth_llm(reflection_prompt, plan_schema)
                        # LLM might return 'plan' or 'revisedPlan' depending on its response structure.
                        # Prioritize 'revisedPlan', fall back to 'plan'.
                        new_plan = llm_reflection.get("revisedPlan") or llm_reflection.get("plan")
                        if not isinstance(new_plan, list) or not new_plan:
                            raise ValueError("LLM reflection did not provide a valid revised plan array.")
                        current_plan = list(new_plan)  # Next attempt runs the revised plan

                        log_phase(agent_phases, 'agent-output', 'LLM Reflection & Revised Plan',
                                  f'LLM suggested revised plan:\n{json.dumps(current_plan, indent=2)}')

                    except ValueError as e:  # Unparseable or invalid revised plan
                        error_message = f"Failed to get valid revised plan from LLM during reflection: {e}"
                        is_error = True
                        log_phase(agent_phases, 'failure', 'LLM Reflection Failed', error_message)
                        break  # Exit the while loop if reflection fails
                    except Exception as e:  # Any other error during reflection
                        error_message = f"An unexpected error occurred during LLM reflection: {e}"
                        is_error = True
                        log_phase(agent_phases, 'failure', 'LLM Reflection Failed', error_message)
                        break  # Exit the while loop if reflection fails

                else:
                    # Reached max attempts after needing reflection. Logged like
                    # every other failure path so the phase log is complete.
                    error_message = "Max re-planning attempts reached. Agent unable to complete the task."
                    is_error = True
                    log_phase(agent_phases, 'failure', 'Max Attempts Reached', error_message)
                    break

            # Check final status after the execution loop
            if booking_successful:
                # 5. Completion Phase
                log_phase(agent_phases, 'success', 'Flight Planning Completed!', 'The flight planning process was successful.')
            elif not is_error:
                # The plan ran to completion without booking and without an
                # explicit error (for example, it had no "Book flight" step).
                error_message = "Flight planning process finished without successful booking or explicit error."
                is_error = True
                log_phase(agent_phases, 'failure', 'Execution Unfinished', error_message)

    except Exception as err:
        # This outer catch is for unexpected errors during orchestration itself
        print("\n--- Orchestration Error ---")
        print(f"Agent orchestration failed: {err}")
        print("--------------------------")
        is_error = True
        error_message = f"Agent orchestration failed: {err}"
        # Avoid logging the same failure twice if the last phase already holds it
        if not agent_phases or agent_phases[-1].get('content') != error_message:
            log_phase(agent_phases, 'failure', 'System Error', error_message)

    # Final Status Report
    if is_error:
        print(f"\nFINAL STATUS: ERROR - {error_message}")
    elif booking_successful:
        print("\nFINAL STATUS: SUCCESS - Flight planning completed.")
    else:
        # Fallback print if neither error nor success flag was set correctly
        print("\nFINAL STATUS: UNKNOWN - Process finished in an unexpected state.")


# --- Main execution block for Jupyter notebook ---
# The request is hard-coded here so the cell runs without user input.
# It asks for a long-haul flight from Montreal to Tokyo on a Boeing 777.
automatic_flight_request = "Plan a flight from Montreal to Tokyo on a Boeing 777 for tomorrow."

# Top-level `await` is only valid inside IPython/Jupyter, which runs the cell
# on its own event loop. In a plain Python script this line is a syntax error;
# use `asyncio.run(plan_flight_agent(automatic_flight_request))` there instead.
await plan_flight_agent(automatic_flight_request)


Attempt 1 of 10: Loading unsloth/DeepSeek-R1-Distill-Llama-8B model and tokenizer...
==((====))==  Unsloth 2025.6.5: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
DeepSeek-R1-Distill-Llama-8B model and tokenizer loaded successfully.

--- User Request (User) ---
"Plan a flight from Montreal to Tokyo on a Boeing 777 for tomorrow."
------------------------

--- Agent Planning (LLM) (Agent Thinking) ---
Consulting the LLM to generate a flight plan...
--------------------------------

--- LLM Generated Plan (Agent Output) ---
Initial plan generated:
[
  {
    "action": "Check weather",
    "params": {
      "location": "Mont