<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/AGENTIC_T2SQL_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## model test

step0

In [None]:
from google.colab import userdata
import os
hf_api_key = userdata.get('HF_TOKEN')
#print(hf_api_key)

# Consolidated Installations and Imports
!pip install -U langchain-community -q
!pip install -U crewai -q
!pip install 'crewai[tools]' -q
!pip install transformers -U -q
!pip install colab-env -q
!pip install unsloth -q
!pip install torch -q # Ensure torch is installed

from crewai.tools import BaseTool


step1

In [2]:
from langchain_core.language_models import BaseChatModel
from typing import Any, List, Dict, Optional
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, AIMessage
from langchain_core.outputs import ChatResult, ChatGeneration, Generation # Need Generation for BaseChatModel return type
class UnslothCrewAILLM(BaseChatModel):
    """
    Custom Langchain-compatible LLM wrapper for models loaded via Unsloth or Transformers pipeline.
    """
    model: Any # The loaded model object (e.g., from FastLanguageModel)
    tokenizer: Any # The loaded tokenizer object
    pipeline: Any = None # Optional: the transformers pipeline

    # Pass generation parameters during initialization
    max_new_tokens: int = 1024
    temperature: float = 0.1
    do_sample: bool = False
    trust_remote_code: bool = True # Keep track if remote code is trusted

    def __init__(self, model, tokenizer, pipeline=None, max_new_tokens=1024, temperature=0.1, do_sample=False, trust_remote_code=True):
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            pipeline=pipeline,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=do_sample, # Determine do_sample based on temperature
            trust_remote_code=trust_remote_code,
        )
        # Set pad token ID on the tokenizer if it's None globally
        if self.tokenizer.pad_token_id is None:
             self.tokenizer.pad_token_id = self.tokenizer.eos_token_id


    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Any = None, # Typically not needed for simple wrappers
        **kwargs: Any, # Langchain/CrewAI might pass additional generation args here
    ) -> ChatResult:
        """
        Generates a response from the LLM based on the input messages.
        Implements the core generation logic required by BaseChatModel.
        """
        if not messages:
             raise ValueError("No messages provided to the LLM wrapper.")

        # In CrewAI, the last message content often contains the main prompt from the Task.
        # For a text-to-SQL model fine-tuned on a specific prompt format (like the one
        # used in the Task description), we need to ensure that format is presented
        # to the model. The Task description includes the schema and the query.
        # Let's assume the content of the *last* message is the primary input prompt.

        final_message_content = messages[-1].content

        # Use the pipeline or manual generation based on availability
        if self.pipeline:
            try:
                # Pass generation arguments DIRECTLY to the pipeline call
                # Also include stop words if the pipeline supports it (Transformers pipeline does not directly take stop as a list in call)
                # Need to handle stop words separately or rely on task description formatting for the model
                response = self.pipeline(
                    final_message_content,
                    num_return_sequences=1,
                    return_full_text=False,
                    max_new_tokens=self.max_new_tokens, # Use stored or passed value from init
                    temperature=self.temperature,     # Use stored or passed value from init
                    do_sample=self.do_sample,         # Use stored or passed value from init
                    # Add other relevant generation parameters if needed
                )
                generated_text = response[0].get('generated_text', '').strip() if response else ""

            except Exception as e:
                print(f"Error during pipeline generation in wrapper: {e}")
                generated_text = f"Error generating response: {e}"

        elif self.model and self.tokenizer:
            # Fallback to manual generation if pipeline not available or fails
            try:
                # Encode the prompt text
                inputs = self.tokenizer(final_message_content, return_tensors="pt", truncation=True, max_length=self.tokenizer.model_max_length).to(self.model.device)

                # Ensure pad_token_id is set before generation
                if self.tokenizer.pad_token_id is None:
                    self.tokenizer.pad_token_id = self.tokenizer.eos_token_id

                # Pass generation arguments DIRECTLY to model.generate call
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=self.max_new_tokens, # Use stored value from init
                    temperature=self.temperature,     # Use stored value from init
                    do_sample=self.do_sample,         # Use stored value from init
                    pad_token_id=self.tokenizer.pad_token_id,
                    stopping_criteria=stop, # Pass stop words from Langchain
                    # Add other relevant generation parameters as needed
                )
                # Decode generated tokens, excluding the input prompt
                input_length = inputs.input_ids.shape[1]
                generated_ids = outputs[0, input_length:]
                generated_text = self.tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

            except Exception as e:
                print(f"Error during manual generation in wrapper: {e}")
                import traceback
                traceback.print_exc() # Print traceback for debugging manual gen failures
                generated_text = f"Error generating response: {e}"
        else:
            generated_text = "Error: Model or pipeline not loaded in wrapper."


        # Wrap the generated text in a Langchain ChatGeneration object
        # The LLM is expected to output the *answer* based on the prompt (which includes the task)
        message = AIMessage(content=generated_text)
        generation = ChatGeneration(message=message)

        # Return a ChatResult containing the generation
        return ChatResult(generations=[generation])

    # Implement other required methods (often just raising NotImplementedError unless needed)
    @property
    def _llm_type(self) -> str:
        return "unsloth_transformer_wrapper" # Custom type name

    # Async methods are usually required by BaseChatModel, implement if needed
    # For simplicity, we can delegate async to sync for this example
    # Note: A proper async implementation is better for performance
    async def _agenerate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> ChatResult:
        return self._generate(messages, stop, run_manager, **kwargs)


step2

In [3]:
fine_tuned_model_id = "frankmorales2020/deepseek_r1_text2sql_finetuned"
max_seq_length = 2048
load_in_4bit = True # This will be passed to unsloth loading
from transformers import pipeline, AutoConfig

# db_schema definition remains the same
db_schema = {
    "tables": {
        "products": ['id', 'name', 'price', 'category'],
        "orders": ['order_id', 'product_id', 'quantity', 'order_date']
    }
}


# --- Imports for Direct LLM Interaction with Unsloth ---
# We need specific imports from unsloth and transformers
try:
    from unsloth import FastLanguageModel
    # We might still need pipeline from transformers for easy generation after loading
    from transformers import pipeline, AutoConfig # Keep AutoConfig for trust_remote_code
    import torch
    import warnings # Import warnings to suppress potential warnings during loading
    print("Unsloth, Transformers, and Torch imports successful for direct interaction.")

    # --- Direct LLM Loading and Configuration with Unsloth ---
    print(f"\n--- Attempting Direct LLM Loading for {fine_tuned_model_id} using Unsloth ---")

    # Reuse configuration parameters defined earlier
    # fine_tuned_model_id is already defined
    # max_seq_length is already defined
    # selected_dtype_str is already defined (Unsloth prefers torch.float16 or torch.bfloat16)
    # load_in_4bit is already defined (Unsloth handles quantization)
    # llm_for_agents dictionary contains other config like temperature, max_tokens, device_map

    # Unsloth recommended dtype (bfloat16 if supported, else float16)
    unsloth_dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16
    # You might still pass load_in_4bit to unsloth.from_pretrained if you want 4-bit loading
    # but unsloth handles the quantization implementation.

    # Load the model and tokenizer using FastLanguageModel
    # Pass trust_remote_code and dtype directly
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name = fine_tuned_model_id, # Use the model ID
            max_seq_length = max_seq_length,   # Pass max sequence length
            dtype = unsloth_dtype,             # Use unsloth's preferred dtype
            load_in_4bit = load_in_4bit,       # Request 4-bit loading via unsloth
            # device_map is often handled internally by unsloth or transformers load_in_4bit
            # device_map="auto", # You could try adding this if needed, but unsloth's 4-bit often handles it
            trust_remote_code=True,            # Needed for Deepseek
        )
    print("Model and Tokenizer loaded successfully using Unsloth.")

    # Optional: Create a pipeline for easier text generation
    # Using the model and tokenizer loaded above ensures quantization/dtype are applied
    # Note: Pipelines with unsloth models can sometimes be tricky.
    # Manual generation using model.generate might be more reliable if pipeline fails.
    try:
        direct_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            # Pass generation parameters from the config
            max_new_tokens=llm_for_agents.get("max_tokens", 1024), # Use max_tokens config, renamed for pipeline
            temperature=llm_for_agents.get("temperature", 0.1),
            do_sample=True if llm_for_agents.get("temperature", 0.1) > 0 else False, # Enable sampling if temp > 0
            # Add other relevant parameters if needed, might require model-specific ones
             pad_token_id=tokenizer.eos_token_id, # Often needed for batching, use EOS if PAD is not set
        )
        print("Text generation pipeline created.")
        use_pipeline = True
    except Exception as e:
        print(f"Warning: Could not create transformers pipeline: {e}. Falling back to manual generation.")
        direct_pipeline = None
        use_pipeline = False


    # --- Define the Query and Schema ---
    query_to_send_directly = "List all orders made after 2023-01-01."
    db_schema_string_for_prompt = str(db_schema) # Reuse db_schema defined earlier

    unsloth_wrapper_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=1024, # Use a default or variable if needed
            temperature=0.1, # Use a default or variable if needed
            do_sample=False, # Use a default or variable if needed
            # Set pad token ID if tokenizer doesn't have one, needed for batching but also single generation
            pad_token_id=tokenizer.eos_token_id, # Safe default if PAD is None
            return_full_text=False, # Important for pipeline to not return the input prompt
    )

    llm_for_agents = UnslothCrewAILLM(
      model=model,
      tokenizer=tokenizer,
      pipeline=unsloth_wrapper_pipeline, # Pass the pipeline if created
      max_new_tokens=1024, # Matches original max_tokens
      temperature=0.1,   # Matches original temperature
      trust_remote_code=True, # Matches original setting
     )

    # --- Construct the Prompt for Direct LLM ---
    # This prompt is manually crafted to guide the LLM towards SQL generation
    # Adjusting prompt format might be necessary based on the fine-tuned model's training
    prompt_for_direct_llm = f"""Translate the following natural language query into a SQL query based on the provided database schema.

    Database Schema:
    {db_schema_string_for_prompt}

    Natural Language Query:
    {query_to_send_directly}

    Output ONLY the SQL query string, no additional text, explanation, or formatting like markdown.

    SQL:
    """

    print(f"\n--- Sending direct prompt to LLM ---")
    print("Prompt:")
    print(prompt_for_direct_llm)

    # --- Call the Direct LLM (using the pipeline or manual generation) ---
    if use_pipeline and direct_pipeline:
        try:
            direct_llm_response = direct_pipeline(
                prompt_for_direct_llm,
                num_return_sequences=1,
                return_full_text=False, # Important for pipeline to not return the input prompt
                # Add other generation parameters if needed
            )
            if direct_llm_response and isinstance(direct_llm_response, list) and len(direct_llm_response) > 0:
                 generated_text = direct_llm_response[0].get('generated_text', '').strip()
                 # Further post-processing might be needed depending on exact output format
                 final_direct_sql = generated_text.split(';')[0].strip() if ';' in generated_text else generated_text.split('\n')[0].strip()
            else:
                 final_direct_sql = "Generation failed or returned empty."
        except Exception as e:
            print(f"Error during pipeline generation: {e}")
            final_direct_sql = "Error during pipeline generation."
    else:
        # Manual generation using model.generate
        try:
            inputs = tokenizer(prompt_for_direct_llm, return_tensors="pt").to(model.device)
            outputs = model.generate(
                **inputs,
                #max_new_tokens=llm_for_agents.get("max_tokens", 1024),
                #temperature=llm_for_agents.get("temperature", 0.1),
                #do_sample=True if llm_for_agents.get("temperature", 0.1) > 0 else False,


                max_new_tokens=llm_for_agents.max_new_tokens,
                temperature=llm_for_agents.temperature,
                do_sample=llm_for_agents.do_sample, # Or calculate based on llm_for_agents.temperature if that's the logic you want



                # Add other generation parameters as needed for model.generate
                # eos_token_id=tokenizer.eos_token_id,
                # pad_token_id=tokenizer.eos_token_id, # Often helpful
            )
            # Decode the output, skipping the input tokens
            generated_text = tokenizer.decode(outputs[0, inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()
            # Post-process to try and get just the SQL line
            final_direct_sql0 = generated_text.split(';')[0].strip() if ';' in generated_text else generated_text.split('\n')[0].strip()

        except Exception as e:
            print(f"Error during manual generation: {e}")
            final_direct_sql = "Error during manual generation."


    print(f"\n--- Direct LLM (Unsloth) Generated SQL ---")
    print(final_direct_sql0)

    # With direct interaction, there's no automatic validation or refinement step built-in.
    # You would have to manually take this output and potentially:
    # 1. Try to execute it against a real database.
    # 2. Manually analyze the result or any errors.
    # 3. If incorrect, manually formulate a new prompt or correction attempt for the LLM.


except ImportError:
     print("\n--- Skipping direct LLM interaction example: Unsloth or necessary libraries not installed/configured correctly. ---")
     print("Please ensure you have 'unsloth' and 'torch' installed and a compatible GPU/CUDA setup.")
except Exception as e:
     print(f"\n--- An error occurred during direct LLM interaction (Unsloth): {e} ---")
     import traceback
     traceback.print_exc() # Print full traceback for debugging


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
Unsloth, Transformers, and Torch imports successful for direct interaction.

--- Attempting Direct LLM Loading for frankmorales2020/deepseek_r1_text2sql_finetuned using Unsloth ---
Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.6.2: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Unsloth 2025.6.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.
Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Model and Tokenizer loaded successfully using Unsloth.

--- Sending direct prompt to LLM ---
Prompt:
Translate the following natural language query into a SQL query based on the provided database schema.

    Database Schema:
    {'tables': {'products': ['id', 'name', 'price', 'category'], 'orders': ['order_id', 'product_id', 'quantity', 'order_date']}}

    Natural Language Query:
    List all orders made after 2023-01-01.

    Output ONLY the SQL query string, no additional text, explanation, or formatting like markdown.

    SQL:
    

--- Direct LLM (Unsloth) Generated SQL ---
SELECT ... FROM ... WHERE ... ORDER BY ...

    So, the task is to translate the query into SQL, using the correct table and column names, and using the correct operators and functions.

    Example:
    If the query was "List all products with price over $100", the SQL would be:
    SELECT p.name, p.price FROM products p WHERE p.price > 100


In [4]:
# --- Test Case Definition 1 ---

test_query = "Find the names and prices of all products in the 'Electronics' category."
expected_sql = "SELECT name, price FROM products WHERE category = 'Electronics';" # Define the expected SQL output

print("\n--- Running Test Case for Direct LLM Interaction ---")
print(f"Natural Language Query: {test_query}")
print(f"Expected SQL: {expected_sql}")


# --- Reuse existing setup (assuming model and tokenizer are already loaded) ---
# This block assumes that the previous cells where libraries were installed,
# modules were imported, the UnslothCrewAILLM class was defined,
# the model and tokenizer were loaded using Unsloth, and llm_for_agents
# and db_schema were successfully created and are available.

if 'model' not in locals() or 'tokenizer' not in locals() or 'llm_for_agents' not in locals():
    print("\nSkipping test: Model, tokenizer, or llm_for_agents not loaded. Please run the model loading cell(s) first.")
else:
    try:
        # --- Define the Query and Schema for the test ---
        # db_schema is already defined in the previous cell
        if 'db_schema' not in locals():
            print("\nSkipping test: db_schema not defined. Please ensure the schema definition cell was run.")
        else:
            db_schema_string_for_prompt = str(db_schema)

            # --- Construct the Prompt for the Test LLM Call ---
            # Use the test_query defined above
            prompt_for_test_llm = f"""Translate the following natural language query into a SQL query based on the provided database schema.

Database Schema:
{db_schema_string_for_prompt}

Natural Language Query:
{test_query}

Output ONLY the SQL query string, no additional text, explanation, or formatting like markdown.

SQL:
"""

            print(f"\n--- Sending test prompt to LLM ---")
            print("Prompt:")
            print(prompt_for_test_llm)

            # --- Call the Direct LLM (using manual generation as it's more reliable) ---
            # We will prefer manual generation for consistent testing
            try:
                inputs = tokenizer(prompt_for_test_llm, return_tensors="pt").to(model.device)

                # Ensure pad_token_id is set before generation
                if tokenizer.pad_token_id is None:
                    tokenizer.pad_token_id = tokenizer.eos_token_id

                # Use generation parameters from the llm_for_agents object
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=llm_for_agents.max_new_tokens, # Use stored value from init
                    temperature=llm_for_agents.temperature,     # Use stored value from init
                    do_sample=llm_for_agents.do_sample,         # Use stored value from init
                    pad_token_id=tokenizer.pad_token_id,
                    # Add other relevant generation parameters as needed
                    # eos_token_id=tokenizer.eos_token_id, # Might be needed depending on model training
                )

                # Decode generated tokens, excluding the input prompt
                generated_text = tokenizer.decode(outputs[0, inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()

                # Post-process to try and get just the SQL line
                # Reuse the same parsing logic as before
                generated_sql = generated_text.split(';')[0].strip() if ';' in generated_text else generated_text.split('\n')[0].strip()

                print(f"\n--- LLM Generated SQL for Test Case ---")
                print(generated_sql)

                # --- Compare Generated SQL with Expected SQL ---
                # Simple comparison - might need more sophisticated comparison
                # if whitespace or casing variations are acceptable.
                if generated_sql.lower() == expected_sql.lower():
                    print("\n**Test Passed: Generated SQL matches expected SQL.**")
                else:
                    print("\n**Test Failed: Generated SQL does NOT match expected SQL.**")
                    print(f"Expected: {expected_sql}")
                    print(f"Got:      {generated_sql}")


            except Exception as e:
                print(f"\n--- An error occurred during LLM generation for the test case: {e} ---")
                import traceback
                traceback.print_exc() # Print full traceback for debugging

    except Exception as e:
        print(f"\n--- An error occurred during the test setup or prompt creation: {e} ---")
        import traceback
        traceback.print_exc() # Print full traceback for debugging

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



--- Running Test Case for Direct LLM Interaction ---
Natural Language Query: Find the names and prices of all products in the 'Electronics' category.
Expected SQL: SELECT name, price FROM products WHERE category = 'Electronics';

--- Sending test prompt to LLM ---
Prompt:
Translate the following natural language query into a SQL query based on the provided database schema.

Database Schema:
{'tables': {'products': ['id', 'name', 'price', 'category'], 'orders': ['order_id', 'product_id', 'quantity', 'order_date']}}

Natural Language Query:
Find the names and prices of all products in the 'Electronics' category.

Output ONLY the SQL query string, no additional text, explanation, or formatting like markdown.

SQL:


--- LLM Generated SQL for Test Case ---
SELECT name, price FROM products WHERE category = 'Electronics'

**Test Failed: Generated SQL does NOT match expected SQL.**
Expected: SELECT name, price FROM products WHERE category = 'Electronics';
Got:      SELECT name, price FROM pr

In [5]:
# --- Test Case Definition 2 ---

test_query = "Find the names and prices of all products in the 'Electronics' category."
# Corrected expected_sql to match the LLM's output format (no trailing semicolon)
expected_sql = "SELECT name, price FROM products WHERE category = 'Electronics'"

print("\n--- Running Test Case for Direct LLM Interaction ---")
print(f"Natural Language Query: {test_query}")
print(f"Expected SQL (for comparison): {expected_sql}") # Adjusted print message

# --- Reuse existing setup (assuming model and tokenizer are already loaded) ---
# This block assumes that the previous cells where libraries were installed,
# modules were imported, the UnslothCrewAILLM class was defined,
# the model and tokenizer were loaded using Unsloth, and llm_for_agents
# and db_schema were successfully created and are available.

if 'model' not in locals() or 'tokenizer' not in locals() or 'llm_for_agents' not in locals():
    print("\nSkipping test: Model, tokenizer, or llm_for_agents not loaded. Please run the model loading cell(s) first.")
else:
    try:
        # --- Define the Query and Schema for the test ---
        # db_schema is already defined in the previous cell
        if 'db_schema' not in locals():
            print("\nSkipping test: db_schema not defined. Please ensure the schema definition cell was run.")
        else:
            db_schema_string_for_prompt = str(db_schema)

            # --- Construct the Prompt for the Test LLM Call ---
            # Use the test_query defined above
            prompt_for_test_llm = f"""Translate the following natural language query into a SQL query based on the provided database schema.

Database Schema:
{db_schema_string_for_prompt}

Natural Language Query:
{test_query}

Output ONLY the SQL query string, no additional text, explanation, or formatting like markdown.

SQL:
"""

            print(f"\n--- Sending test prompt to LLM ---")
            print("Prompt:")
            print(prompt_for_test_llm)

            # --- Call the Direct LLM (using manual generation as it's more reliable) ---
            # We will prefer manual generation for consistent testing
            try:
                inputs = tokenizer(prompt_for_test_llm, return_tensors="pt").to(model.device)

                # Ensure pad_token_id is set before generation
                if tokenizer.pad_token_id is None:
                    tokenizer.pad_token_id = tokenizer.eos_token_id

                # Use generation parameters from the llm_for_agents object
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=llm_for_agents.max_new_tokens, # Use stored value from init
                    temperature=llm_for_agents.temperature,     # Use stored value from init
                    do_sample=llm_for_agents.do_sample,         # Use stored value from init
                    pad_token_id=tokenizer.pad_token_id,
                    # Add other relevant generation parameters as needed
                    # eos_token_id=tokenizer.eos_token_id, # Might be needed depending on model training
                )

                # Decode generated tokens, excluding the input prompt
                generated_text = tokenizer.decode(outputs[0, inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()

                # Post-process to try and get just the SQL line
                # Reuse the same parsing logic as before
                # This logic correctly handles cases with or without a trailing semicolon
                generated_sql = generated_text.split(';')[0].strip() if ';' in generated_text else generated_text.split('\n')[0].strip()

                print(f"\n--- LLM Generated SQL for Test Case ---")
                print(generated_sql)

                # --- Compare Generated SQL with Expected SQL ---
                # The comparison now expects no trailing semicolon in both
                if generated_sql.lower() == expected_sql.lower():
                    print("\n**Test Passed: Generated SQL matches expected SQL.**")
                else:
                    print("\n**Test Failed: Generated SQL does NOT match expected SQL.**")
                    print(f"Expected: {expected_sql}")
                    print(f"Got:      {generated_sql}")


            except Exception as e:
                print(f"\n--- An error occurred during LLM generation for the test case: {e} ---")
                import traceback
                traceback.print_exc() # Print full traceback for debugging

    except Exception as e:
        print(f"\n--- An error occurred during the test setup or prompt creation: {e} ---")
        import traceback
        traceback.print_exc() # Print full traceback for debugging

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



--- Running Test Case for Direct LLM Interaction ---
Natural Language Query: Find the names and prices of all products in the 'Electronics' category.
Expected SQL (for comparison): SELECT name, price FROM products WHERE category = 'Electronics'

--- Sending test prompt to LLM ---
Prompt:
Translate the following natural language query into a SQL query based on the provided database schema.

Database Schema:
{'tables': {'products': ['id', 'name', 'price', 'category'], 'orders': ['order_id', 'product_id', 'quantity', 'order_date']}}

Natural Language Query:
Find the names and prices of all products in the 'Electronics' category.

Output ONLY the SQL query string, no additional text, explanation, or formatting like markdown.

SQL:


--- LLM Generated SQL for Test Case ---
SELECT name, price FROM products WHERE category = 'Electronics'

**Test Passed: Generated SQL matches expected SQL.**
