In [None]:
# T5 Text-to-SQL Evaluation Script
# Generates predictions and prepares files for local evaluation.
# VERSION: Aligned with T5-Large training (No fp16)
# Includes fix for missing tokenizer files and points to checkpoint.

# First, mount Google Drive
# (google.colab is only importable inside a Colab runtime; the model, dataset
# and output paths configured below all live under /content/drive.)
from google.colab import drive
drive.mount('/content/drive', force_remount=True) # Added force_remount=True just in case

# Check GPU availability
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Report the name and total memory (in GB, base 10) of device 0 only.
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# Show GPU info
# NOTE: lines starting with `!` are IPython shell escapes; they are only valid
# when this code runs as a notebook cell, not as a plain Python script.
!nvidia-smi

# Install required packages
print("Installing required packages (matching training environment)...")
!pip install -q datasets transformers evaluate tensorboard accelerate huggingface-hub pandas
print("Packages installed.")

# --- Verify Installation ---
# `pip show` prints the installed version of each package for the run log.
print("\nVerifying package versions...")
!pip show datasets transformers evaluate tensorboard accelerate huggingface-hub torch pandas
print("-" * 30)


import json
import os
import sys
import time
import pandas as pd
import evaluate
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import subprocess
from tqdm.notebook import tqdm

# --- Configuration ---
# Filesystem locations: Drive holds the experiment and results, the dataset
# JSON files are staged on local Colab storage.
DRIVE_BASE_DIR = "/content/drive/MyDrive/text2sql"
LOCAL_DATASET_DIR = "/content/datasets/spider"
LOCAL_DB_DIR = f"{LOCAL_DATASET_DIR}/database"
SPIDER_EVAL_SCRIPT_PATH = f"{DRIVE_BASE_DIR}/spider_evaluation/evaluation.py"

# Experiment under evaluation and where its outputs are written.
EXPERIMENT_NAME = "t5_large_sql_types_schema_v5"
SCHEMA_FORMAT = "sql"
MODEL_PATH = f"{DRIVE_BASE_DIR}/{EXPERIMENT_NAME}"
OUTPUT_DIR = f"{DRIVE_BASE_DIR}/eval_results/{EXPERIMENT_NAME}_best"

# Run settings.
NUM_BEAMS = 8
LIMIT = 0  # values <= 0 mean "evaluate every example"
RUN_SPIDER_EVAL = False

# Create directories
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(LOCAL_DATASET_DIR, exist_ok=True)

# Echo the effective configuration, one setting per line.
print(
    "--- Evaluation Configuration ---",
    f"Model path: {MODEL_PATH}",
    f"Schema format: {SCHEMA_FORMAT} (Expecting Types)",
    f"Output directory (in Google Drive): {OUTPUT_DIR}",
    f"Local Colab Dataset directory: {LOCAL_DATASET_DIR}",
    f"Num beams: {NUM_BEAMS}",
    f"Limit examples: {LIMIT if LIMIT > 0 else 'All'}",
    f"Run Spider eval script automatically in Colab: {RUN_SPIDER_EVAL}",
    sep="\n",
)
if not RUN_SPIDER_EVAL:
    print(f" -> Files for local evaluation will be saved to: {OUTPUT_DIR}")
print("-" * 30)

# --- Schema Utilities ---
# Materialize schema_utils.py in the current working directory when it is not
# already present, so that the `from schema_utils import ...` statement later
# in this file can resolve. An existing schema_utils.py is left untouched.
if not os.path.exists('schema_utils.py'):
    print("schema_utils.py not found, writing from string...")
    # Module source kept as a string literal. Inside it, \"\"\" and \\n are
    # escaped so they end up as real docstring quotes and literal "\n" escape
    # sequences in the written file.
    schema_utils_content = """
import json
import os
import pandas as pd
from typing import Dict, List, Any

# Use LOCAL_DATASET_DIR which should be populated by setup_local_dataset_from_drive
LOCAL_DATASET_DIR_SCHEMA = "/content/datasets/spider"

def load_tables_json(tables_path: str = 'tables.json') -> Dict[str, Any]:
    \"\"\"Load the tables.json file containing schema information for all databases.\"\"\"
    actual_path = os.path.join(LOCAL_DATASET_DIR_SCHEMA, tables_path)
    print(f"Schema Utils: Loading tables.json from: {actual_path}")
    try:
        with open(actual_path, 'r', encoding='utf-8') as f:
            tables_data = json.load(f)
    except FileNotFoundError:
        print(f"CRITICAL Error in schema_utils: tables.json not found at {actual_path}")
        raise
    db_schemas = {}
    for db_info in tables_data:
        db_id = db_info['db_id']
        db_schemas[db_id] = db_info
    return db_schemas

def get_sql_schema_string(db_id: str, db_schemas: Dict[str, Any]) -> str:
    \"\"\"Create SQL schema string including types, PKs, FKs.\"\"\"
    if db_id not in db_schemas: raise ValueError(f"DB ID '{db_id}' not found")
    schema_info = db_schemas[db_id]
    tables = schema_info['table_names_original']
    columns = schema_info['column_names_original']
    column_types = schema_info['column_types']
    primary_keys = set(schema_info.get('primary_keys', []))
    fk_dict = {}
    if isinstance(schema_info.get('foreign_keys'), list):
        for fk_pair in schema_info['foreign_keys']:
             if isinstance(fk_pair, (list, tuple)) and len(fk_pair) == 2:
                 col1_idx, col2_idx = fk_pair
                 if isinstance(col1_idx, int) and isinstance(col2_idx, int): fk_dict[col1_idx] = col2_idx
    table_defs = []
    for i, table in enumerate(tables):
        table_columns = []
        for col_idx, (tab_idx, col_name) in enumerate(columns):
            if tab_idx == i:
                col_type = column_types[col_idx].upper()
                col_info = f"{col_name} ({col_type})"
                if col_idx in primary_keys: col_info += " (PRIMARY KEY)"
                if col_idx in fk_dict:
                    ref_col_idx = fk_dict[col_idx]
                    if 0 <= ref_col_idx < len(columns):
                         ref_tab_idx, ref_col_name = columns[ref_col_idx]
                         if 0 <= ref_tab_idx < len(tables):
                              ref_table = tables[ref_tab_idx]
                              col_info += f" (FOREIGN KEY -> {ref_table}.{ref_col_name})"
                table_columns.append(col_info)
        if table_columns:
            table_columns.sort()
            table_def = f"Table: {table}\\nColumns: {', '.join(table_columns)}"
            table_defs.append(table_def)
    table_defs.sort()
    return "\\n".join(table_defs)

def get_compact_schema_string(db_id: str, db_schemas: Dict[str, Any]) -> str:
    \"\"\"Create compact schema string.\"\"\"
    if db_id not in db_schemas: raise ValueError(f"DB ID '{db_id}' not found")
    schema_info = db_schemas[db_id]
    tables = schema_info['table_names_original']
    columns = schema_info['column_names_original']
    table_columns = {}
    for i, table in enumerate(tables):
        cols = []
        for tab_idx, col_name in columns:
            if tab_idx == i: cols.append(col_name)
        if cols:
             cols.sort()
             table_columns[table] = cols
    parts = []
    for table in sorted(table_columns.keys()):
        cols = table_columns[table]
        part = f"{table}({', '.join(cols)})"
        parts.append(part)
    return " ".join(parts)

def enhance_prompts_with_schema(data_df: pd.DataFrame, db_schemas: Dict[str, Any], schema_format: str = "sql") -> pd.DataFrame:
    \"\"\"Enhance input prompts with schema information.\"\"\"
    enhanced_rows = []
    skipped_count = 0
    total_count = len(data_df)
    print_interval = max(1, total_count // 10)
    print(f"Enhancing {total_count} prompts...")
    for index, example in data_df.iterrows():
        if index > 0 and index % print_interval == 0: print(f"  Processed {index}/{total_count} examples...")
        db_id = example['db_id']
        try:
            if schema_format == "compact":
                schema_str = get_compact_schema_string(db_id, db_schemas)
                input_text = f"translate English to SQL: {example['question']} | database: {db_id} | schema: {schema_str}"
            elif schema_format == "sql":
                schema_str = get_sql_schema_string(db_id, db_schemas) # Uses func with types
                input_text = f"translate English to SQL: {example['question']} | database: {db_id} | schema:\\n{schema_str}"
            elif schema_format == "both":
                compact_str = get_compact_schema_string(db_id, db_schemas)
                sql_str = get_sql_schema_string(db_id, db_schemas) # Uses func with types
                input_text = f"translate English to SQL: {example['question']} | database: {db_id} | schema: {compact_str}\\nDetailed schema:\\n{sql_str}"
            else: raise ValueError(f"Unknown schema format: {schema_format}")
            output_text = example['query']
            enhanced_rows.append({"input_text": input_text, "output_text": output_text})
        except Exception as e:
             print(f"Warning: Skipping example for db_id '{db_id}' due to error: {e}")
             skipped_count += 1
    print(f"  Processed {total_count}/{total_count} examples...")
    if skipped_count > 0: print(f"Skipped {skipped_count} examples.")
    else: print("Successfully enhanced all prompts.")
    if not enhanced_rows: print("Warning: No rows enhanced.")
    return pd.DataFrame(enhanced_rows)
    """
    try:
        with open('schema_utils.py', 'w', encoding='utf-8') as f:
            # strip() drops the leading newline and the trailing indentation
            # that surround the literal above.
            f.write(schema_utils_content.strip())
        print("Successfully wrote schema_utils.py")
    except Exception as e:
        # Log the failure, then re-raise so execution stops here.
        print(f"Error writing schema_utils.py: {e}")
        raise

# --- Function to setup dataset ---
def setup_local_dataset_from_drive(drive_dataset_source_dir=None, local_dataset_dir=None):
    """Copy the Spider ``dev.json`` and ``tables.json`` files to local storage.

    The copy uses ``shutil.copy`` rather than a ``!cp`` shell escape: a failing
    shell escape does not raise, so the surrounding error handling never ran
    and success was reported even when nothing had been copied.

    Args:
        drive_dataset_source_dir: Directory that holds the two JSON files.
            Defaults to ``f"{DRIVE_BASE_DIR}/datasets/spider"``.
        local_dataset_dir: Destination directory, created when missing.
            Defaults to ``LOCAL_DATASET_DIR``.

    Raises:
        FileNotFoundError: If either JSON file is missing from the source.
        OSError: If creating the destination or copying a file fails
            (logged, then re-raised).
    """
    import shutil  # function-scope import keeps this block self-contained

    # Resolve defaults at call time so the module-level configuration is honoured.
    if drive_dataset_source_dir is None:
        drive_dataset_source_dir = f"{DRIVE_BASE_DIR}/datasets/spider"
    if local_dataset_dir is None:
        local_dataset_dir = LOCAL_DATASET_DIR

    print(f"\n--- Setting up Dataset ---")
    print(f"Attempting to copy dataset JSON files from Google Drive path: {drive_dataset_source_dir}")
    drive_tables_path = f"{drive_dataset_source_dir}/tables.json"
    drive_dev_path = f"{drive_dataset_source_dir}/dev.json"
    dev_exists = os.path.exists(drive_dev_path)
    tables_exists = os.path.exists(drive_tables_path)

    if not (dev_exists and tables_exists):
        print(f"CRITICAL Error: Essential dataset JSON files not found in Google Drive.")
        if not dev_exists: print(f"  Missing: {drive_dev_path}")
        if not tables_exists: print(f"  Missing: {drive_tables_path}")
        raise FileNotFoundError("Essential dataset JSON files not found in Google Drive.")

    print("Required JSON dataset files found. Copying to local Colab storage...")
    try:
        os.makedirs(local_dataset_dir, exist_ok=True)
        for source_path in (drive_dev_path, drive_tables_path):
            copied_to = shutil.copy(source_path, local_dataset_dir)
            # Mirror the per-file line that `cp -v` used to print.
            print(f"'{source_path}' -> '{copied_to}'")
    except OSError as e:
        print(f"CRITICAL Error: Failed to copy JSON files from Google Drive: {e}")
        raise
    print(f"Successfully copied JSON files to: {local_dataset_dir}")

# --- Import schema utils and setup dataset ---
# schema_utils.py is expected in the working directory; abort if it cannot be imported.
try:
    from schema_utils import load_tables_json, enhance_prompts_with_schema
except ImportError as import_err:
    print(f"Error: Could not import from schema_utils.py: {import_err}")
    raise
else:
    print("Successfully imported from schema_utils.py")

# Stage dev.json and tables.json from Drive onto local storage before evaluating.
setup_local_dataset_from_drive()

# --- Main Evaluation Function ---
# Placeholder stored as the prediction for any example whose generation raised.
_GENERATION_ERROR = "GENERATION_ERROR"


def _ensure_tokenizer_files(model_path):
    """Save the base T5 tokenizer into ``model_path`` if no tokenizer_config.json is there.

    The model size (small/base/large) is taken from the underscore-separated
    parts of the directory name, falling back to "large".

    Raises:
        Exception: Whatever loading or saving the base tokenizer raised
            (logged, then re-raised).
    """
    print(f"\nChecking/Ensuring tokenizer files exist in: {model_path}")
    if os.path.exists(os.path.join(model_path, "tokenizer_config.json")):
        print("Tokenizer files presumed to exist.")
        return

    print(f"Tokenizer config not found in {model_path}. Saving base t5 tokenizer there...")
    try:
        exp_name_only = os.path.basename(model_path)
        model_size = next(
            (part for part in exp_name_only.split('_') if part in ("small", "base", "large")),
            None,
        )
        if model_size is not None:
            print(f"Determined model size '{model_size}' from experiment name.")
        else:
            model_size = "large"
            print(f"Warning: Could not reliably determine model size from experiment name '{exp_name_only}'. Assuming default '{model_size}'.")

        base_model_name = f"t5-{model_size}"
        print(f"Loading base tokenizer '{base_model_name}' to save...")
        base_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
        # Saving tokenizer files to the main experiment directory
        base_tokenizer.save_pretrained(model_path)
        print(f"Base tokenizer saved to {model_path}.")
        # Add a small delay for Drive sync if saving just happened
        print("Pausing briefly for Drive sync...")
        time.sleep(10)
    except Exception as e:
        print(f"CRITICAL Error: Failed to load/save base tokenizer to {model_path}: {e}")
        print("Cannot proceed without tokenizer files.")
        raise


def _generate_predictions(model, tokenizer, eval_dataset, device, max_input_length, generation_max_length):
    """Run beam-search generation per example; return parallel (predictions, references) lists."""
    predictions = []
    references = []
    for example in tqdm(eval_dataset, desc="Generating SQL"):
        inputs = tokenizer(
            example["input_text"], return_tensors="pt", truncation=True,
            max_length=max_input_length, padding=False
        ).to(device)
        # The reference is recorded whether or not generation succeeds, so both
        # lists always have one entry per example.
        references.append(example["output_text"])
        try:
            with torch.no_grad():
                output_ids = model.generate(
                    input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"],
                    max_length=generation_max_length, num_beams=NUM_BEAMS, early_stopping=True,
                )[0]
            predictions.append(tokenizer.decode(output_ids, skip_special_tokens=True))
        except Exception as e:
            print(f"\nError during generation: {e}")
            predictions.append(_GENERATION_ERROR)
    return predictions, references


def _compute_bleu(predictions, references):
    """Return BLEU over the examples that generated successfully, or None if unavailable."""
    try:
        if not (predictions and references):
            print("Predictions/references empty, skipping BLEU.")
            return None
        bleu_metric = evaluate.load("bleu")
        valid_pairs = [(p, r) for p, r in zip(predictions, references) if p != _GENERATION_ERROR]
        if not valid_pairs:
            print("No valid predictions/references for BLEU.")
            return None
        valid_preds = [p for p, _ in valid_pairs]
        valid_refs = [[r] for _, r in valid_pairs]
        bleu_score = bleu_metric.compute(predictions=valid_preds, references=valid_refs)['bleu']
        print(f"Dev BLEU (on {len(valid_preds)} valid examples): {bleu_score:.4f}")
        return bleu_score
    except Exception as e:
        # BLEU is best-effort: a metric failure must not discard the predictions.
        print(f"Failed to compute BLEU: {e}")
        return None


def evaluate_model():
    """Generate SQL predictions for the dev set, score BLEU, and write evaluation files.

    Reads the module-level configuration (MODEL_PATH, SCHEMA_FORMAT,
    LOCAL_DATASET_DIR, OUTPUT_DIR, NUM_BEAMS, LIMIT). Writes bleu_score.txt,
    predictions.json, gold_sql.txt and pred_sql.txt into OUTPUT_DIR.

    Returns:
        dict: model path, schema format, BLEU score (or None) and the paths of
        the written files; or ``{"error": ..., "bleu": None}`` when no example
        remains to evaluate.

    Raises:
        FileNotFoundError: If MODEL_PATH is not an existing directory.
        ValueError: If prompt enhancement produced no rows, or skipped rows so
            that predictions can no longer be matched to their question/db_id.
        Exception: Failures while loading schemas, dev data, tokenizer or
            model are logged and re-raised.
    """
    print(f"\n--- Starting Evaluation ---")
    print(f"Evaluating model: {MODEL_PATH}")
    print(f"Using schema format: {SCHEMA_FORMAT} (with Types)")

    start_time = time.time()

    # 1) Load database schemas from LOCAL storage
    tables_path = os.path.join(LOCAL_DATASET_DIR, "tables.json")
    try:
        db_schemas = load_tables_json('tables.json')
        print(f"Loaded schemas for {len(db_schemas)} databases from {tables_path}")
    except Exception as e:
        print(f"Failed to load schemas: {e}")
        raise

    # 2) Load dev data from LOCAL storage
    dev_path = os.path.join(LOCAL_DATASET_DIR, "dev.json")
    try:
        with open(dev_path, 'r', encoding='utf-8') as f:
            dev_spider_data = json.load(f)
    except Exception as e:
        print(f"Failed to load dev data: {e}")
        raise

    # Apply limit if specified
    original_dev_count = len(dev_spider_data)
    if 0 < LIMIT < original_dev_count:
        print(f"Limiting evaluation to first {LIMIT} examples (out of {original_dev_count})")
        dev_spider_data = dev_spider_data[:LIMIT]

    dev_df = pd.DataFrame(dev_spider_data)
    print(f"Loaded {len(dev_df)} development examples for evaluation.")

    # Prompt enhancement skips examples it cannot process, which would shift
    # every later prediction onto the wrong question/db_id. Drop examples with
    # an unknown db_id up front so dev_df stays row-aligned with the prompts.
    if "db_id" in dev_df.columns:
        has_schema = dev_df["db_id"].isin(list(db_schemas))
        if not has_schema.all():
            print(f"Warning: Dropping {int((~has_schema).sum())} examples whose db_id has no schema in tables.json.")
            dev_df = dev_df[has_schema].reset_index(drop=True)

    # 3) Enhance prompts
    try:
        dev_t5_data = enhance_prompts_with_schema(dev_df, db_schemas, schema_format=SCHEMA_FORMAT)
        if dev_t5_data.empty and len(dev_df) > 0: raise ValueError("Prompt enhancement failed.")
        if len(dev_t5_data) != len(dev_df):
            # Rows were skipped for another reason; alignment cannot be recovered.
            raise ValueError(
                f"Prompt enhancement returned {len(dev_t5_data)} rows for {len(dev_df)} examples; "
                "cannot align predictions with their questions/db_ids."
            )
        eval_dataset = Dataset.from_pandas(dev_t5_data)
    except Exception as e:
        print(f"Failed during prompt enhancement: {e}")
        raise

    if len(eval_dataset) == 0:
        print("\nWarning: Evaluation dataset is empty after processing.")
        return {"error": "Evaluation dataset empty", "bleu": None}
    print("\nSample prompt (with types):")
    print(eval_dataset[0]["input_text"][:600] + "...")

    # Check the model directory BEFORE touching the tokenizer: saving the base
    # tokenizer would create the directory and make this check pass vacuously.
    if not os.path.isdir(MODEL_PATH): raise FileNotFoundError(f"Model directory not found: {MODEL_PATH}")

    # --- Ensure Tokenizer Files Exist ---
    _ensure_tokenizer_files(MODEL_PATH)

    # 4) Load Model & Tokenizer (Now should find tokenizer files)
    print(f"\nLoading model and tokenizer from: {MODEL_PATH}")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)
        print("Model and tokenizer loaded successfully.")
    except OSError as e:
        # Specific check for missing model weights file error
        if "pytorch_model.bin" in str(e) or "model.safetensors" in str(e):
            print(f"\nCRITICAL Error: Model weight file (e.g., pytorch_model.bin or model.safetensors) not found in {MODEL_PATH}.")
            print("This likely means the training save was incomplete.")
            print("Please verify the contents of the directory in Google Drive.")
            print("If checkpoint folders exist, modify MODEL_PATH to point to the latest checkpoint.")
        else:
            print(f"Failed to load model/tokenizer from {MODEL_PATH}: {e}")
        raise
    except Exception as e:
        print(f"Failed to load model/tokenizer from {MODEL_PATH}: {e}")
        raise

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    try:
        model.to(device)
    except Exception as e:
        print(f"Warning: Failed to move model to {device}. Using CPU. Error: {e}")
        device = torch.device("cpu")
        model.to(device)
    model.eval()

    # 5) Generate Predictions
    print("\nGenerating predictions...")
    # The "sql" schema format gets a larger input token budget than the others.
    max_input_length = 1024 if SCHEMA_FORMAT == "sql" else 512
    generation_max_length = 256
    predictions, references = _generate_predictions(
        model, tokenizer, eval_dataset, device, max_input_length, generation_max_length
    )

    # 6) Compute BLEU
    print("\nComputing BLEU score...")
    bleu_score = _compute_bleu(predictions, references)
    bleu_file_path = os.path.join(OUTPUT_DIR, "bleu_score.txt")
    try:
        with open(bleu_file_path, "w") as f: f.write(f"{bleu_score if bleu_score is not None else 'N/A'}")
        print(f"BLEU score saved to: {bleu_file_path}")
    except Exception as e: print(f"Error saving BLEU score: {e}")

    # 7) Save Predictions
    print("\nSaving predictions (JSON)...")
    # dev_df is row-aligned with predictions/references (enforced in step 3).
    predictions_list = [
        {"question": question, "gold_sql": gold, "pred_sql": pred, "db_id": db_id}
        for question, db_id, gold, pred in zip(dev_df["question"], dev_df["db_id"], references, predictions)
    ]
    predictions_file = os.path.join(OUTPUT_DIR, "predictions.json")
    try:
        with open(predictions_file, "w", encoding="utf-8") as f: json.dump(predictions_list, f, indent=2)
        print(f"Predictions saved to: {predictions_file}")
    except Exception as e: print(f"Error saving predictions JSON: {e}")

    # 8) Prepare Files for Spider Evaluation Format
    print("\nPreparing files for local Spider evaluation...")
    gold_file = os.path.join(OUTPUT_DIR, "gold_sql.txt")
    pred_file = os.path.join(OUTPUT_DIR, "pred_sql.txt")
    try:
        with open(gold_file, "w", encoding="utf-8") as gold_f, \
             open(pred_file, "w", encoding="utf-8") as pred_f:
            for item in predictions_list:
                # One record per line: tabs/newlines inside the SQL would break
                # the "<sql>\t<db_id>" line format.
                clean_gold = str(item["gold_sql"]).replace('\t', ' ').replace('\n', ' ')
                clean_pred = str(item["pred_sql"]).replace('\t', ' ').replace('\n', ' ')
                gold_f.write(clean_gold + "\t" + item["db_id"] + "\n")
                pred_f.write(clean_pred + "\t" + item["db_id"] + "\n")
        print(f"Spider evaluation files prepared in {OUTPUT_DIR}:\n  - {os.path.basename(gold_file)}\n  - {os.path.basename(pred_file)}")
    except Exception as e:
        print(f"Error writing Spider format files: {e}")

    # 9) Run Spider evaluation
    # NOTE: RUN_SPIDER_EVAL is not consulted here; this step is always skipped.
    print(f"\n-> Skipping automatic Spider evaluation step in Colab.")
    print(f"-> To get EX/EM scores, run evaluation.py locally using the files generated in:")
    print(f"   {OUTPUT_DIR}")
    print(f"   Required files: {os.path.basename(gold_file)}, {os.path.basename(pred_file)}")
    print(f"   You will also need locally: evaluation.py, tables.json, database directory")

    elapsed_time = time.time() - start_time
    minutes, seconds = divmod(elapsed_time, 60)
    print(f"\n--- Colab evaluation script finished in {int(minutes)}m {int(seconds)}s ---")

    return {
        "model_path": MODEL_PATH,
        "schema_format": SCHEMA_FORMAT,
        "bleu": bleu_score,
        "predictions_file": predictions_file,
        "gold_file": gold_file,
        "pred_file": pred_file
    }

# --- Main Execution ---
# Run the evaluation and print its result dictionary; any failure is reported
# with a traceback instead of propagating.
if __name__ == "__main__":
    try:
        results = evaluate_model()
        print("\nColab Script Results Summary:")
        if not results:
            print("Evaluation function did not return results.")
        else:
            print(json.dumps(results, indent=2))
    except Exception as exc:
        import traceback

        print(f"\n--- An error occurred during evaluation: {exc} ---")
        traceback.print_exc()

