# In [3]:  (notebook cell marker left over from the export)
# -*- coding: utf-8 -*-

# This script requires the Looker SDK, Google Cloud Secret Manager,
# and the Vertex AI SDK to be installed.
# Please install them using pip if you haven't already:
# !pip install looker-sdk google-cloud-secret-manager google-cloud-aiplatform --upgrade --user

from google.cloud import secretmanager
import os
import looker_sdk
# Removed problematic WriteQuery/models imports. Will construct query body as dict.
import configparser
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models
import sys # For potential error handling exits
import json # For potentially saving results
import time # For potential delays between API calls
import collections # For defaultdict
import typing # For type hinting if needed
import textwrap # For formatting comments
import re # For parsing recommendations

# --- Configuration ---
# Module-level settings. Edit the values in this section before running the script.

# Google Cloud Project and Secret Manager details for Looker credentials
# !!! REPLACE WITH YOUR ACTUAL VALUES !!!
looker_project_id = "joey-looker" # Project where the secret is stored
looker_secret_name = "looker_ini"     # Name of the secret in Secret Manager (INI text with a [Looker] section)
looker_secret_version = "latest"                   # Use 'latest' or a specific version number

# Vertex AI Configuration
# !!! REPLACE WITH YOUR ACTUAL VALUES !!!
vertex_project_id = "joey-looker" # Project where Vertex AI is enabled
vertex_location = "us-central1"                   # Choose a location supporting the model (e.g., "us-central1")
# Choose the Gemini model (check Vertex AI documentation for available models)
gemini_model_name = "gemini-2.5-pro-exp-03-25"        # Model ID; must be available in vertex_location

# Optional: Delay between Gemini API calls in seconds (to help avoid rate limits)
API_CALL_DELAY = 1 # Set to 0 for no delay

# --- History Data Configuration ---
# History data is fetched via the Looker API (see fetch_and_process_history)
TOP_N_FIELDS = 15 # Number of top fields to report per explore based on usage
AGENT_INSTRUCTION_TOP_FIELDS = 5 # Number of top fields to include in agent instructions

# Define weights for different query sources
# Higher weight for interactive sources, lower for passive/automated.
# Keys are presumably matched against the 'history.source' value of each
# history record — verify against the call site of fetch_and_process_history.
SOURCE_WEIGHTS = {
    # Higher weight for interactive sources
    "explore": 3.0,
    "drill_modal": 3.0, # Renamed from drill_down
    "suggest": 2.0,    # Suggestions imply interaction
    "merge_query": 2.0,# Merged queries are actively created
    "api": 1.5,        # API calls can be varied, moderate weight
    "sql_runner": 3.0, # Direct user interaction

    # Lower weight for automated/passive sources
    "dashboard": 1.0,
    "look": 1.0,
    "scheduled_task": 0.5, # Low weight for scheduled tasks
    "cache": 0.0,      # Weight 0: cache hits contribute nothing to usage scores
    "embed": 1.0,      # Embed usage might be similar to dashboard
    # Add other potential sources if needed
}
DEFAULT_WEIGHT = 0.5 # Default weight for sources not explicitly listed

# --- Target Explores Configuration ---
# Define specific explores to analyze (format: ["model_name/explore_name", ...])
# If empty (TARGET_EXPLORES = []), the script will analyze ALL non-hidden explores.
# Example: TARGET_EXPLORES = ["my_model/orders", "my_other_model/users"]
TARGET_EXPLORES = ["basic_ecomm/basic_order_items"]

# --- CA LookML Naming Convention ---
CA_SUFFIX = "_ca" # Suffix appended to the names of extended views and explores generated for CA


# --- Secret Manager Setup ---
def get_looker_config_from_secret_manager(project_id, secret_name, version):
    """Fetch the Looker API credentials stored as INI text in Secret Manager.

    Args:
        project_id: Google Cloud project that owns the secret.
        secret_name: Name of the secret holding the INI document.
        version: Secret version to read ('latest' or a version number).

    Returns:
        The parsed ``[Looker]`` section (a ``configparser.SectionProxy``).
        It is only returned when 'base_url', 'client_id' and 'client_secret'
        are all present.

    Any failure (access error, malformed INI, missing section or keys) is
    printed together with troubleshooting hints and terminates the process
    via ``sys.exit(1)``.
    """
    required_keys = ('base_url', 'client_id', 'client_secret')
    print(f"Attempting to fetch secret '{secret_name}' version '{version}' from project '{project_id}'...")
    try:
        client = secretmanager.SecretManagerServiceClient()
        secret_id = f"projects/{project_id}/secrets/{secret_name}/versions/{version}"
        response = client.access_secret_version(request={"name": secret_id})
        secret_string = response.payload.data.decode("UTF-8")
        print(f"Successfully fetched secret '{secret_name}'.")

        config = configparser.ConfigParser()
        config.read_string(secret_string)

        # Validate the section first: ConfigParser.get() takes (section, option),
        # so it cannot be used as a dict-style lookup with a default.
        if 'Looker' not in config:
            raise ValueError(f"Missing [Looker] section in the secret '{secret_name}'.")
        looker_section = config['Looker']

        # Report exactly which required keys are absent.
        missing_keys = [key for key in required_keys if key not in looker_section]
        if missing_keys:
            raise ValueError(f"Missing required keys {missing_keys} in [Looker] section of the secret '{secret_name}'.")

        print("Successfully parsed [Looker] section from secret.")
        return looker_section
    except Exception as e:
        # Top-level boundary: report the problem and stop, since nothing else
        # in the script can run without credentials.
        print(f"ERROR: Failed to fetch or parse secrets from Secret Manager: {e}")
        print("Troubleshooting steps:")
        print(f"  1. Verify the secret '{secret_name}' exists in project '{project_id}'.")
        print("  2. Ensure the secret is formatted as INI with a [Looker] section containing 'base_url', 'client_id', 'client_secret'.")
        print("  3. Check that the service account/user running this script has the 'Secret Manager Secret Accessor' IAM role.")
        print(f"  4. Confirm '{project_id}' is the correct Google Cloud project ID.")
        sys.exit(1) # Exit if we can't get credentials

# --- Looker SDK Setup ---
def initialize_looker_sdk(config):
    """Build a Looker API 4.0 client from the supplied settings.

    The settings are exported as LOOKERSDK_* environment variables, the SDK
    is initialized with ``looker_sdk.init40()``, and the connection is checked
    by requesting the current user.

    Args:
        config: Mapping-like object providing 'base_url', 'client_id' and
            'client_secret', and optionally 'verify_ssl' and 'timeout'.

    Returns:
        The initialized SDK client. On any failure the error and
        troubleshooting hints are printed and the process exits with status 1.
    """
    print("Initializing Looker SDK...")
    try:
        # Required settings: copied straight from the config into the environment.
        required_settings = (
            ("LOOKERSDK_BASE_URL", 'base_url'),
            ("LOOKERSDK_CLIENT_ID", 'client_id'),
            ("LOOKERSDK_CLIENT_SECRET", 'client_secret'),
        )
        for env_name, config_key in required_settings:
            os.environ[env_name] = config[config_key]

        # Optional settings: SSL verification defaults to "true", timeout to 120 seconds.
        verify_ssl_setting = config.get('verify_ssl', "true")
        os.environ["LOOKERSDK_VERIFY_SSL"] = verify_ssl_setting.lower()
        os.environ["LOOKERSDK_TIMEOUT"] = config.get('timeout', "120")

        # init40() targets Looker API version 4.0.
        sdk = looker_sdk.init40()

        # Connection check: fetch only the fields needed for the log line.
        current_user = sdk.me(fields="id,display_name")
        print(f"Looker SDK initialized successfully. Connected as user: {current_user.display_name} (ID: {current_user.id})")
        return sdk
    except Exception as e:
        print(f"ERROR: Failed to initialize Looker SDK or verify connection: {e}")
        print("Troubleshooting steps:")
        static_hints = (
            "  1. Verify 'base_url', 'client_id', and 'client_secret' in your secret are correct.",
            "  2. Ensure the Looker API user associated with the credentials exists and is enabled.",
            "  3. Check that the Looker API user has sufficient permissions (e.g., 'see_lookml_dashboards', 'see_user_dashboards', 'explore', 'access_data', 'see_looks', 'see_users').",
        )
        for hint in static_hints:
            print(hint)
        configured_url = config.get('base_url', 'URL NOT FOUND IN CONFIG')
        print(f"  4. Confirm the Looker instance is reachable at: {configured_url}")
        ssl_setting = os.environ.get('LOOKERSDK_VERIFY_SSL', 'Not Set')
        print(f"  5. Check LOOKERSDK_VERIFY_SSL setting (currently: {ssl_setting}). Set to 'false' in secret if using self-signed certs (use with caution).")
        sys.exit(1) # Exit if SDK initialization fails


# --- Vertex AI Setup ---
def initialize_vertex_ai(project_id, location):
    """Point the Vertex AI SDK at the given project and location.

    Args:
        project_id: Google Cloud project passed to ``vertexai.init``.
        location: Region passed to ``vertexai.init``.

    Returns nothing. If initialization raises, the error and a list of
    troubleshooting hints are printed and the process exits with status 1.
    """
    print(f"Initializing Vertex AI for project '{project_id}' in location '{location}'...")
    try:
        vertexai.init(project=project_id, location=location)
        print(f"Vertex AI initialized successfully.")
    except Exception as e:
        failure_report = (
            f"ERROR: Failed to initialize Vertex AI: {e}",
            "Troubleshooting steps:",
            f"  1. Ensure the Vertex AI API is enabled for project '{project_id}'.",
            "  2. Verify your environment is authenticated to Google Cloud (e.g., run 'gcloud auth application-default login' locally, or check service account permissions on Cloud resources).",
            "  3. Confirm the authenticated user/service account has necessary Vertex AI permissions (e.g., 'Vertex AI User' role).",
            f"  4. Check if the location '{location}' is valid and supports the intended Gemini model.",
        )
        for report_line in failure_report:
            print(report_line)
        sys.exit(1)

# --- History Data Processing ---
# Removed problematic type hint looker_sdk.SDKClient for 'sdk' parameter
def _coerce_history_count(value) -> float:
    """Convert a count cell from a history row to float; unusable values become 0.0."""
    try:
        return float(value)
    except (TypeError, ValueError):
        # JSON null or a non-numeric cell: treat as "no usage".
        return 0.0


def _parse_history_fields(raw_fields):
    """Normalize a 'query.fields' cell into a non-empty list of field names.

    Returns a ``(fields, skip_reason)`` pair. On success ``skip_reason`` is
    None; otherwise ``fields`` is None and ``skip_reason`` is "parse_error"
    (string is not valid JSON) or "no_list" (missing, empty, the literal
    'null', or not a list).
    """
    if isinstance(raw_fields, list):
        # The API returned a list directly.
        fields = raw_fields
    elif isinstance(raw_fields, str) and raw_fields.strip() and raw_fields.lower() != 'null':
        try:
            fields = json.loads(raw_fields)
        except json.JSONDecodeError:
            return None, "parse_error"
        if not isinstance(fields, list):
            return None, "no_list"
    else:
        return None, "no_list"

    if not fields:
        return None, "no_list"
    return fields, None


def fetch_and_process_history(sdk, weights: dict, default_weight: float):
    """Fetch Looker query history via the API and compute weighted field usage.

    Runs an inline query against ``system__activity/history`` (last 90 days,
    excluding system__activity itself) and, for every returned row, adds
    ``run count * user count * source weight`` to each field listed in the row.

    Args:
        sdk: Looker SDK client exposing ``run_inline_query``.
        weights: Mapping of 'history.source' value to weight.
        default_weight: Weight used for sources missing from ``weights`` and
            for rows without a source.

    Returns:
        Nested defaultdicts ``model -> explore -> field -> score`` (float), or
        None when the history could not be fetched or was not a list of rows.
        Rows with a missing model/explore, a missing or non-positive count,
        unparseable fields, or a zero source weight contribute nothing.
    """
    print("Fetching and processing history data via Looker API...")

    # Parameters of the inline query.
    model_name = "system__activity"
    explore_name = "history" # Explore name is passed as 'view' to run_inline_query
    row_limit = 5000
    fields = [
        "query.view",             # Explore name used in the query
        "history.query_run_count",# Number of times the query ran
        "query.model",            # Model name used
        "query.fields",           # List of fields used (view_name.field_name)
        "history.source",         # Source of the query (dashboard, explore, etc.)
        "user.count"              # Number of users who ran the query
    ]
    filters = {
        "history.created_date": "90 days",       # Last 90 days
        "query.model": "-NULL,-system__activity" # Exclude NULL models and system__activity itself
    }
    sorts = ["history.query_run_count desc"]

    # The query body is a plain dict rather than a WriteQuery object.
    query_body_dict = {
        "model": model_name,
        "view": explore_name,
        "fields": fields,
        "filters": filters,
        "sorts": sorts,
        "limit": str(row_limit) # The limit is sent as a string
    }

    # Pre-bind so the JSON error handler can always log it, even if the SDK
    # call itself raised the decode error.
    response_json_str = None
    try:
        print(f"Running inline query on {model_name}/{explore_name}...")
        response_json_str = sdk.run_inline_query(result_format="json", body=query_body_dict)
        history_data = json.loads(response_json_str)
        print(f"Successfully fetched {len(history_data)} history records via API.")
    except looker_sdk.error.SDKError as e:
        print(f"ERROR: Looker API error fetching history data: {e}")
        if hasattr(e, 'message'): print(f"       Message: {e.message}")
        if hasattr(e, 'errors'): print(f"       Errors: {e.errors}")
        print("Please ensure the API user has permissions to query system__activity.")
        return None
    except json.JSONDecodeError as e:
        print(f"ERROR: Failed to decode JSON response from Looker API history query: {e}")
        print(f"Response received (first 500 chars): {str(response_json_str)[:500]}...")
        return None
    except Exception as e:
        print(f"ERROR: Unexpected error fetching history data via API: {e}")
        return None

    if not isinstance(history_data, list):
        print(f"ERROR: Expected a list of records from history API, but received type {type(history_data)}.")
        print(f"       Data received: {str(history_data)[:500]}...")
        return None

    if len(history_data) >= row_limit:
        # Rows are sorted by run count, so the least-run queries are the ones cut off.
        print(f"WARNING: History query returned {len(history_data)} rows, reaching the row limit of {row_limit}; usage data may be truncated.")

    # Nested defaultdicts: model -> explore -> field -> score
    usage_scores = collections.defaultdict(lambda: collections.defaultdict(lambda: collections.defaultdict(float)))
    processed_records = 0
    skipped_records_parse_error = 0
    skipped_records_no_list = 0
    skipped_records_zero_counts = 0
    skipped_records_zero_weight = 0

    for record in history_data:
        processed_records += 1
        model = record.get("query.model")
        explore = record.get("query.view")
        source = record.get("history.source")
        # Counts may arrive as JSON null; coerce before comparing or multiplying.
        run_count = _coerce_history_count(record.get("history.query_run_count", 0))
        user_count = _coerce_history_count(record.get("user.count", 0))

        # Skip rows lacking essential info or carrying no usage.
        if not model or not explore or not (run_count > 0 and user_count > 0):
            skipped_records_zero_counts += 1
            continue

        fields_list, skip_reason = _parse_history_fields(record.get("query.fields"))
        if skip_reason == "parse_error":
            skipped_records_parse_error += 1
            continue
        if skip_reason is not None:
            skipped_records_no_list += 1
            continue

        weight = weights.get(source, default_weight) if source else default_weight
        if not weight:
            # A zero weight means "ignore this source": do not even create
            # zero-score entries for its fields.
            skipped_records_zero_weight += 1
            continue

        # Score = Total Runs * Unique Users * Source Weight
        query_score_per_field = run_count * user_count * weight

        for field in fields_list:
            if isinstance(field, str) and field:
                usage_scores[model][explore][field] += query_score_per_field

    print(f"Finished processing history data:")
    print(f"  - Total records processed from API: {processed_records}")
    print(f"  - Records skipped (missing model/explore or zero run/user count): {skipped_records_zero_counts}")
    print(f"  - Records skipped (field JSON parse error): {skipped_records_parse_error}")
    print(f"  - Records skipped (no fields/not list/empty): {skipped_records_no_list}")
    print(f"  - Records skipped (zero source weight): {skipped_records_zero_weight}")

    if not usage_scores:
        print("WARNING: No field usage scores were calculated. Check history data and processing logic.")

    return usage_scores


# --- Prompt Engineering ---
def generate_gemini_prompt(model_name, explore_name, explore_lookml_json):
    """Build the Gemini prompt that grades one explore for Conversational Analytics.

    Args:
        model_name: LookML model name shown in the prompt.
        explore_name: Explore name shown in the prompt.
        explore_lookml_json: Explore definition, already serialized as a JSON
            string; it is embedded verbatim inside a fenced ``json`` block.

    Returns:
        The full prompt text: a header carrying the three arguments followed
        by the fixed evaluation rubric and output-format instructions.
    """
    # Only this part is interpolated; everything after the code fence is fixed text.
    header = f"""
You are an expert LookML developer optimizing Looker Explores specifically for Looker's Conversational Analytics feature (Gemini in Looker). This feature translates natural language questions into Looker API queries based on LookML metadata (fields, labels, descriptions) and data values. Your goal is to evaluate the provided Explore definition for CA readiness and suggest actionable improvements based on CA best practices.

**Analyze the following LookML Explore definition:**

* **Model:** `{model_name}`
* **Explore:** `{explore_name}`

**Explore Definition (JSON representation from Looker SDK):**
```json
{explore_lookml_json}
```

"""
    # Static rubric: a plain string, so literal braces need no escaping here.
    rubric = """**Analysis Task:**

Evaluate the readiness of this Explore for Conversational Analytics, focusing on common pitfalls and best practices:

1.  **Clarity for Natural Language:**
    * **Labels:** Are field `label` values clear, concise, business-friendly, and unambiguous? Do they reflect terms users would naturally use? Suggest improved labels where needed.
    * **Descriptions:** Are `description` attributes thorough and helpful? **Descriptions are CRITICAL for CA.** They should define the field, provide business context, list synonyms or common terms users might use for this field, and explain calculations if applicable. Identify fields lacking good descriptions or needing more detail/synonyms for CA.
    * **Naming Conflicts:** Are there fields with similar names or labels that could cause ambiguity for CA when mapping user questions? Suggest specific ways to resolve ambiguity (e.g., better labels/descriptions, hiding one field).

2.  **Field Curation:**
    * **Field Bloat:** Does the explore expose too many fields? Are technical fields (Primary Keys, Foreign Keys, intermediate calculations) potentially visible and not hidden?
    * **Relevance:** Are the exposed fields relevant for typical conversational questions users might ask of this data? Suggest hiding fields that are irrelevant for conversational use (`hidden: yes`).

3.  **Structure & Simplicity:**
    * **Joins:** Are the joins relatively simple and logical? Highly complex joins can hinder CA performance. Note if joins seem overly complex.
    * **Group Labels:** Are fields logically grouped using `group_label` or `group_item_label` for better organization in the UI, which aids discoverability? Suggest adding group labels where appropriate.

4.  **Data Representation:**
    * **Data Types:** Are field types (`type`) appropriate for the data (e.g., number, date_*, string, yesno)?
    * **Persistent Logic:** Is essential business logic (commonly found in dashboard table calculations or custom fields) defined persistently within LookML dimensions or measures so CA can access it? Identify potential opportunities to convert such logic.

**Output Requirements:**

Provide your analysis in the following structured format using these exact Markdown headers:

### GRADE
Assign an overall readiness grade from 0 (Not Ready) to 100 (Excellent). **Output only the integer number.**

### RATIONALE
Briefly explain the reasoning behind the grade, highlighting key strengths and weaknesses based on the CA best practices above (especially description quality, labeling, field curation, and clarity).

### RECOMMENDATIONS
Provide a numbered list of specific, actionable recommendations focused on improving this explore *for Conversational Analytics*. Prioritize high-impact changes. Be specific about *which* view/field needs changing and *what* change is needed (e.g., "Add `description: \"Detailed explanation including synonyms...\"` to `view.field`.", "Add `label: \"User Friendly Name\"` to `view.field`.", "Recommend adding `hidden: yes` to `view.primary_key` in the base view.", "Add `group_label: \"Group Name\"` to related fields like `view.field1`, `view.field2`.").

### GENERATED LOOKML SUGGESTIONS
Provide 1-2 concise LookML code snippets demonstrating *how* to implement a key recommendation, focusing on adding descriptions or labels. Example: `dimension: my_field { description: "Detailed explanation..." }`. If no specific code changes are easily suggested, state "No specific LookML snippets suggested based on the provided definition."

"""
    return header + rubric

# --- Agent Instruction Generation ---
def generate_agent_instructions(top_used_fields: list, recommendations: list) -> list:
    """Produce suggested agent instructions from the analysis results.

    Args:
        top_used_fields: Sequence of entries whose first element is a field
            name, ordered most-used first.
        recommendations: Currently unused; kept for planned instructions
            (fields to avoid, default date field, disambiguation).

    Returns:
        A one-element list: either a sentence naming the first
        AGENT_INSTRUCTION_TOP_FIELDS field names, or a fallback message when
        no field names are available.
    """
    if top_used_fields:
        leading_entries = top_used_fields[:AGENT_INSTRUCTION_TOP_FIELDS]
        leading_names = [entry[0] for entry in leading_entries]
        if leading_names:
            joined_names = ', '.join(leading_names)
            return [f"The most important fields in this data source are likely: {joined_names}."]

    # Nothing could be derived automatically.
    return ["No specific agent instructions generated automatically. Review recommendations and top fields manually."]

# --- LookML Extension File Generation ---
# Field-name tokens that describe a field's role or aggregation rather than
# its business meaning; they are never offered as synonyms.
_SYNONYM_STOP_WORDS = frozenset({
    'id', 'pk', 'fk', 'key', 'date', 'time', 'ts', 'at',
    'count', 'sum', 'avg', 'min', 'max', 'p50', 'p90', 'p99',
})


def generate_synonyms(field_part: str, label: str) -> list:
    """Derive candidate synonym words from a field name and its label.

    Args:
        field_part: Field name without the view prefix; split on underscores.
            None or an empty string contributes no words.
        label: Human-readable label; split on whitespace, with non-word
            characters stripped from each word. None or empty contributes
            no words.

    Returns:
        Sorted list of unique lowercase words longer than two characters.
        Stop words are removed from the field-name tokens regardless of case;
        label words are not filtered against the stop list.
    """
    words = set()

    for token in (field_part or "").split('_'):
        # Lowercase before the stop-word test so 'KEY' or 'Date' are filtered too.
        token = token.lower()
        if len(token) > 2 and token not in _SYNONYM_STOP_WORDS:
            words.add(token)

    # split() with no argument handles tabs and repeated spaces.
    for raw_word in (label or "").split():
        cleaned_word = re.sub(r'[^\w]', '', raw_word).lower()
        if len(cleaned_word) > 2:
            words.add(cleaned_word)

    return sorted(words)


def generate_ca_lookml_file_content(explore_key: str, parsed_data: dict, explore_definition_str: str) -> str:
    """Generates the content for a LookML file containing CA-specific extended views and explore."""
    model_name, explore_name = explore_key.split('/', 1)
    lines = []
    indent = "  " # Standard LookML indentation
    ca_explore_name = explore_name + CA_SUFFIX
    view_name_map = {} # Maps original view name to CA extended view name

    # --- Helper to safely parse explore definition ---
    explore_def = {}
    explore_fields_details = {} # Store details like primary_key, description, label
    if explore_definition_str and not explore_definition_str.startswith("# Error"):
        try:
            explore_def = json.loads(explore_definition_str)
            # Extract field details for checking primary_key status and getting original desc/label
            if 'fields' in explore_def and isinstance(explore_def['fields'], dict):
                 for field_type in ['dimensions', 'measures', 'dimension_groups']:
                     if field_type in explore_def['fields'] and isinstance(explore_def['fields'][field_type], list):
                         for field_detail in explore_def['fields'][field_type]:
                             if isinstance(field_detail, dict) and 'name' in field_detail:
                                 # Store details using the full field name (view.field) as key
                                 explore_fields_details[field_detail['name']] = {
                                     'primary_key': field_detail.get('primary_key', False),
                                     'description': field_detail.get('description'),
                                     'label': field_detail.get('label'),
                                     'group_label': field_detail.get('group_label'),
                                     'type': field_detail.get('type') # Store type for dim/measure guess
                                 }

        except json.JSONDecodeError:
            print(f"  WARNING: Could not parse explore definition JSON for {explore_key} when generating CA LookML file.")
            explore_def = {} # Use empty dict if parsing fails

    # --- Extract view names ---
    involved_views = set()
    base_view = explore_def.get('view_name')
    if base_view:
        involved_views.add(base_view)
        view_name_map[base_view] = base_view + CA_SUFFIX # Map base view
    joins = explore_def.get('joins', [])
    original_joins_structure = [] # Store original join details
    if isinstance(joins, list):
        for join in joins:
            # 'name' usually refers to the view being joined
            view_from_join = join.get('name')
            if view_from_join and isinstance(view_from_join, str):
                involved_views.add(view_from_join)
                view_name_map[view_from_join] = view_from_join + CA_SUFFIX # Map joined view
                original_joins_structure.append(join) # Keep original join details

    # --- Prepare context mapping (recommendations/top fields per view/field) ---
    view_context = collections.defaultdict(lambda: {'top_fields': [], 'recommendations': collections.defaultdict(list), 'view_recs': []})
    # Simple pattern: look for view_name.field_name
    # Make pattern more specific to avoid matching things like "schema.table"
    field_pattern = re.compile(r"\b([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)\b")

    # Map top fields
    top_used_fields = parsed_data.get('top_used_fields', [])
    for field_name, score in top_used_fields:
         match = field_pattern.match(field_name)
         if match:
             view_name, field_part = match.groups()
             # Ensure the view is tracked, even if not in joins (e.g., from base view)
             if view_name not in view_name_map: # If view only appeared in top fields
                 involved_views.add(view_name)
                 view_name_map[view_name] = view_name + CA_SUFFIX
             view_context[view_name]['top_fields'].append({'name': field_name, 'field_part': field_part, 'score': score})
         # else: field doesn't match view.field pattern, ignore for view mapping

    # Map recommendations (best effort)
    recommendations = parsed_data.get('recommendations', [])
    explore_recommendations = []
    if recommendations: # Check if recommendations exist
        for rec in recommendations:
            matches = field_pattern.findall(rec)
            mapped_to_field = False
            if matches:
                # Try mapping to all fields mentioned
                for view_name, field_part in matches:
                    if view_name in involved_views: # Only map if the view is relevant to this explore
                        view_context[view_name]['recommendations'][field_part].append(rec)
                        mapped_to_field = True
            # If not mapped to a specific field, check if it mentions a view
            if not mapped_to_field:
                found_view = False
                for view_name in involved_views:
                     # Use word boundary to avoid partial matches (e.g., 'user' in 'user_id')
                     if re.search(rf'\b{re.escape(view_name)}\b', rec):
                         view_context[view_name]['view_recs'].append(rec)
                         found_view = True
                         # Don't break, recommendation might apply to multiple views
                if not found_view:
                     explore_recommendations.append(rec) # Keep as explore-level rec


    # --- Generate File Content ---
    # Header Comments & Connection/Include Placeholders
    lines.append(f"# LookML File for CA-Optimized Explore: {explore_key}")
    lines.append(f"# Generated by Conversational Readiness Analyzer Script on {time.strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append("#")
    lines.append("# Purpose: This file defines extended Views and a new Explore based on")
    lines.append(f"#          '{explore_name}', curated for Conversational Analytics (CA).")
    lines.append("#")
    lines.append("# Instructions:")
    lines.append("# 1. Save this file in your LookML project (e.g., 'ca_extensions/{explore_name}_ca.explore.lkml').")
    lines.append("# 2. Replace 'CONNECTION_NAME_PLACEHOLDER' with your actual connection name.")
    lines.append("# 3. Verify the 'include:' paths point correctly to your original view files.")
    lines.append("# 4. Include this file in your model file (e.g., include: \"/ca_extensions/*.explore.lkml\")")
    lines.append("# 5. Review and refine the auto-generated labels and descriptions below.")
    lines.append("# 6. IMPORTANT: Add 'fields_hidden_by_default: yes' to the *original base view files*")
    lines.append("#    referenced via 'extends' to ensure only explicitly unhidden fields are exposed in the CA explore.")
    lines.append("#")
    lines.append("connection: \"CONNECTION_NAME_PLACEHOLDER\"")
    # Include original views needed for extension
    include_paths = set()
    for view_name in involved_views:
        # Attempt a generic path - USER MUST VERIFY THIS
        include_paths.add(f"/views/{view_name}.view.lkml") # Common pattern, but needs verification
    for path in sorted(list(include_paths)):
         lines.append(f"include: \"{path}\" # Verify this path is correct")

    lines.append("\n")
    lines.append("# --- Analysis Summary (For Context) ---")
    lines.append(f"# Grade: {parsed_data.get('grade', 'N/A')}/100")
    lines.append("# Rationale:")
    rationale = parsed_data.get('rationale', 'N/A')
    rationale_lines = textwrap.wrap(rationale if rationale else 'N/A', width=80, initial_indent="#   ", subsequent_indent="#   ")
    lines.extend(rationale_lines)
    lines.append("# Top Used Fields (Weighted Score):")
    if top_used_fields:
        for field, score in top_used_fields:
            lines.append(f"#   - {field} ({score})")
    else:
        lines.append("#   N/A")
    lines.append("# ------------------------------------")
    lines.append("\n")

    # Extended View Blocks
    if not involved_views:
         lines.append("# WARNING: Could not identify views associated with this explore from the definition.")
    else:
        lines.append("# --- Extended Views for CA ---")
        lines.append("# These views extend the original views and contain CA-specific refinements.")
        lines.append("# Add 'fields_hidden_by_default: yes' to original views for best practice.")
        lines.append("")

        processed_fields_in_view = collections.defaultdict(set)

        for view_name in sorted(list(involved_views)):
            ca_view_name = view_name_map.get(view_name, view_name + CA_SUFFIX) # Get CA view name
            lines.append(f"view: {ca_view_name} extends: [{view_name}] {{")
            lines.append(f"{indent}# IMPORTANT: Consider adding 'fields_hidden_by_default: yes' to the original '{view_name}.view.lkml' file.")
            lines.append("")
            context = view_context[view_name]
            has_content = False # Track if view refinement has actionable items

            # Add view-level recommendations (concise)
            if context['view_recs']:
                has_content = True
                lines.append(indent + "# General Recommendations for this view:")
                for i, rec in enumerate(context['view_recs']):
                     # Keep recommendations concise here
                     lines.append(f"{indent}# - {rec[:100]}{'...' if len(rec)>100 else ''}") # Truncate long recs
                lines.append("") # Blank line after view recs

            # Combine top fields and other recommended fields for processing
            fields_to_process = {}
            for field_info in context['top_fields']:
                fields_to_process[field_info['field_part']] = field_info
            for field_part in context['recommendations']:
                 if field_part not in fields_to_process:
                     # Find original field name if possible
                     field_name = f"{view_name}.{field_part}"
                     fields_to_process[field_part] = {'name': field_name, 'field_part': field_part, 'score': None} # Score is None if not a top field

            if fields_to_process:
                has_content = True
                lines.append(indent + "# Field Refinements (Focus on Top Fields & Recommendations):")
                # Process fields, prioritizing top fields shown first
                sorted_fields_to_process = sorted(
                    fields_to_process.values(),
                    key=lambda x: x['score'] if x['score'] is not None else -1, # Sort top fields first
                    reverse=True
                )

                for field_info in sorted_fields_to_process:
                    field_part = field_info['field_part']
                    field_name = field_info['name'] # Full name like view.field
                    score = field_info['score']
                    original_details = explore_fields_details.get(field_name, {})
                    is_pk = original_details.get('primary_key', False)
                    original_label = original_details.get('label')
                    original_description = original_details.get('description')
                    original_group_label = original_details.get('group_label')
                    field_type_guess = "measure" if any(x in field_part for x in ['count', 'sum', 'average', 'total', 'min', 'max', 'percent', 'ratio', '_revenue', '_amount']) else "dimension"

                    lines.append(f"{indent}# --- Field: {field_name} {'(Score: ' + str(score) + ')' if score is not None else ''} ---")

                    # Add concise comment if specific recommendations exist
                    field_recs = context['recommendations'].get(field_part, [])
                    if field_recs:
                        lines.append(f"{indent}# ACTION: Review Gemini recommendations for this field (see full list below view).")

                    lines.append(f"{indent}{field_type_guess}: {field_part} {{")
                    if is_pk:
                         lines.append(f"{indent}{indent}# This is a primary key. Should likely remain hidden in the base view.")
                         lines.append(f"{indent}{indent}hidden: yes # Ensure hidden in base or uncomment to explicitly hide here.")
                    else:
                         lines.append(f"{indent}{indent}hidden: no # Explicitly unhide for CA")
                         # Attempt to auto-populate label
                         default_label = field_part.replace('_',' ').title()
                         suggested_label = original_label if original_label else default_label
                         lines.append(f"{indent}{indent}# Verify/Refine user-friendly label.")
                         lines.append(f"{indent}{indent}label: \"{suggested_label}\"")
                         # Attempt to auto-populate description with synonyms
                         description_parts = []
                         if original_description:
                             description_parts.append(original_description)
                         # Generate synonyms
                         synonyms = generate_synonyms(field_part, suggested_label)
                         if synonyms:
                             description_parts.append(f"(Synonyms: {', '.join(synonyms)})")

                         suggested_description = " ".join(description_parts).strip()
                         if not suggested_description: # Handle empty case
                             suggested_description = f"Description for {field_name}"

                         lines.append(f"{indent}{indent}# Verify/Refine detailed description for CA context, synonyms & calculations.")
                         lines.append(f"{indent}{indent}description: \"{suggested_description}\"")
                         # Group Label
                         lines.append(f"{indent}{indent}# Assign group_label if applicable for better UI grouping")
                         lines.append(f"{indent}{indent}group_label: \"{original_group_label if original_group_label else ''}\"")

                    lines.append(f"{indent}}}")
                    lines.append("") # Blank line after field block

            if not has_content:
                 lines.append(indent + "# No specific top fields or recommendations identified for this view.")
                 lines.append(indent + "# Add manual refinements if needed.")

            # List full recommendations for this view at the end for reference
            if any(context['recommendations'].values()):
                 lines.append(indent + "# Full Recommendations Mentioning Fields in this View:")
                 for field_part, recs in context['recommendations'].items():
                     for i, rec in enumerate(recs):
                          rec_lines = textwrap.wrap(f"{view_name}.{field_part}: {rec}", width=70, initial_indent=f"{indent}# ", subsequent_indent=f"{indent}#   ")
                          lines.extend(rec_lines)
                 lines.append("")


            lines.append("}}") # Close view block
            lines.append("\n")

    # New Explore Definition using Extended Views
    lines.append("\n" + "-"*30 + "\n")
    lines.append("# --- CA-Optimized Explore Definition ---")
    lines.append(f"explore: {ca_explore_name} {{")
    lines.append(f"{indent}# This explore uses the extended *_ca views defined above.")
    lines.append("")
    lines.append(indent + "# Add a user-friendly label for conversational use.")
    lines.append(indent + f"label: \"{explore_def.get('label', explore_name)} (Conversational)\" # Example label")
    lines.append(indent + "# Add a clear description explaining the explore's purpose for CA.")
    lines.append(indent + f"description: \"{explore_def.get('description', '')} (Optimized for Conversational Analytics)\"") # Seed with original desc
    lines.append("")

    # Define the 'from' using the extended base view
    ca_base_view = view_name_map.get(base_view)
    if ca_base_view:
        lines.append(f"{indent}from: {ca_base_view}")
    else:
        lines.append(f"{indent}# ERROR: Could not determine base view for explore '{explore_name}'.")

    # Recreate joins using extended view names
    if original_joins_structure:
        lines.append("")
        lines.append(f"{indent}# Joins using *_ca extended views") # Changed comment style
        for join in original_joins_structure:
            original_join_view = join.get('name')
            ca_join_view = view_name_map.get(original_join_view)
            # Determine the 'from' view for the join - defaults to base view if not specified in join def
            original_from_view = join.get('from', base_view)
            ca_from_view = view_name_map.get(original_from_view)

            if ca_join_view and ca_from_view: # Only add join if both views could be mapped
                lines.append(f"{indent}join: {ca_join_view} {{")
                # Copy essential join parameters
                if join.get('type'): lines.append(f"{indent}{indent}type: {join['type']}")
                if join.get('relationship'): lines.append(f"{indent}{indent}relationship: {join['relationship']}")
                if join.get('sql_on'): lines.append(f"{indent}{indent}sql_on: {join['sql_on']} ;;")
                # Reference the extended 'from' view
                lines.append(f"{indent}{indent}from: {ca_from_view}")
                # Copy view_label if it existed
                if join.get('view_label'): lines.append(f"{indent}{indent}view_label: \"{join['view_label']}\"")
                lines.append(f"{indent}}}")
            else:
                lines.append(f"{indent}# WARNING: Could not map original join for '{original_join_view}'. Original join details:")
                lines.append(f"{indent}# {json.dumps(join)}")


    lines.append("}}") # Close explore block
    lines.append("\n")


    return "\n".join(lines)


# --- Looker Interaction ---
def get_all_explores(sdk):
    """Collect (model_name, explore_name) pairs for every visible explore.

    Asks the SDK for all LookML models together with their explores and keeps
    each explore that has a name and is not flagged as hidden. Models lacking
    a name or explores are reported on stdout and skipped.

    Returns:
        A list of ``(model_name, explore_name)`` tuples. The list is empty
        when no models come back, nothing qualifies, or any error is raised
        while talking to the SDK.
    """
    visible_explores = []
    print("Fetching LookML models and their explores from Looker...")
    try:
        # Ask only for the attributes used below (plus label/description).
        lookml_models = sdk.all_lookml_models(
            fields="name,project_name,explores(name,hidden,label,description)"
        )
        if not lookml_models:
            print("WARNING: No LookML models found or returned by the SDK. Check Looker permissions or if models exist.")
            return []

        models_seen = 0
        for models_seen, lookml_model in enumerate(lookml_models, start=1):
            if not lookml_model.name:
                print("  Warning: Encountered a model without a name.")
                continue
            if not lookml_model.explores:
                print(f"  Info: Model '{lookml_model.name}' has no explores defined or returned.")
                continue
            # Keep only explores that are named and not hidden.
            visible_explores.extend(
                (lookml_model.name, candidate.name)
                for candidate in lookml_model.explores
                if candidate.name and not candidate.hidden
            )

        explores_found = len(visible_explores)
        print(f"Successfully scanned {models_seen} models and found {explores_found} non-hidden explores.")
        if not visible_explores:
            print("WARNING: No non-hidden explores were found across all models.")
        return visible_explores
    except Exception as e:
        print(f"ERROR: Failed while fetching models or explores from Looker: {e}")
        print("Check SDK authentication and permissions (user needs access to see models and explores).")
        # Partial results are discarded on failure.
        return []

def get_explore_lookml_definition(sdk, model_name, explore_name):
    """
    Fetches the detailed LookML definition of a specific explore using the SDK.

    Args:
        sdk: Looker SDK client exposing ``lookml_model_explore``.
        model_name: Name of the LookML model that owns the explore.
        explore_name: Name of the explore to fetch.

    Returns:
        A JSON string (indent=2) describing the explore on success. On any
        failure the function does not raise; it returns a string starting
        with "# Error" that describes the problem, which callers detect with
        ``startswith("# Error")``.
    """
    print(f"  Fetching definition for explore: {model_name}/{explore_name}...")
    try:
        # Fetch detailed explore information. Request fields relevant for analysis.
        # 'view_name' and the joins(...) attributes are needed to identify views
        # and reconstruct joins; 'primary_key' is requested for dimensions.
        explore_details = sdk.lookml_model_explore(
            lookml_model_name=model_name,
            explore_name=explore_name,
            # Request fields needed for view identification and analysis
            # Ensure we get original label/description/group_label for fields
            fields="""
                name, label, description, hidden, group_label, view_name,
                joins(name, view_label, from, relationship, type, sql_on),
                fields(
                    dimensions(name, label, label_short, description, type, sql, primary_key, hidden, group_label, group_item_label),
                    measures(name, label, label_short, description, type, sql, hidden, group_label, group_item_label),
                    dimension_groups(name, label, label_short, description, type, timeframes, sql, hidden, group_label, group_item_label),
                    filters(name, label, label_short, description, type, hidden, group_label, group_item_label),
                    parameters(name, label, label_short, description, type, hidden, group_label, group_item_label)
                ),
                always_filter, conditionally_filter, index_fields
            """
        )

        # Convert the SDK object to JSON, expanding nested objects through their
        # attribute dictionaries. A second attempt with `o.__dict__` is pointless:
        # it fails on exactly the objects `vars(o)` fails on, so one attempt is made
        # and a failure is reported as a serialization error.
        try:
            explore_definition_json = json.dumps(explore_details, default=vars, indent=2)
        except TypeError as json_error:
            print(f"  ERROR: Failed to serialize Looker SDK object to JSON: {json_error}")
            print(f"  Object type: {type(explore_details)}")
            return f"# Error: Failed to serialize explore details for {model_name}/{explore_name}: {json_error}"

        if not explore_definition_json or explore_definition_json == "{}":
            print(f"  Warning: Could not retrieve a valid definition for {model_name}/{explore_name}. API returned empty details or serialization failed.")
            return f"# Error: Could not retrieve valid definition for {model_name}/{explore_name}"

        print(f"  Successfully fetched and formatted definition for {model_name}/{explore_name}.")
        return explore_definition_json

    except looker_sdk.error.SDKError as looker_error:
        # NOTE(review): SDKError is not guaranteed to expose `status` (or
        # `message`) in every SDK version - confirm against the installed SDK.
        # Reading them defensively keeps this handler from raising a new
        # AttributeError while reporting the original failure.
        error_status = getattr(looker_error, "status", None)
        error_message = getattr(looker_error, "message", str(looker_error))
        if error_status == 404:
            print(f"  ERROR: Explore '{model_name}/{explore_name}' not found (404). Skipping.")
            return f"# Error: Explore not found (404) for {model_name}/{explore_name}"
        print(f"  ERROR: Looker SDK error fetching definition for '{model_name}/{explore_name}': {looker_error}")
        return f"# Error: Looker SDK error ({error_status}) for {model_name}/{explore_name}: {error_message}"
    except Exception as e:
        print(f"  ERROR: Unexpected error fetching LookML definition for explore '{explore_name}' in model '{model_name}': {e}")
        return f"# Error: Unexpected error fetching definition for {model_name}/{explore_name}: {e}"


# --- Gemini Interaction ---
def analyze_with_gemini(model, prompt, model_name, explore_name):
    """Run one Gemini generation for an explore and return its text.

    Args:
        model: Model object whose ``generate_content`` method is called.
        prompt: Prompt text, sent as the only element of the request.
        model_name: LookML model name, used only in messages.
        explore_name: Explore name, used only in messages.

    Returns:
        The text of the first part of the first candidate when generation
        finished with STOP. In every other situation (no candidates, safety
        block, token limit, other finish reason, missing content, or any
        exception) a string starting with "Error:" is returned instead; the
        function does not raise.
    """
    print(f"  Sending request to Gemini ({gemini_model_name}) for analysis of {model_name}/{explore_name}...")
    try:
        # Generation parameters for the request.
        generation_config = generative_models.GenerationConfig(
            max_output_tokens=8192,
            temperature=0.2,
            top_p=0.95,
            top_k=40
        )

        # Apply the same blocking threshold to each listed harm category.
        harm_category = generative_models.HarmCategory
        block_threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
        safety_settings = {
            category: block_threshold
            for category in (
                harm_category.HARM_CATEGORY_HATE_SPEECH,
                harm_category.HARM_CATEGORY_DANGEROUS_CONTENT,
                harm_category.HARM_CATEGORY_SEXUALLY_EXPLICIT,
                harm_category.HARM_CATEGORY_HARASSMENT,
            )
        }

        response = model.generate_content(
            [prompt],
            generation_config=generation_config,
            safety_settings=safety_settings,
            stream=False,
        )

        print(f"  Received response from Gemini for {model_name}/{explore_name}.")

        # --- Response validation ---
        if not response.candidates:
            print("  ERROR: Gemini response did not contain any candidates.")
            return "Error: No response candidates returned by Gemini."

        top_candidate = response.candidates[0]
        stop_reason = top_candidate.finish_reason

        if stop_reason != FinishReason.STOP:
            finish_reason_name = stop_reason.name
            print(f"  WARNING: Gemini response finished with reason: {finish_reason_name}")

            if stop_reason == FinishReason.SAFETY:
                print("  ERROR: Response was blocked by Gemini's safety filters.")
                return f"Error: Gemini response blocked due to safety filters for {model_name}/{explore_name}."

            if stop_reason == FinishReason.MAX_TOKENS:
                print("  ERROR: Response may be truncated because maximum output tokens were reached.")
                partial = top_candidate.content
                if partial and partial.parts:
                    # Hand back whatever was produced, still flagged as an error.
                    return f"Error: Response truncated (MAX_TOKENS). Analysis might be incomplete.\n\n{partial.parts[0].text}"
                return "Error: Response truncated (MAX_TOKENS) and no content returned."

            return f"Error: Gemini generation stopped unexpectedly ({finish_reason_name}) for {model_name}/{explore_name}."

        payload = top_candidate.content
        if not payload or not payload.parts:
            print("  ERROR: Gemini response candidate does not contain content parts.")
            return f"Error: No text content found in Gemini response for {model_name}/{explore_name}."

        generated_text = payload.parts[0].text
        print("  Successfully extracted analysis text from Gemini response.")
        return generated_text

    except Exception as e:
        print(f"  ERROR: Unexpected error interacting with Gemini API for {model_name}/{explore_name}: {e}")
        return f"Error: Unexpected error during Gemini analysis for {model_name}/{explore_name}: {e}"

# --- Main Script Logic ---
# Leading list marker on a recommendation line: "1.", "2)" or "-".
_RECOMMENDATION_MARKER_RE = re.compile(r'^(?:\d+[.)]|-)\s*')


def _split_recommendations(recommendations_raw):
    """Return the list items found in the RECOMMENDATIONS section text.

    Only lines starting with a digit or a dash count as items. The list
    marker is removed with an anchored pattern so that the item's own text is
    preserved (a plain ``lstrip`` of digits would also eat leading digits of
    the recommendation itself and would leave the dash of dashed items).
    """
    items = []
    for raw_line in recommendations_raw.split('\n'):
        line = raw_line.strip()
        if line and (line[0].isdigit() or line[0] == '-'):
            items.append(_RECOMMENDATION_MARKER_RE.sub('', line, count=1))
    return items


def main():
    """Main function to orchestrate the LookML explore analysis.

    Steps, in order: load Looker credentials, initialize the Looker SDK and
    Vertex AI, instantiate the Gemini model, fetch usage history, decide which
    explores to analyze (TARGET_EXPLORES if set, otherwise every non-hidden
    explore), analyze each explore with Gemini, parse the structured answer,
    generate agent instructions and CA LookML content, print a summary, and
    write everything to 'lookml_conversational_analysis_results.json'.

    Exits the process with status 1 when the Gemini model cannot be
    instantiated or when TARGET_EXPLORES contains no valid entry; returns
    early (without writing a file) when no explores are found.
    """

    start_time = time.time()
    print("--- Starting LookML Conversational Readiness Analysis ---")
    print(f"Script started at: {time.strftime('%Y-%m-%d %H:%M:%S')}")

    # 1. Get Looker Config from Secret Manager
    print("\nStep 1: Fetching Looker credentials...")
    looker_config = get_looker_config_from_secret_manager(
        looker_project_id, looker_secret_name, looker_secret_version
    )

    # 2. Initialize Looker SDK
    print("\nStep 2: Initializing Looker SDK...")
    sdk = initialize_looker_sdk(looker_config)

    # 3. Initialize Vertex AI
    print("\nStep 3: Initializing Vertex AI...")
    initialize_vertex_ai(vertex_project_id, vertex_location)
    # Instantiate the Gemini model to be used for analysis
    try:
        print(f"Instantiating Gemini model: {gemini_model_name}...")
        gemini_model = GenerativeModel(gemini_model_name)
        print("Gemini model instantiated successfully.")
    except Exception as e:
        print(f"ERROR: Failed to instantiate Gemini model '{gemini_model_name}': {e}")
        sys.exit(1)

    # 4. Fetch and Process History Data via API (Do this after SDK is initialized)
    print("\nStep 4: Fetching and Processing Looker History Data via API...")
    history_scores = fetch_and_process_history(sdk, SOURCE_WEIGHTS, DEFAULT_WEIGHT)
    if history_scores is None:
        print("WARNING: Proceeding without field usage analysis due to history fetching/processing error.")
        history_scores = {}  # Use an empty dict to avoid errors later

    # 5. Determine Explores to Analyze
    print("\nStep 5: Determining explores to analyze...")
    explores_to_analyze = []
    if TARGET_EXPLORES:
        # Use specified target explores
        print(f"  Using the {len(TARGET_EXPLORES)} specified target explore(s).")
        for target in TARGET_EXPLORES:
            parts = target.split('/')
            if len(parts) == 2:
                explores_to_analyze.append((parts[0].strip(), parts[1].strip()))
            else:
                print(f"  WARNING: Skipping invalid target explore format: '{target}'. Use 'model_name/explore_name'.")
        if not explores_to_analyze:
            print("ERROR: TARGET_EXPLORES list was provided, but no valid explores could be parsed. Exiting.")
            sys.exit(1)
    else:
        # Get All Explores from Looker
        print("  TARGET_EXPLORES is empty. Fetching all non-hidden explores from Looker...")
        explores_to_analyze = get_all_explores(sdk)
        if not explores_to_analyze:
            print("No explores found or error occurred while fetching. Exiting script.")
            return  # Exit gracefully if no explores

    total_explores = len(explores_to_analyze)
    print(f"Prepared {total_explores} explores for analysis.")

    # 6. Analyze Each Explore
    print(f"\nStep 6: Starting analysis loop for {total_explores} explores...")
    results = {}  # Analysis results keyed by "model/explore"
    processed_count = 0

    for model_name, explore_name in explores_to_analyze:
        processed_count += 1
        explore_key = f"{model_name}/{explore_name}"
        print(f"\n--- Analyzing Explore {processed_count}/{total_explores}: {explore_key} ---")
        explore_start_time = time.time()

        # Get LookML definition (structure) for the current explore and keep
        # it with the result: the LookML generation in step 7 needs it.
        explore_definition_json = get_explore_lookml_definition(sdk, model_name, explore_name)
        current_result = {"explore_definition_json": explore_definition_json}

        # The fetch helper signals failure with a "# Error..." string.
        if explore_definition_json.startswith("# Error"):
            print(f"  Skipping analysis for {explore_key} due to error fetching definition.")
            current_result.update({"status": "Fetch Error", "error": explore_definition_json, "analysis": None, "top_used_fields": []})
            results[explore_key] = current_result
            continue  # Move to the next explore

        # Generate the specific prompt for this explore
        prompt = generate_gemini_prompt(model_name, explore_name, explore_definition_json)

        # Call Gemini for analysis
        gemini_response_text = analyze_with_gemini(gemini_model, prompt, model_name, explore_name)

        # The Gemini helper signals failure with an "Error:..." string.
        if gemini_response_text.startswith("Error:"):
            current_result.update({"status": "Analysis Error", "error": gemini_response_text, "analysis": None})
            print(f"  Analysis FAILED for {explore_key}.")
        else:
            current_result.update({"status": "Analysis Success", "error": None, "analysis": gemini_response_text})
            print(f"  Analysis SUCCEEDED for {explore_key}.")

        # Get Top Used Fields from processed history data
        # (history_scores is indexed by model name, then explore name).
        explore_usage_scores = history_scores.get(model_name, {}).get(explore_name, {})
        if explore_usage_scores:
            sorted_fields = sorted(explore_usage_scores.items(), key=lambda item: item[1], reverse=True)
            # Format as [field_name, score] pairs, rounded for readability
            top_fields = [[field, round(score, 2)] for field, score in sorted_fields[:TOP_N_FIELDS]]
            current_result["top_used_fields"] = top_fields
            print(f"  Found {len(top_fields)} top used fields based on history.")
        else:
            current_result["top_used_fields"] = []
            print(f"  No usage history found for {explore_key}.")

        # Store results for this explore
        results[explore_key] = current_result

        explore_end_time = time.time()
        print(f"  Time taken for {explore_key}: {explore_end_time - explore_start_time:.2f} seconds")

        # Optional: Add delay between API calls (skipped after the last explore)
        if API_CALL_DELAY > 0 and processed_count < total_explores:
            print(f"  Waiting for {API_CALL_DELAY} second(s) before next API call...")
            time.sleep(API_CALL_DELAY)

    print("\n--- Analysis Loop Complete ---")

    # 7. Process and Output Results (Parse and Summarize)
    print("\nStep 7: Processing and Outputting Results...")

    fetch_errors = 0
    analysis_errors = 0
    analysis_success = 0
    parsing_errors = 0
    parsed_results = {}  # Structured results after parsing

    for key, result_data in results.items():
        print(f"\n--- Processing Results for {key} ---")
        status = result_data["status"]
        error = result_data["error"]
        analysis_text = result_data["analysis"]
        top_used_fields = result_data.get("top_used_fields", [])
        explore_definition_json = result_data.get("explore_definition_json")

        # Basic info plus placeholders for everything parsed/generated below.
        parsed_data = {
            "status": status,
            "error": error,
            "top_used_fields": top_used_fields,
            "grade": None,
            "rationale": None,
            "recommendations": [],
            "generated_lookml_suggestions": None,
            "suggested_agent_instructions": [],
            "suggested_ca_lookml_file": None,
        }

        recommendations = []  # Stays empty unless parsing succeeds

        if status == "Fetch Error":
            fetch_errors += 1
            print(f"  Status: {status}")
            print(f"  Error Details: {error}")
        elif status == "Analysis Error":
            analysis_errors += 1
            print(f"  Status: {status}")
            print(f"  Error Details: {error}")
        elif status == "Analysis Success":
            # Attempt to parse the structured output from the successful analysis
            try:
                # Each section runs from its Markdown header to the next "###".
                grade_str = analysis_text.split("### GRADE")[1].split("###")[0].strip()
                rationale = analysis_text.split("### RATIONALE")[1].split("###")[0].strip()
                recommendations_raw = analysis_text.split("### RECOMMENDATIONS")[1].split("###")[0].strip()
                suggestions = analysis_text.split("### GENERATED LOOKML SUGGESTIONS")[1].strip()

                # Convert grade to int
                try:
                    grade = int(grade_str)
                except ValueError:
                    print(f"  Warning: Could not parse grade '{grade_str}' as integer for {key}.")
                    grade = None

                # Split recommendations into a list (numbered or dashed items)
                recommendations = _split_recommendations(recommendations_raw)

                # Update parsed_data with parsed Gemini analysis
                parsed_data.update({
                    "grade": grade,
                    "rationale": rationale,
                    "recommendations": recommendations,
                    "generated_lookml_suggestions": suggestions,
                })
                print(f"  Parsing: SUCCESSFUL (Grade: {grade}, Recommendations: {len(recommendations)})")
                analysis_success += 1

            except IndexError:
                # A missing header makes split(...)[1] raise IndexError.
                print(f"  ERROR: Failed to parse structured output for {key}. Required headers missing or malformed.")
                parsing_errors += 1
                parsed_data["status"] = "Parsing Error"
                parsed_data["error"] = "Failed to find expected Markdown headers in analysis text."
                parsed_data["raw_analysis"] = analysis_text  # Kept in memory for debugging
            except Exception as parse_error:
                print(f"  ERROR: Unexpected error parsing structured output for {key}: {parse_error}")
                parsing_errors += 1
                parsed_data["status"] = "Parsing Error"
                parsed_data["error"] = f"Unexpected parsing error: {parse_error}"
                parsed_data["raw_analysis"] = analysis_text

        else:  # Handle unknown status
            print(f"  Status: UNKNOWN ({status})")
            parsed_data["status"] = "Unknown Status"

        # --- Generate Agent Instructions (even if parsing failed, use top fields) ---
        generated_instructions = generate_agent_instructions(top_used_fields, recommendations)
        parsed_data["suggested_agent_instructions"] = generated_instructions
        # --- Generate LookML Extension File Content ---
        # The explore definition string is passed along for view extraction.
        ca_lookml_content = generate_ca_lookml_file_content(key, parsed_data, explore_definition_json)
        parsed_data["suggested_ca_lookml_file"] = ca_lookml_content

        if status != "Fetch Error":  # Only print generation messages if analysis was attempted
            print(f"  Generated {len(generated_instructions)} agent instruction(s).")
            print("  Generated suggested CA LookML file content.")
        # --- End Generation ---

        # Add the potentially updated parsed_data to final results
        parsed_results[key] = parsed_data
        print(f"  Top Used Fields Found: {len(top_used_fields)}")
        print("-" * (len(key) + 24))  # Divider

    # --- Final Summary ---
    print("\n--- Final Summary ---")
    print(f"Total Explores Scanned:          {total_explores}")
    print(f"Successfully Analyzed & Parsed: {analysis_success}")
    print(f"Fetch/Definition Errors:        {fetch_errors}")
    print(f"Gemini Analysis Errors:         {analysis_errors}")
    print(f"Successful Analysis, BUT Parsing Failed: {parsing_errors}")
    print("---------------------")
    total_processed_check = fetch_errors + analysis_errors + parsing_errors + analysis_success
    print(f"Total Processed:                {total_processed_check} (Should match Total Scanned: {total_explores})")

    # 8. Save Results to File
    output_filename = "lookml_conversational_analysis_results.json"
    print(f"\nStep 8: Saving detailed results to '{output_filename}'...")
    try:
        # Drop the bulky raw definition / raw analysis text, when present,
        # to keep the JSON smaller.
        for parsed_entry in parsed_results.values():
            parsed_entry.pop("explore_definition_json", None)
            parsed_entry.pop("raw_analysis", None)

        with open(output_filename, "w", encoding='utf-8') as f:
            json.dump(parsed_results, f, indent=2, ensure_ascii=False)
        print("Successfully saved results (excluding raw definitions/analysis).")
    except Exception as e:
        print(f"ERROR: Failed to save results to JSON file '{output_filename}': {e}")

    end_time = time.time()
    total_duration = end_time - start_time
    print("\n--- Script Finished ---")
    print(f"Script finished at: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Total execution time: {total_duration:.2f} seconds")

# --- Script Entry Point ---
if __name__ == "__main__":
    # Refuse to run while any required setting still contains its template
    # placeholder text (history_data_filepath is no longer checked: unused).
    placeholder_checks = (
        ("your-gcp-project", looker_project_id),
        ("your-looker-secret-name", looker_secret_name),
        ("your-gcp-project", vertex_project_id),
    )
    if any(marker in configured for marker, configured in placeholder_checks):
        print("ERROR: Please update the placeholder values in the 'Configuration' section")
        print("       (looker_project_id, looker_secret_name, vertex_project_id)")
        print("       before running the script.")
        sys.exit(1)

    main()


--- Starting LookML Conversational Readiness Analysis ---
Script started at: 2025-04-18 18:20:25

Step 1: Fetching Looker credentials...
Attempting to fetch secret 'looker_ini' version 'latest' from project 'joey-looker'...
Successfully fetched secret 'looker_ini'.
Successfully parsed [Looker] section from secret.

Step 2: Initializing Looker SDK...
Initializing Looker SDK...
Looker SDK initialized successfully. Connected as user: Super Admin (ID: 1)

Step 3: Initializing Vertex AI...
Initializing Vertex AI for project 'joey-looker' in location 'us-central1'...
Vertex AI initialized successfully.
Instantiating Gemini model: gemini-2.5-pro-exp-03-25...
Gemini model instantiated successfully.

Step 4: Fetching and Processing Looker History Data via API...
Fetching and processing history data via Looker API...
Running inline query on system__activity/history...
Successfully fetched 57 history records via API.
Finished processing history data:
  - Total records processed from API: 57
  - R

In [2]:
# --- Configuration ---
# Suffix that appears in the generated LookML content and in the saved file
# names; keep it identical to the value used by the main script.
CA_SUFFIX = "_ca"
# Root folder under which the LookML files are written.
output_base_dir = "ca_lookml_output"
# Results file to read; must be the file name the main script writes.
input_json_filename = "lookml_conversational_analysis_results.json"

# --- Helper function to sanitize names for filenames/paths ---
def sanitize_name(name):
    """Return ``name`` reduced to a string safe for file and directory names.

    Each run of characters that are not word characters (letters, digits,
    underscore) becomes a single underscore, consecutive underscores are
    collapsed to one, and leading/trailing underscores are removed. If
    nothing is left, the placeholder ``"invalid_name"`` is returned.
    """
    # Step 1: swap every run of non-word characters for one underscore.
    replaced = re.sub(r'[^\w_]+', '_', name)
    # Step 2: squeeze repeated underscores (original or just inserted).
    collapsed = re.sub(r'_+', '_', replaced)
    # Step 3: drop underscores at either end.
    trimmed = collapsed.strip('_')
    if trimmed:
        return trimmed
    return "invalid_name"

# --- Main Logic ---
def save_lookml_files():
    """Write each explore's suggested LookML from the results JSON to disk.

    Loads ``input_json_filename`` and, for every ``"model/explore"`` key whose
    entry holds a non-empty string under ``"suggested_ca_lookml_file"``,
    writes that string to
    ``<output_base_dir>/<model>/<explore><CA_SUFFIX>.explore.lkml``. Model and
    explore names are passed through ``sanitize_name`` before being used as
    path components.

    Load problems and per-explore failures are reported with ``print`` and
    counted in the final summary rather than raised. Returns ``None``.
    """
    print(f"Attempting to load results from: {input_json_filename}")
    try:
        with open(input_json_filename, 'r', encoding='utf-8') as f:
            results_data = json.load(f)
    except FileNotFoundError:
        print(f"ERROR: Input JSON file not found: '{input_json_filename}'")
        print("Please ensure the main script ran successfully and created the JSON file.")
        return
    except json.JSONDecodeError as e:
        print(f"ERROR: Failed to decode JSON from '{input_json_filename}': {e}")
        return
    except Exception as e:
        print(f"ERROR: An unexpected error occurred while loading JSON: {e}")
        return

    # Syntactically valid JSON may still be a list, string or number; only an
    # object keyed by "model/explore" can be iterated with .items() below.
    if not isinstance(results_data, dict):
        print(
            f"ERROR: Expected a JSON object keyed by 'model/explore' in "
            f"'{input_json_filename}', got {type(results_data).__name__}."
        )
        return

    print(f"Successfully loaded results for {len(results_data)} explores.")

    print(f"Saving LookML files to base directory: '{output_base_dir}'")
    saved_count = 0
    skipped_count = 0

    for explore_key, data in results_data.items():
        # An entry that is not an object cannot carry LookML; treat it like a
        # missing value instead of crashing on .get().
        lookml_content = (
            data.get("suggested_ca_lookml_file") if isinstance(data, dict) else None
        )

        if not lookml_content or not isinstance(lookml_content, str):
            print(f"  Skipping {explore_key}: No LookML content found in results.")
            skipped_count += 1
            continue

        try:
            # Raises ValueError (handled below) when the key has no '/'.
            model_name, explore_name = explore_key.split('/', 1)

            # Sanitize names for path creation
            sane_model_name = sanitize_name(model_name)
            sane_explore_name = sanitize_name(explore_name)

            # Define output directory and filename
            model_dir = os.path.join(output_base_dir, sane_model_name)
            # Use the CA suffix convention for the filename
            output_filename = f"{sane_explore_name}{CA_SUFFIX}.explore.lkml"
            output_filepath = os.path.join(model_dir, output_filename)

            # Create directory if it doesn't exist
            os.makedirs(model_dir, exist_ok=True)

            # Write the LookML content to the file
            with open(output_filepath, 'w', encoding='utf-8') as f:
                f.write(lookml_content)

            print(f"  Saved: {output_filepath}")
            saved_count += 1

        except Exception as e:
            # Deliberately broad: one bad explore must not stop the batch.
            print(f"ERROR: Failed to save LookML for '{explore_key}': {e}")
            skipped_count += 1

    print("\n--- File Saving Summary ---")
    print(f"Successfully saved: {saved_count} file(s)")
    print(f"Skipped/Errors:   {skipped_count}")
    print(f"Output directory: '{os.path.abspath(output_base_dir)}'")

# --- Run the function ---
# Executes only when this code runs as the top-level script (or notebook
# cell), not when it is imported as a module.
if __name__ == "__main__":
    save_lookml_files()


Attempting to load results from: lookml_conversational_analysis_results.json
Successfully loaded results for 1 explores.
Saving LookML files to base directory: 'ca_lookml_output'
  Saved: ca_lookml_output/basic_ecomm/basic_order_items_ca.explore.lkml

--- File Saving Summary ---
Successfully saved: 1 file(s)
Skipped/Errors:   0
Output directory: '/content/ca_lookml_output'
