In [None]:
import os
import pathlib
from dotenv import load_dotenv
load_dotenv()

# Install dependencies
# If the LOCAL_DATABRICKS_NOTEBOOK_PATH environment variable is set and points at an
# existing path, that local checkout is installed in editable mode; otherwise the
# package is installed straight from the GitHub repository.
LOCAL_DATABRICKS_NOTEBOOK_PATH = os.getenv('LOCAL_DATABRICKS_NOTEBOOK_PATH')
if LOCAL_DATABRICKS_NOTEBOOK_PATH and pathlib.Path(LOCAL_DATABRICKS_NOTEBOOK_PATH).exists():
    print(f"Installing databricks-notebook from {LOCAL_DATABRICKS_NOTEBOOK_PATH}")
    %pip install --editable "{LOCAL_DATABRICKS_NOTEBOOK_PATH}"
else:
    print("Installing databricks-notebook from git")
    # NOTE(review): this install is not pinned to a tag or commit, so reruns may pick up
    # a different version of the package.
    %pip install git+https://github.com/datafold/databricks-notebook.git

# Restart to make dependencies available
# %restart_python on databricks notebook
# Outside Databricks: ask the running IPython kernel to shut down with restart=True.
# All variables defined so far are lost, so the cells below must be run afterwards.
import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)


Installing databricks-notebook from /Users/sergeyklinov/databricks-notebook
Obtaining file:///Users/sergeyklinov/databricks-notebook
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: databricks-notebook
  Building editable for databricks-notebook (pyproject.toml) ... [?25ldone
[?25h  Created wheel for databricks-notebook: filename=databricks_notebook-0.1.0-0.editable-py3-none-any.whl size=2939 sha256=8d2920d0e775388bb44d79c31bd66dbbc8267dfd813d1b1b94e4af57c4689a79
  Stored in directory: /private/var/folders/3y/p4yqdnw167xfr84r44_t60lh0000gn/T/pip-ephem-wheel-cache-sb9nikem/wheels/d6/fe/61/e1ee441d5c3d6bacd1e078d8335cf494301163e0f54e0a9d49
Successfully built databricks-notebook
Installing collected packages: databricks-notebook
  Attempting uninstal

{'status': 'ok', 'restart': True}

: 

In [1]:
# Token handed, together with `host`, to _get_current_api_key() in the translation and
# recovery cells.
# NOTE(review): presumably a Datafold organization token - avoid committing a real value;
# consider loading it from the environment instead.
org_token = "my_secret_token" # do not change
# Host URL passed to every databricks_notebook call made by this notebook.
host="https://sergey.st.datafold.io"
# Stays None unless the optional get_context_info() block below is uncommented.
identity = None

# We collect basic identity information to help track and resolve any issues
# with SQL translation and provide you with the best experience. This data is
# used internally by Datafold only and helps us:
# - Diagnose translation errors specific to your workspace configuration
# - Improve translation quality based on real usage patterns
# - Provide better support when you need assistance
#
# If you prefer not to share certain information, you can comment out specific
# fields below or remove this entire code block. The tool will still work, but
# we may have limited ability to help troubleshoot issues.

# def get_context_info():
#     context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
#     return {
#         'workspace_id': context.workspaceId().get(),
#         'workspace_url': context.browserHostName().get(),
#         'cluster_id': context.clusterId().get(),
#         'notebook_path': context.notebookPath().get(),
#         'user': context.userName().get()
#     }

# identity = get_context_info()

In [2]:
# ========================================
# SQLite Database Setup
# ========================================
import sqlite3
from pathlib import Path
from datetime import datetime

# Database path
# SQLite file under ~/dma/dma-pearson-assessment; every helper in this cell opens its
# own connection to this path.
db_path = Path.home() / "dma" / "dma-pearson-assessment" / "translations.db"

def init_database():
    """Create the SQLite file, the ``translations`` table and its status index.

    The parent directory of ``db_path`` is created first because
    ``sqlite3.connect`` creates a missing database file but not missing
    directories. Both DDL statements use ``IF NOT EXISTS``, so calling this
    function repeatedly is harmless.

    Raises:
        sqlite3.Error: if the database cannot be opened or the DDL fails.
        OSError: if the parent directory cannot be created.
    """
    # Without this, a fresh machine fails with "unable to open database file".
    Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Create table if it doesn't exist
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS translations (
                query_hash TEXT PRIMARY KEY,
                asset_name TEXT,
                original_query TEXT,
                translation_status TEXT,
                translation TEXT,
                project_id INTEGER,
                translation_id TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Create index on status for faster queries
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_translation_status
            ON translations(translation_status)
        ''')

        conn.commit()
    finally:
        # Release the file handle even when one of the statements above raises.
        conn.close()
    print(f"‚úì Database initialized: {db_path}")

def save_translation_results(results, project_id=None, translation_id=None):
    """Insert or update translation rows in the ``translations`` table.

    Each row is written with an SQLite UPSERT (``INSERT ... ON CONFLICT(query_hash)
    DO UPDATE``). On conflict every column except ``created_at`` is overwritten,
    so the original creation timestamp of an existing row is kept.

    Args:
        results: iterable of dicts with the keys ``query_hash``, ``asset_name``,
            ``original_query``, ``translation_status`` and ``translation``.
        project_id: value stored in the ``project_id`` column of every row.
        translation_id: value stored in the ``translation_id`` column of every row.

    Returns:
        Total number of rows inserted or updated (0 for empty ``results``).

    Raises:
        KeyError: if a result dict lacks one of the required keys.
        sqlite3.Error: if the database cannot be opened or a statement fails;
            nothing is committed in that case.
    """
    current_time = datetime.now().isoformat()

    # created_at is supplied only for brand-new rows: the DO UPDATE clause below
    # does not list it, so an existing row keeps its stored value.
    upsert_sql = '''
        INSERT INTO translations
        (query_hash, asset_name, original_query, translation_status,
         translation, project_id, translation_id, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(query_hash) DO UPDATE SET
            asset_name = excluded.asset_name,
            original_query = excluded.original_query,
            translation_status = excluded.translation_status,
            translation = excluded.translation,
            project_id = excluded.project_id,
            translation_id = excluded.translation_id,
            updated_at = excluded.updated_at
    '''

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        rows_affected = 0

        for result in results:
            cursor.execute(upsert_sql, (
                result['query_hash'],
                result['asset_name'],
                result['original_query'],
                result['translation_status'],
                result['translation'],
                project_id,
                translation_id,
                current_time,
                current_time
            ))
            # rowcount only describes the most recent execute(), so it has to be
            # summed per statement to report the whole batch.
            rows_affected += cursor.rowcount

        conn.commit()
    finally:
        # Closing without a commit discards a partially written batch.
        conn.close()

    return rows_affected

def get_translation_stats():
    """Return row counts for the ``translations`` table, bucketed by status.

    Returns:
        dict with the integer keys ``total``, ``success``, ``failed`` and
        ``other`` (statuses that are neither 'success' nor 'failed'). NULL
        aggregates, as produced for an empty table, are reported as 0.
    """
    summary_sql = '''
        SELECT 
            COUNT(*) as total,
            SUM(CASE WHEN translation_status = 'success' THEN 1 ELSE 0 END) as success,
            SUM(CASE WHEN translation_status = 'failed' THEN 1 ELSE 0 END) as failed,
            SUM(CASE WHEN translation_status NOT IN ('success', 'failed') THEN 1 ELSE 0 END) as other
        FROM translations
    '''

    connection = sqlite3.connect(db_path)
    counts = connection.cursor().execute(summary_sql).fetchone()
    connection.close()

    # Column order of the SELECT above; falsy values (NULL) collapse to 0.
    labels = ('total', 'success', 'failed', 'other')
    return {label: (count or 0) for label, count in zip(labels, counts)}

def get_untranslated_queries(input_csv_path):
    """List the ``QueryHash`` values of the input CSV that have no database row.

    Args:
        input_csv_path: path of a UTF-8 CSV file with a ``QueryHash`` column.

    Returns:
        list of hashes in CSV order (duplicates in the CSV are kept) whose value
        is absent from ``translations.query_hash``.
    """
    import csv

    # Hashes already stored, whatever their translation status.
    connection = sqlite3.connect(db_path)
    lookup = connection.cursor()
    lookup.execute('SELECT query_hash FROM translations')
    known_hashes = set()
    for (stored_hash,) in lookup.fetchall():
        known_hashes.add(stored_hash)
    connection.close()

    # Hashes present in the input file, in file order.
    with open(input_csv_path, 'r', encoding='utf-8') as source:
        csv_hashes = [record['QueryHash'] for record in csv.DictReader(source)]

    missing = []
    for candidate in csv_hashes:
        if candidate not in known_hashes:
            missing.append(candidate)
    return missing

def export_to_csv(output_csv_path):
    """Write every row of the ``translations`` table to a CSV file.

    The target file is opened in write mode, so an existing file is replaced.
    Rows are ordered by ``query_hash`` and only the five report columns are
    exported.

    Args:
        output_csv_path: destination path of the UTF-8 CSV report.
    """
    import csv

    columns = ['query_hash', 'asset_name', 'original_query', 'translation_status', 'translation']

    connection = sqlite3.connect(db_path)
    selection = connection.cursor()
    selection.execute('''
        SELECT query_hash, asset_name, original_query, translation_status, translation
        FROM translations
        ORDER BY query_hash
    ''')

    with open(output_csv_path, 'w', newline='', encoding='utf-8') as sink:
        report = csv.DictWriter(sink, fieldnames=columns)
        report.writeheader()
        # The SELECT column order matches `columns`, so zipping pairs them up.
        report.writerows(dict(zip(columns, record)) for record in selection.fetchall())

    connection.close()
    print(f"‚úì Exported to CSV: {output_csv_path}")

# Initialize the database
# Runs every time this cell executes; the DDL uses IF NOT EXISTS, so existing data is kept.
init_database()

# Show current stats
# `stats` stays bound in the kernel namespace and is reassigned by later cells.
stats = get_translation_stats()
print(f"\n=== Current Database Stats ===")
print(f"Total queries in DB: {stats['total']}")
print(f"Successfully translated: {stats['success']}")
print(f"Failed: {stats['failed']}")
print(f"Other: {stats['other']}")

‚úì Database initialized: /Users/sergeyklinov/dma/dma-pearson-assessment/translations.db

=== Current Database Stats ===
Total queries in DB: 0
Successfully translated: 0
Failed: 0
Other: 0


In [9]:
import csv
import os
from pathlib import Path
from databricks_notebook import translate_queries, view_translation_results_as_dict, _get_current_api_key

# Configuration
CHUNK_SIZE = 10  # Number of queries sent per translation request
TEST_MODE = True  # Set to False to process all queries
MAX_CHUNKS_TEST = 5  # In test mode, stop after this many chunks
OFFSET = 100  # Number of leading queries to skip (0 = start from beginning)

# Input queries (needs QueryHash and QueryText columns) and the exported report file.
input_csv_path = Path.home() / "dma" / "dma-pearson-assessment" / "included_queries.csv"
output_csv_path = Path.home() / "dma" / "dma-pearson-assessment" / "report.csv"

print(f"Reading queries from: {input_csv_path}")

# Read all queries from CSV
# Only the QueryHash and QueryText columns are kept. The whole file is loaded into
# memory so the offset and chunking below can use plain list slicing.
# NOTE(review): the csv module documentation recommends opening files with newline=''
# so that line breaks embedded in quoted fields (likely in SQL text) are kept intact.
queries_data = []
with open(input_csv_path, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for row in reader:
        queries_data.append({
            'query_hash': row['QueryHash'],
            'query_text': row['QueryText']
        })

# Size of the complete input file, captured before the offset is applied; it is only
# used in progress messages.
total_queries = len(queries_data)
print(f"Found {total_queries} queries to translate")

# Apply offset
# Drops the first OFFSET entries, so later chunk indices are relative to the remainder.
if OFFSET > 0:
    queries_data = queries_data[OFFSET:]
    print(f"‚ö†Ô∏è  OFFSET SET: Starting from query #{OFFSET + 1} (skipping first {OFFSET} queries)")
    print(f"   Remaining queries to process: {len(queries_data)}")

print(f"Processing in chunks of {CHUNK_SIZE}")

if TEST_MODE:
    print(f"\n‚ö†Ô∏è  TEST MODE ENABLED - Processing only first {MAX_CHUNKS_TEST} chunks ({MAX_CHUNKS_TEST * CHUNK_SIZE} queries max)")
    print(f"   Set TEST_MODE = False to process all queries\n")

# Show database stats before starting
# get_translation_stats() comes from the database setup cell, which must have been run.
stats = get_translation_stats()
print(f"\n=== Database Status ===")
print(f"Already in DB: {stats['total']} queries")
print(f"  Success: {stats['success']}, Failed: {stats['failed']}, Other: {stats['other']}\n")

# Get API key once at the beginning
# org_token and host are defined in the configuration cell near the top of the notebook.
api_key = _get_current_api_key(org_token, host)

# Process queries in chunks
# Each chunk is translated, paired with its source rows and upserted into SQLite
# before the next chunk starts, so an interrupted run keeps the finished chunks.
chunks_processed = 0
queries_processed = 0
# Set when a chunk fails, so the closing summary does not claim success.
run_failed = False

for chunk_start in range(0, len(queries_data), CHUNK_SIZE):
    # Stop after MAX_CHUNKS_TEST chunks if in test mode
    if TEST_MODE and chunks_processed >= MAX_CHUNKS_TEST:
        print(f"\n‚ö†Ô∏è  TEST MODE: Stopping after {chunks_processed} chunks")
        break

    chunk_end = min(chunk_start + CHUNK_SIZE, len(queries_data))
    chunk_queries_data = queries_data[chunk_start:chunk_end]

    # Positions relative to the full input file (queries_data was already sliced by OFFSET).
    actual_start = OFFSET + chunk_start + 1
    actual_end = OFFSET + chunk_end
    print(f"=== Processing queries {actual_start} to {actual_end} of {total_queries} (total in file) ===")

    # Extract query texts for this chunk
    queries_to_translate = [q['query_text'] for q in chunk_queries_data]

    # Translate this chunk
    print(f"Translating {len(queries_to_translate)} queries...")

    # Reset per chunk: otherwise a failure before new IDs are returned would report
    # the IDs of the previous chunk (or of an earlier cell run) as recovery info.
    project_id = None
    translation_id = None

    try:
        # Start translation and get IDs
        project_id, translation_id = translate_queries(api_key, queries_to_translate, host)

        print(f"\nüìã Recovery Info (save these if connection fails):")
        print(f"   Project ID: {project_id}")
        print(f"   Translation ID: {translation_id}")
        print(f"   Chunk Start Index: {OFFSET + chunk_start}\n")

        # Wait for and fetch results
        translation_results = view_translation_results_as_dict(
            api_key,
            project_id,
            translation_id,
            host
        )

        print("Translation completed for this chunk!")

    except Exception as e:
        run_failed = True
        print(f"\n‚ùå Error during translation: {e}")
        print(f"\nüìã To recover, use the recovery cell with:")
        print(f"   RECOVERY_PROJECT_ID = {project_id if project_id is not None else 'NOT_AVAILABLE'}")
        print(f"   RECOVERY_TRANSLATION_ID = \"{translation_id if translation_id is not None else 'NOT_AVAILABLE'}\"")
        print(f"   RECOVERY_CHUNK_START = {OFFSET + chunk_start}")
        print(f"\nStopping processing due to error.\n")
        break

    # Prepare report data for this chunk
    chunk_report_rows = []
    translated_models = translation_results.get('translated_models', [])

    # NOTE(review): results are paired with queries purely by position. This assumes
    # the service returns one model per submitted query, in submission order - confirm.
    if len(translated_models) != len(chunk_queries_data):
        print(f"Warning: sent {len(chunk_queries_data)} queries but received {len(translated_models)} results; "
              f"rows are paired by position and may be misaligned.")

    for i, query_data in enumerate(chunk_queries_data):
        # Match the query with its translation result by index
        if i < len(translated_models):
            model = translated_models[i]
            status = model.get('translation_status', '')

            report_row = {
                'query_hash': query_data['query_hash'],
                'asset_name': model.get('asset_name', ''),
                'original_query': query_data['query_text'],
                'translation_status': status,
                'translation': model.get('target_sql', '')
            }
        else:
            # In case there's a mismatch
            report_row = {
                'query_hash': query_data['query_hash'],
                'asset_name': '',
                'original_query': query_data['query_text'],
                'translation_status': 'not_translated',
                'translation': ''
            }

        chunk_report_rows.append(report_row)

    # Save to SQLite database (upserts by query_hash)
    save_translation_results(chunk_report_rows, project_id, translation_id)

    chunks_processed += 1
    queries_processed += len(chunk_queries_data)
    print(f"‚úì Chunk results saved to database ({queries_processed} queries processed in this run)\n")

print("=" * 50)
if run_failed:
    # An aborted run must not be reported as a completed one.
    print(f"Run stopped early after an error. Processed {chunks_processed} chunks ({queries_processed} queries) before the failure.")
elif TEST_MODE and chunks_processed >= MAX_CHUNKS_TEST:
    print(f"‚úì Test run completed! Processed {chunks_processed} chunks ({queries_processed} queries)")
    print(f"   To process all queries, set TEST_MODE = False")
else:
    print("‚úì All translations completed!")

# Show final stats
final_stats = get_translation_stats()
print(f"\n=== Final Database Stats ===")
print(f"Total queries in DB: {final_stats['total']}")
print(f"Successfully translated: {final_stats['success']}")
print(f"Failed: {final_stats['failed']}")
print(f"Other: {final_stats['other']}")

# Auto-export to CSV
# Runs even after a failed chunk so the report reflects everything stored so far.
print(f"\nüì§ Exporting results to CSV...")
export_to_csv(output_csv_path)
print(f"‚úì Report file: {output_csv_path}")

Reading queries from: /Users/sergeyklinov/dma/dma-pearson-assessment/included_queries.csv
Found 4285 queries to translate
‚ö†Ô∏è  OFFSET SET: Starting from query #101 (skipping first 100 queries)
   Remaining queries to process: 4185
Processing in chunks of 10

‚ö†Ô∏è  TEST MODE ENABLED - Processing only first 5 chunks (50 queries max)
   Set TEST_MODE = False to process all queries


=== Database Status ===
Already in DB: 65 queries
  Success: 0, Failed: 0, Other: 65

=== Processing queries 101 to 110 of 4285 (total in file) ===
Translating 10 queries...
‚úì Translation Project created with id 10035.
‚úì Uploaded queries to translate.
‚úì Started translation with id 5dba8abc-df31-4c26-9adb-4e99a4fe1591

üìã Recovery Info (save these if connection fails):
   Project ID: 10035
   Translation ID: 5dba8abc-df31-4c26-9adb-4e99a4fe1591
   Chunk Start Index: 100

‚úì Translation completed with status: done
Translation completed for this chunk!
‚úì Chunk results saved to database (10 queries

In [None]:
# ========================================
# RECOVERY CELL - Re-fetch results by project ID
# ========================================
# Use this cell if the connection failed during translation
# and you want to retrieve the results without re-translating

import csv
from pathlib import Path

# Configuration - Fill these in from the previous run
# This cell relies on names defined by earlier cells: org_token, host,
# save_translation_results, export_to_csv and get_translation_stats.
RECOVERY_MODE = True  # Set to True to enable recovery
RECOVERY_PROJECT_ID = 10017  # e.g., 10017
RECOVERY_TRANSLATION_ID = "27fda346-4635-48d2-ad58-d2db510461d2"  # e.g., "27fda346-4635-48d2-ad58-d2db510461d2"
RECOVERY_CHUNK_START = 0  # Which query index this chunk started at (from the error message)
RECOVERY_CHUNK_SIZE = 10  # Size of the chunk being recovered

input_csv_path = Path.home() / "dma" / "dma-pearson-assessment" / "included_queries.csv"
output_csv_path = Path.home() / "dma" / "dma-pearson-assessment" / "report.csv"

if RECOVERY_MODE:
    if RECOVERY_PROJECT_ID is None or RECOVERY_TRANSLATION_ID is None:
        print("‚ùå Error: Please set RECOVERY_PROJECT_ID and RECOVERY_TRANSLATION_ID")
    else:
        print(f"üîÑ Recovery Mode: Fetching results for project {RECOVERY_PROJECT_ID}, translation {RECOVERY_TRANSLATION_ID}")
        
        from databricks_notebook import view_translation_results_as_dict, _get_current_api_key
        
        # Get API key (should already be set from previous cell)
        api_key = _get_current_api_key(org_token, host)
        
        # Fetch the translation results
        print("Fetching translation results...")
        translation_results = view_translation_results_as_dict(
            api_key, 
            RECOVERY_PROJECT_ID, 
            RECOVERY_TRANSLATION_ID, 
            host
        )
        
        translated_models = translation_results.get('translated_models', [])
        print(f"‚úì Retrieved {len(translated_models)} translation results")
        
        # Display the results
        # Tally statuses for the recovery summary printed at the end of the cell.
        print("\n=== Translation Results ===")
        success_count = 0
        failed_count = 0
        other_count = 0
        
        for i, model in enumerate(translated_models, 1):
            status = model.get('translation_status', 'unknown')
            asset_name = model.get('asset_name', 'unnamed')
            icon = '‚úÖ' if status == 'success' else '‚ö†Ô∏è' if status == 'failed' else '‚ùì'
            print(f"{i}. {icon} {asset_name}: {status}")
            
            if status == 'success':
                success_count += 1
            elif status == 'failed':
                failed_count += 1
            else:
                other_count += 1
        
        # Read the original queries to get query hashes
        print(f"\nüîÑ Reading original CSV to match query hashes (chunk starting at index {RECOVERY_CHUNK_START})...")
        
        # RECOVERY_CHUNK_START is a zero-based row index into the input CSV (header excluded).
        queries_data = []
        with open(input_csv_path, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for i, row in enumerate(reader):
                # Only get the chunk we're recovering
                if i >= RECOVERY_CHUNK_START and i < RECOVERY_CHUNK_START + RECOVERY_CHUNK_SIZE:
                    queries_data.append({
                        'query_hash': row['QueryHash'],
                        'query_text': row['QueryText']
                    })
                # Stop reading once the window has been passed.
                if i >= RECOVERY_CHUNK_START + RECOVERY_CHUNK_SIZE:
                    break
        
        print(f"‚úì Matched {len(queries_data)} queries from CSV")
        
        # Prepare report rows
        # NOTE(review): results are paired with CSV rows purely by position. If fewer
        # results than rows come back, the remaining rows are not recorded, and the
        # pairing is only right when results arrive in submission order - confirm.
        chunk_report_rows = []
        for i, model in enumerate(translated_models):
            if i < len(queries_data):
                query_data = queries_data[i]
                status = model.get('translation_status', '')
                
                report_row = {
                    'query_hash': query_data['query_hash'],
                    'asset_name': model.get('asset_name', ''),
                    'original_query': query_data['query_text'],
                    'translation_status': status,
                    'translation': model.get('target_sql', '')
                }
                chunk_report_rows.append(report_row)
        
        # Save to SQLite database
        print(f"\nüíæ Saving results to database...")
        save_translation_results(chunk_report_rows, RECOVERY_PROJECT_ID, RECOVERY_TRANSLATION_ID)
        print(f"‚úì Successfully saved {len(chunk_report_rows)} rows to database")
        
        # Export to CSV
        # Rewrites the whole report file from the database contents.
        print(f"\nüì§ Exporting to CSV: {output_csv_path}")
        export_to_csv(output_csv_path)
        
        print(f"\n=== Recovery Summary ===")
        print(f"Total recovered: {len(chunk_report_rows)}")
        print(f"Successfully translated: {success_count}")
        print(f"Failed: {failed_count}")
        print(f"Other: {other_count}")
        
        # Show overall database stats
        final_stats = get_translation_stats()
        print(f"\n=== Overall Database Stats ===")
        print(f"Total queries in DB: {final_stats['total']}")
        print(f"Successfully translated: {final_stats['success']}")
        print(f"Failed: {final_stats['failed']}")
        print(f"Other: {final_stats['other']}")
        
        print("\nYou can access the full results in the 'translation_results' variable")
else:
    print("‚ÑπÔ∏è  Recovery mode is disabled. Set RECOVERY_MODE = True to use this feature.")

In [None]:
# ========================================
# UTILITY CELL - Database Queries & Export
# ========================================
# Use this cell to query the database and export results

import sqlite3
from pathlib import Path

# Report file; export_to_csv() rewrites it completely from the database on every call.
output_csv_path = Path.home() / "dma" / "dma-pearson-assessment" / "report.csv"

# === OPTION 1: View Statistics ===
# get_translation_stats() and export_to_csv() come from the database setup cell.
print("=== Database Statistics ===")
stats = get_translation_stats()
print(f"Total queries: {stats['total']}")
print(f"Successfully translated: {stats['success']}")
print(f"Failed: {stats['failed']}")
print(f"Other statuses: {stats['other']}")

# === OPTION 2: Export to CSV ===
print(f"\nüì§ Exporting to CSV: {output_csv_path}")
export_to_csv(output_csv_path)

# === OPTION 3: Query specific records ===
# Uncomment to use:
# conn = sqlite3.connect(db_path)
# cursor = conn.cursor()

# # Get failed translations
# cursor.execute("SELECT query_hash, asset_name, translation_status FROM translations WHERE translation_status = 'failed'")
# failed = cursor.fetchall()
# print(f"\n=== Failed Translations ({len(failed)}) ===")
# for row in failed[:10]:  # Show first 10
#     print(f"  - {row[0]}: {row[1]} -> {row[2]}")

# # Get translations by status
# cursor.execute("SELECT translation_status, COUNT(*) FROM translations GROUP BY translation_status")
# status_breakdown = cursor.fetchall()
# print(f"\n=== Status Breakdown ===")
# for status, count in status_breakdown:
#     print(f"  {status}: {count}")

# conn.close()

# === OPTION 4: Find untranslated queries ===
# Uncomment to use:
# input_csv_path = Path.home() / "dma" / "dma-pearson-assessment" / "included_queries.csv"
# untranslated = get_untranslated_queries(input_csv_path)
# print(f"\n=== Untranslated Queries ===")
# print(f"Found {len(untranslated)} queries not yet in database")
# if len(untranslated) > 0:
#     print(f"First few: {untranslated[:5]}")

# === OPTION 5: View recent translations ===
# Uncomment to use:
# conn = sqlite3.connect(db_path)
# cursor = conn.cursor()
# cursor.execute("""
#     SELECT query_hash, asset_name, translation_status, updated_at 
#     FROM translations 
#     ORDER BY updated_at DESC 
#     LIMIT 10
# """)
# recent = cursor.fetchall()
# print(f"\n=== Recent Translations ===")
# for row in recent:
#     print(f"  {row[0]}: {row[1]} -> {row[2]} (updated: {row[3]})")
# conn.close()

print("\n‚úì Done!")

In [None]:
# ========================================
# RECOVERY CELL - Re-fetch results by project ID
# ========================================
# Use this cell if the connection failed during translation
# and you want to retrieve the results without re-translating

import csv
from pathlib import Path

# Configuration - Fill these in from the previous run
# NOTE(review): this looks like an older variant of the recovery cell. It appends the
# recovered rows directly to report.csv and never calls save_translation_results(),
# so nothing reaches the SQLite database. export_to_csv() opens the same file in
# write mode, which means the rows appended here are lost on the next export, and
# running this cell twice appends the same rows twice.
RECOVERY_MODE = True  # Set to True to enable recovery
RECOVERY_PROJECT_ID = 10017  # e.g., 10017
RECOVERY_TRANSLATION_ID = "27fda346-4635-48d2-ad58-d2db510461d2"  # e.g., "27fda346-4635-48d2-ad58-d2db510461d2"
RECOVERY_CHUNK_START = 0  # Which query index this chunk started at (from the error message)
RECOVERY_CHUNK_SIZE = 10  # Size of the chunk being recovered

output_csv_path = Path.home() / "dma" / "dma-pearson-assessment" / "report.csv"

if RECOVERY_MODE:
    if RECOVERY_PROJECT_ID is None or RECOVERY_TRANSLATION_ID is None:
        print("‚ùå Error: Please set RECOVERY_PROJECT_ID and RECOVERY_TRANSLATION_ID")
    else:
        print(f"üîÑ Recovery Mode: Fetching results for project {RECOVERY_PROJECT_ID}, translation {RECOVERY_TRANSLATION_ID}")
        
        from databricks_notebook import view_translation_results_as_dict, _get_current_api_key
        
        # Get API key (should already be set from previous cell)
        # org_token and host are defined in the configuration cell near the top of the notebook.
        api_key = _get_current_api_key(org_token, host)
        
        # Fetch the translation results
        print("Fetching translation results...")
        translation_results = view_translation_results_as_dict(
            api_key, 
            RECOVERY_PROJECT_ID, 
            RECOVERY_TRANSLATION_ID, 
            host
        )
        
        translated_models = translation_results.get('translated_models', [])
        print(f"‚úì Retrieved {len(translated_models)} translation results")
        
        # Display the results
        # Tally statuses for the recovery summary printed at the end of the cell.
        print("\n=== Translation Results ===")
        success_count = 0
        failed_count = 0
        other_count = 0
        
        for i, model in enumerate(translated_models, 1):
            status = model.get('translation_status', 'unknown')
            asset_name = model.get('asset_name', 'unnamed')
            icon = '‚úÖ' if status == 'success' else '‚ö†Ô∏è' if status == 'failed' else '‚ùì'
            print(f"{i}. {icon} {asset_name}: {status}")
            
            if status == 'success':
                success_count += 1
            elif status == 'failed':
                failed_count += 1
            else:
                other_count += 1
        
        # Read the original queries to get query hashes
        print("\nüîÑ Reading original CSV to match query hashes...")
        input_csv_path = Path.home() / "dma" / "dma-pearson-assessment" / "included_queries.csv"
        
        # RECOVERY_CHUNK_START is a zero-based row index into the input CSV (header excluded).
        queries_data = []
        with open(input_csv_path, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for i, row in enumerate(reader):
                # Only get the chunk we're recovering
                if i >= RECOVERY_CHUNK_START and i < RECOVERY_CHUNK_START + RECOVERY_CHUNK_SIZE:
                    queries_data.append({
                        'query_hash': row['QueryHash'],
                        'query_text': row['QueryText']
                    })
                # Stop reading once the window has been passed.
                if i >= RECOVERY_CHUNK_START + RECOVERY_CHUNK_SIZE:
                    break
        
        print(f"‚úì Matched {len(queries_data)} queries from CSV")
        
        # Prepare report rows
        # NOTE(review): results are paired with CSV rows purely by position. If fewer
        # results than rows come back, the remaining rows are not written, and the
        # pairing is only right when results arrive in submission order - confirm.
        chunk_report_rows = []
        for i, model in enumerate(translated_models):
            if i < len(queries_data):
                query_data = queries_data[i]
                status = model.get('translation_status', '')
                
                report_row = {
                    'query_hash': query_data['query_hash'],
                    'asset_name': model.get('asset_name', ''),
                    'original_query': query_data['query_text'],
                    'translation_status': status,
                    'translation': model.get('target_sql', '')
                }
                chunk_report_rows.append(report_row)
        
        # Append to CSV file
        print(f"\nüíæ Writing results to {output_csv_path}...")
        
        # Check if file exists to determine if we need to write header
        file_exists = output_csv_path.exists()
        
        with open(output_csv_path, 'a', newline='', encoding='utf-8') as f:
            fieldnames = ['query_hash', 'asset_name', 'original_query', 'translation_status', 'translation']
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            
            # Write header if file doesn't exist
            if not file_exists:
                writer.writeheader()
            
            writer.writerows(chunk_report_rows)
        
        print(f"‚úì Successfully wrote {len(chunk_report_rows)} rows to report")
        print(f"\n=== Recovery Summary ===")
        print(f"Total recovered: {len(chunk_report_rows)}")
        print(f"Successfully translated: {success_count}")
        print(f"Failed: {failed_count}")
        print(f"Other: {other_count}")
        print("\nYou can access the full results in the 'translation_results' variable")
else:
    print("‚ÑπÔ∏è  Recovery mode is disabled. Set RECOVERY_MODE = True to use this feature.")

üîÑ Recovery Mode: Fetching results for project 10017, translation 27fda346-4635-48d2-ad58-d2db510461d2
Fetching translation results...
‚úì Translation completed with status: done
‚úì Retrieved 3 translation results

=== Translation Results ===
1. ‚ùì MXCI_SANDBOX.ASTRONOMY.problemxml_correct_answer: validation_pending
2. ‚ùì temp_access_subscriptions: validation_pending
3. ‚ùì NAIILS.SALES_CREDITING.OLP_SUBSCRIPTION_REPORT_2025_ONLY: validation_pending

üîÑ Reading original CSV to match query hashes...
‚úì Matched 10 queries from CSV

üíæ Writing results to /Users/sergeyklinov/dma/dma-pearson-assessment/report.csv...
‚úì Successfully wrote 3 rows to report

=== Recovery Summary ===
Total recovered: 3
Successfully translated: 0
Failed: 0
Other: 3

You can access the full results in the 'translation_results' variable
