In [11]:
from dotenv import load_dotenv
import os
from typing import List, Dict, Any, Optional, Union
from pathlib import Path

load_dotenv()


# Import from our Classes module
from Classes.model_classes import SQLLineageExtractor, SQLLineageResult, create_sql_lineage_extractor
from Classes.validation_classes import SQLLineageValidator


# Model and provider identifiers handed to the extractor factory further down.
MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
PROVIDER = "scaleway"
# Access token taken from the process environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

In [12]:
# Resolve the example file against the current working directory so that no
# absolute path is hard-coded.
current_dir = Path.cwd()
file_path = current_dir / "data" / "SQL.txt"

# Load the whole example SQL script into memory as UTF-8 text.
SQL = file_path.read_text(encoding="utf-8")

In [13]:

# Gather the factory settings in one mapping so they are easy to scan and tweak.
extractor_settings = {
    "model": MODEL,
    "provider": PROVIDER,
    "hf_token": HF_TOKEN,
    "max_new_tokens": 2048,
    "do_sample": False,
    "max_retries": 3,
    "use_pydantic_parser": True,
}

# Build the extractor through the project's factory function.
extractor = create_sql_lineage_extractor(**extractor_settings)


    
# Banner so this run is easy to spot in the cell output.
separator = "=" * 60
print(separator)
print("SQL Lineage Extractor with langchain_huggingface")
print(separator)

# Echo the configuration the extractor reports, then run its connection test.
print(f"\nModel: {extractor.model}")
print(f"Provider: {extractor.provider}")

# NOTE(review): the outcome is only reported; nothing here stops the
# extraction that follows when the check fails.
connection_ok = extractor.test_connection()
print("✓ Connection test successful" if connection_ok else "✗ Connection test failed")

print(f"\nExtracting lineage from SQL ({len(SQL)} characters)...")

# Only this many source tables are listed; the remainder is summarised as a count.
PREVIEW_LIMIT = 10

try:
    result = extractor.extract(SQL)

    if "error" not in result:
        print("✓ Lineage extracted successfully!")
        print(f"\nTarget: {result.get('target', 'N/A')}")

        # Prefer the count reported in the result; fall back to the list length.
        sources = result.get('sources', [])
        print(f"Sources ({result.get('source_count', len(sources))}):")

        if sources:
            for position, table_name in enumerate(sources[:PREVIEW_LIMIT], start=1):
                print(f"  {position}. {table_name}")

            hidden_count = len(sources) - PREVIEW_LIMIT
            if hidden_count > 0:
                print(f"  ... and {hidden_count} more")

        # Fetch the same lineage as a result object to show its count and JSON form.
        # NOTE(review): this is a second call with the same SQL; presumably it
        # repeats the extraction — confirm whether `result` could be reused.
        lineage_result = extractor.extract_with_result(SQL)
        print("\nSQLLineageResult object:")
        print(f"  Source count: {lineage_result.source_count}")
        print(f"  As JSON: {lineage_result.to_json()}")
    else:
        # The extractor signalled a failure through an "error" entry.
        print(f"✗ Error: {result['error']}")

except Exception as exc:
    # Report the failure together with its traceback instead of aborting the cell.
    import traceback

    print(f"\n✗ Unexpected error: {exc}")
    traceback.print_exc()

SQL Lineage Extractor with langchain_huggingface

Model: Qwen/Qwen3-Coder-30B-A3B-Instruct
Provider: scaleway
✓ Connection test successful

Extracting lineage from SQL (17835 characters)...
✓ Lineage extracted successfully!

Target: s_grnplm_vd_t_bvd_db_dmslcl.d_agr_cred
Sources (21):
  1. s_grnplm_vd_t_bvd_db_dmslcl.d_agr_cred_tmp
  2. s_grnplm_as_t_didsd_010_vd_dwh.v_$eks_agrmnt_to_coa_3
  3. s_grnplm_as_t_didsd_010_vd_dwh.v_coa
  4. s_grnplm_as_t_didsd_010_vd_dwh.v_gl_main_acct
  5. s_grnplm_vd_t_bvd_db_dmslcl.a_agr_cred_coa_period
  6. s_grnplm_vd_t_bvd_db_dmslcl.d_agr_cred_optn
  7. s_grnplm_as_t_didsd_010_vd_dwh.v_loan_agrmnt_rate
  8. s_grnplm_vd_t_bvd_db_dmslcl.d_agr_cred_cust
  9. s_grnplm_as_t_didsd_029_vd_dwh.v_agr_cred
  10. s_grnplm_vd_t_bvd_db_dmslcl.d_agr_cred_core_uvdo
  ... and 11 more

SQLLineageResult object:
  Source count: 21
  As JSON: {
  "target": "s_grnplm_vd_t_bvd_db_dmslcl.d_agr_cred",
  "sources": [
    "s_grnplm_vd_t_bvd_db_dmslcl.d_agr_cred_tmp",
    "s_gr