In [None]:
# Cell 0: GitHub Setup and Auto-Logging

import os
import sys
import importlib
import importlib.util
import psycopg2

# GitHub credentials - use Kaggle secrets for security
# The token is read from the Kaggle secret named "GITHUB_TOKEN" and embedded in
# the HTTPS clone URL below.
# NOTE(review): anything that prints or logs `repo_url` exposes the token.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
github_token = user_secrets.get_secret("GITHUB_TOKEN")
repo_url = f"https://{github_token}@github.com/amiralpert/SmartReach.git"
# Local checkout location; every later path in this cell is built from it.
local_path = "/kaggle/working/SmartReach"

print("📦 Setting up GitHub repository...")

# Clone or update repo with force pull
# Lines starting with `!` are IPython shell escapes (notebook-only syntax);
# `{name}` is filled in from the Python variable of that name. Each `!` line
# runs in its own shell, which is why every command repeats the `cd`.
if os.path.exists(local_path):
    print(f"📂 Repository exists at {local_path}")
    print("🔄 Force updating from GitHub...")
    # fetch + hard reset makes the checkout match origin/main exactly,
    # discarding local commits and edits to tracked files.
    !cd {local_path} && git fetch origin
    !cd {local_path} && git reset --hard origin/main
    # NOTE(review): after the hard reset this pull is presumably a no-op.
    !cd {local_path} && git pull origin main
    print("✅ Repository updated")
    
    # Show current commit
    !cd {local_path} && echo "Current commit:" && git log --oneline -1
else:
    # First run in this session: no checkout yet, so clone with the token URL.
    print(f"📥 Cloning repository to {local_path}")
    !git clone {repo_url} {local_path}
    print("✅ Repository cloned")

# Drop the cached pipeline module (if present) so the import further down
# picks up the freshly updated file instead of the stale in-memory copy.
if 'patentlens_pipeline_v3' in sys.modules:
    print("🗑️ Removing cached pipeline module...")
    sys.modules.pop('patentlens_pipeline_v3')

# Sweep out every remaining module whose name mentions "patentlens".
# The names are collected into a list first because sys.modules must not
# change size while it is being iterated.
modules_to_clear = [name for name in sys.modules if 'patentlens' in name.lower()]
for mod in modules_to_clear:
    sys.modules.pop(mod)
    print(f"  Cleared: {mod}")

# Put <repo>/BizIntel at the very front of sys.path for regular imports,
# removing an earlier entry first so it is not listed twice.
try:
    sys.path.remove(f'{local_path}/BizIntel')
except ValueError:
    pass  # not on the path yet
sys.path.insert(0, f'{local_path}/BizIntel')

# Direct import of pipeline
# The module is loaded straight from its file path (not via the package
# hierarchy) and registered in sys.modules under "patentlens_pipeline_v3".
pipeline_path = f"{local_path}/BizIntel/Modules/SystemUno/Patents/patentlens_pipeline_v3.py"

# Check if file exists and show its modification time
if os.path.exists(pipeline_path):
    import time
    mod_time = os.path.getmtime(pipeline_path)
    print(f"📄 Pipeline file modified: {time.ctime(mod_time)}")

    # Check if our debug code is in the file.
    # Python source files are UTF-8 by default, so decode explicitly instead
    # of relying on the platform's locale encoding.
    with open(pipeline_path, 'r', encoding='utf-8') as f:
        content = f.read()
    if "=== ATTEMPTING TO STORE EXTRACTION RESULTS ===" in content:
        print("✅ Debug code FOUND in pipeline file")
    else:
        print("⚠️ Debug code NOT FOUND in pipeline file - updates may not have synced")
else:
    # Diagnostic only: the import below is still attempted and is expected
    # to raise if the file really is missing.
    print(f"❌ Pipeline file not found at {pipeline_path}")

# Build the module from the file, register it before executing it (so
# imports of the same name resolve to it), then run its top-level code.
spec = importlib.util.spec_from_file_location("patentlens_pipeline_v3", pipeline_path)
pipeline_module = importlib.util.module_from_spec(spec)
sys.modules["patentlens_pipeline_v3"] = pipeline_module
spec.loader.exec_module(pipeline_module)

# Re-export the classes the rest of the notebook uses as plain names.
PatentLensPipeline = pipeline_module.PatentLensPipeline
PatentData = pipeline_module.PatentData
KeywordManager = pipeline_module.KeywordManager

print("✓ Pipeline module imported from GitHub!")

# Set up database configuration (keyword arguments for psycopg2.connect)
# NOTE(review): these credentials are hard-coded in the notebook; consider
# loading them from Kaggle secrets the same way GITHUB_TOKEN is loaded.
NEON_CONFIG = dict(
    host='ep-royal-star-ad1gn0d4-pooler.c-2.us-east-1.aws.neon.tech',
    database='BizIntelSmartReach',
    user='neondb_owner',
    password='npg_aTFt6Pug3Kpy',
    sslmode='require',
)

# Try to set up logger, but don't fail if there are issues.
# The connection opened here exists only for the logger, so it is closed
# again whenever logger setup does not complete; otherwise each failed run
# would leave an idle connection open on the database.
logger = None
logger_conn = None
logger_active = False  # becomes True only once setup_auto_logging() has returned
try:
    # Create separate connection for logger
    logger_conn = psycopg2.connect(**NEON_CONFIG)
    print("✓ Database connected for logger")

    # Import auto-logger using direct file import
    logger_module_path = f"{local_path}/BizIntel/Scripts/KaggleLogger/auto_logger.py"
    if os.path.exists(logger_module_path):
        spec = importlib.util.spec_from_file_location("auto_logger", logger_module_path)
        auto_logger_module = importlib.util.module_from_spec(spec)
        sys.modules["auto_logger"] = auto_logger_module
        spec.loader.exec_module(auto_logger_module)

        setup_auto_logging = auto_logger_module.setup_auto_logging
        logger = setup_auto_logging(logger_conn, "PatentLens")
        logger_active = True  # from here on the logger owns the connection
        print("✓ Auto-logging enabled!")
    else:
        print(f"✗ Auto-logger not found at {logger_module_path}")
        logger = None
except Exception as e:
    # Best effort by design: the notebook keeps working without auto-logging.
    print(f"⚠️ Logger setup failed: {e}")
    print("  Continuing without auto-logging...")
    logger = None
finally:
    # Release the connection if nothing ended up using it.
    if not logger_active and logger_conn is not None:
        try:
            logger_conn.close()
        except Exception as close_error:
            print(f"⚠️ Could not close logger connection: {close_error}")

print("\n✅ Setup complete. Pipeline ready to use.")

In [None]:
# Cell 1: Neon Database Configuration
# Same values as the NEON_CONFIG defined in Cell 0; repeated here so this
# cell also provides the settings on its own.
# NOTE(review): these credentials are hard-coded in the notebook; consider
# loading them from Kaggle secrets instead.
NEON_CONFIG = dict(
    host='ep-royal-star-ad1gn0d4-pooler.c-2.us-east-1.aws.neon.tech',
    database='BizIntelSmartReach',
    user='neondb_owner',
    password='npg_aTFt6Pug3Kpy',
    sslmode='require',
)

# Test database connection
def test_database_connection():
    """Connect to the database, print row counts for the key tables, report success.

    Opens a connection with ``NEON_CONFIG``, runs one query that counts the
    rows of five tables, and prints the counts.

    Returns:
        bool: True when the connection and the count query both succeed;
        False (after printing the error) when any exception is raised.
    """
    conn = None
    try:
        conn = psycopg2.connect(**NEON_CONFIG)

        # The cursor context manager closes the cursor even if the query raises.
        with conn.cursor() as cursor:
            # Check tables
            cursor.execute('''
            SELECT 
                (SELECT COUNT(*) FROM raw_data.patents_full_text) as patents,
                (SELECT COUNT(*) FROM core.companies) as companies,
                (SELECT COUNT(*) FROM system_uno.patents_processing_status) as status_records,
                (SELECT COUNT(*) FROM system_uno.patents_keywords) as keywords,
                (SELECT COUNT(*) FROM system_uno.patents_extracted_knowledge) as extractions
        ''')
            counts = cursor.fetchone()

        print("✓ Database connected successfully!")
        # Labels are listed in the same order as the columns of the SELECT.
        labels = ("Patents", "Companies", "Status records", "Keywords", "Extractions")
        for label, count in zip(labels, counts):
            print(f"  {label}: {count}")
        return True

    except Exception as e:
        print(f"✗ Database connection failed: {e}")
        return False

    finally:
        # Always release the connection, including when the query fails
        # after a successful connect.
        if conn is not None:
            conn.close()

# Test connection
test_database_connection()