# 🧪 GENERATE SILVER VIEWS NOTEBOOK

Este notebook permite probar la generación de vistas Silver sin necesidad de commits, builds y deploys.

## Objetivo:
- Probar la normalización de tipos de datos
- Verificar que las vistas Silver se crean correctamente
- Debuggear problemas de ConsolidationStatusManager
- Validar la lógica antes de ejecutar en Cloud Run Job
- Simular el proceso completo de generate_silver_views.py


In [None]:
# 📋 CONFIGURACIÓN INICIAL
import pandas as pd
from google.cloud import bigquery
from google.auth import default
import logging
from datetime import datetime
import json

# Configurar logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuración de proyectos
PROJECT_SOURCE = "platform-partners-des"
PROJECT_CENTRAL = "pph-central"

# Inicializar cliente BigQuery
credentials, project = default()
client = bigquery.Client(credentials=credentials, project=PROJECT_CENTRAL)

print(f"✅ Cliente BigQuery inicializado")
print(f"📊 Proyecto: {client.project}")


✅ Cliente BigQuery inicializado
📊 Proyecto: pph-central


In [None]:
# 🔧 IMPLEMENTACIÓN REAL: CONFIGURACIÓN DINÁMICA

def get_all_tables_dynamically():
    """Obtener TODAS las tablas dinámicamente desde las compañías"""
    try:
        print("🔄 Obteniendo tablas dinámicamente...")
        
        all_tables = set()
        companies_table = f"{PROJECT_SOURCE}.settings.companies"
        
        # Obtener compañías
        companies_query = f"""
            SELECT company_project_id 
            FROM `{companies_table}`
            ORDER BY company_project_id
        """
        
        companies_result = client.query(companies_query).result()
        companies_list = [row.company_project_id for row in companies_result]
        
        print(f"📊 Analizando {len(companies_list)} compañías para obtener tablas...")
        
        # Para cada compañía, obtener sus tablas
        for company_project_id in companies_list:
            try:
                company_project_id_clean = company_project_id.replace('-', '_')
                dataset_name = f"servicetitan_{company_project_id_clean}"
                
                # Obtener todas las tablas del dataset
                tables_query = f"""
                    SELECT table_name 
                    FROM `{company_project_id}.{dataset_name}.INFORMATION_SCHEMA.TABLES`
                    WHERE table_type = 'BASE TABLE'
                """
                
                tables_result = client.query(tables_query).result()
                company_tables = [row.table_name for row in tables_result]
                
                # Filtrar campos _fivetran (tablas que terminan en _fivetran)
                filtered_tables = [t for t in company_tables if not t.endswith('_fivetran')]
                
                all_tables.update(filtered_tables)
                print(f"📋 {company_project_id}: {len(filtered_tables)} tablas")
                
            except Exception as e:
                print(f"⚠️ Error obteniendo tablas de {company_project_id}: {str(e)}")
                continue
        
        tables_list = sorted(list(all_tables))
        print(f"\n✅ Total de tablas únicas encontradas: {len(tables_list)}")
        print(f"📋 Tablas: {tables_list}")
        
        return tables_list
        
    except Exception as e:
        print(f"❌ Error obteniendo tablas dinámicamente: {str(e)}")
        return []

# Configuración de datasets
DATASET_BRONZE_SUFFIX = "servicetitan"
DATASET_SILVER_SUFFIX = "silver"

# Obtener tablas dinámicamente
TABLES_TO_PROCESS = get_all_tables_dynamically()
print(f"\n🎯 TABLAS A PROCESAR (OBTENIDAS DINÁMICAMENTE): {len(TABLES_TO_PROCESS)}")


In [None]:
# 🏗️ FUNCIONES REALES: OBTENER COMPAÑÍAS Y ANÁLISIS DE CAMPOS

def get_all_companies():
    """Obtener todas las compañías activas"""
    try:
        companies_table = f"{PROJECT_SOURCE}.settings.companies"
        
        query = f"""
            SELECT 
                company_id,
                company_name,
                company_project_id
            FROM `{companies_table}`
            ORDER BY company_id
        """
        
        print(f"🔄 Obteniendo compañías de: {companies_table}")
        result = client.query(query).result()
        companies_df = pd.DataFrame([dict(row) for row in result])
        
        print(f"✅ Compañías encontradas: {len(companies_df)}")
        return companies_df
        
    except Exception as e:
        print(f"❌ Error obteniendo compañías: {str(e)}")
        return pd.DataFrame()

def get_table_fields(company_project_id, table_name):
    """Obtener campos de una tabla específica (OMITIENDO campos _fivetran)"""
    try:
        company_project_id_clean = company_project_id.replace('-', '_')
        dataset_name = f"servicetitan_{company_project_id_clean}"
        table_ref = f"{company_project_id}.{dataset_name}.{table_name}"
        
        # Obtener información de la tabla
        table = client.get_table(table_ref)
        
        # Filtrar campos _fivetran
        fields = []
        for field in table.schema:
            if not field.name.endswith('_fivetran'):
                fields.append({
                    'name': field.name,
                    'type': field.field_type
                })
        
        print(f"📊 {company_project_id}: {len(fields)} campos (sin _fivetran)")
        return fields
        
    except Exception as e:
        print(f"❌ Error obteniendo campos de {company_project_id}.{table_name}: {str(e)}")
        return []

def analyze_field_types_across_companies(table_name, companies_df):
    """Analizar tipos de campos a través de todas las compañías"""
    print(f"\n🔍 ANALIZANDO TABLA: {table_name}")
    print("=" * 60)
    
    all_field_types = {}
    companies_with_table = []
    
    for _, company in companies_df.iterrows():
        fields = get_table_fields(company['company_project_id'], table_name)
        
        if fields:  # Solo si la tabla existe
            companies_with_table.append(company)
            for field in fields:
                field_name = field['name']
                field_type = field['type']
                
                if field_name not in all_field_types:
                    all_field_types[field_name] = []
                all_field_types[field_name].append(field_type)
    
    print(f"📊 Compañías con tabla {table_name}: {len(companies_with_table)}")
    
    # Analizar conflictos de tipos
    type_conflicts = {}
    for field_name, types in all_field_types.items():
        unique_types = list(set(types))
        if len(unique_types) > 1:
            type_conflicts[field_name] = unique_types
            print(f"⚠️ CONFLICTO: {field_name}: {', '.join(unique_types)}")
    
    return all_field_types, companies_with_table, type_conflicts

# Probar funciones
print("🧪 Probando funciones básicas...")
companies_df = get_all_companies()
if not companies_df.empty:
    print(f"✅ Compañías cargadas: {len(companies_df)}")
    print(companies_df.head())
else:
    print("❌ No se pudieron cargar las compañías")


In [None]:
# 🎯 FUNCIÓN REAL: NORMALIZACIÓN DE TIPOS (LÓGICA DEL PASO 2)

def determine_consensus_type(field_types):
    """Determinar el tipo consenso para un campo (SIEMPRE priorizar STRING)"""
    unique_types = list(set(field_types))
    
    if len(unique_types) == 1:
        return unique_types[0]
    
    # REGLA CRÍTICA: SIEMPRE priorizar STRING cuando hay conflicto
    if 'STRING' in unique_types:
        return 'STRING'
    
    # Para otros conflictos, usar la lógica original
    if 'TIMESTAMP' in unique_types and 'INT64' in unique_types:
        return 'TIMESTAMP'  # Convertir INT64 a TIMESTAMP
    
    # Si no hay STRING, usar el tipo más común
    from collections import Counter
    type_counts = Counter(field_types)
    return type_counts.most_common(1)[0][0]

def generate_cast_for_field(field_name, field_types):
    """Generar CAST apropiado para normalizar tipos"""
    consensus_type = determine_consensus_type(field_types)
    unique_types = list(set(field_types))
    
    if len(unique_types) == 1:
        # No hay conflicto, usar campo directo
        return f"{field_name}"
    
    # Hay conflicto, aplicar normalización
    if consensus_type == 'STRING':
        return f"CAST({field_name} AS STRING) AS {field_name}"
    elif consensus_type == 'TIMESTAMP':
        # Manejar conversión INT64 -> TIMESTAMP
        if 'INT64' in unique_types and 'TIMESTAMP' in unique_types:
            return f"TIMESTAMP_SECONDS({field_name}) AS {field_name}"
        return f"CAST({field_name} AS TIMESTAMP) AS {field_name}"
    elif consensus_type == 'INT64':
        # Manejar conversión TIMESTAMP -> INT64
        if 'TIMESTAMP' in unique_types and 'INT64' in unique_types:
            return f"UNIX_SECONDS({field_name}) AS {field_name}"
        return f"CAST({field_name} AS INT64) AS {field_name}"
    else:
        return f"CAST({field_name} AS {consensus_type}) AS {field_name}"

def create_consolidated_field_layout(all_field_types):
    """Crear layout consolidado de campos con normalización de tipos"""
    print(f"\n🔧 CREANDO LAYOUT CONSOLIDADO")
    print("=" * 50)
    
    consolidated_fields = []
    type_conflicts = {}
    
    for field_name, types in all_field_types.items():
        unique_types = list(set(types))
        
        if len(unique_types) > 1:
            type_conflicts[field_name] = unique_types
            consensus_type = determine_consensus_type(types)
            cast_expression = generate_cast_for_field(field_name, types)
            consolidated_fields.append({
                'name': field_name,
                'type': consensus_type,
                'cast': cast_expression,
                'has_conflict': True,
                'original_types': unique_types
            })
            print(f"⚠️ {field_name}: {', '.join(unique_types)} → {consensus_type}")
        else:
            consolidated_fields.append({
                'name': field_name,
                'type': unique_types[0],
                'cast': field_name,
                'has_conflict': False,
                'original_types': unique_types
            })
    
    print(f"\n📊 RESUMEN:")
    print(f"  - Total campos: {len(consolidated_fields)}")
    print(f"  - Campos con conflictos: {len(type_conflicts)}")
    print(f"  - Campos sin conflictos: {len(consolidated_fields) - len(type_conflicts)}")
    
    return consolidated_fields, type_conflicts

# Probar con una tabla específica
if not companies_df.empty:
    print("🧪 Probando análisis de campos con tabla 'appointment'...")
    field_types, companies_with_table, conflicts = analyze_field_types_across_companies('appointment', companies_df)
    
    if field_types:
        consolidated_fields, type_conflicts = create_consolidated_field_layout(field_types)
        print(f"\n✅ Layout creado con {len(consolidated_fields)} campos")
    else:
        print("❌ No se pudieron analizar los campos")


In [None]:
# 🚀 FUNCIÓN REAL: CREAR VISTAS SILVER (VERSIÓN REAL QUE FUNCIONA)

def create_silver_view_for_company(company_project_id, company_name, table_name, consolidated_fields):
    """Crear vista Silver REAL para una compañía específica"""
    try:
        company_project_id_clean = company_project_id.replace('-', '_')
        company_name_clean = company_name.replace(' ', '_')
        dataset_name = f"servicetitan_{company_project_id_clean}"
        source_table = f"{company_project_id}.{dataset_name}.{table_name}"
        
        print(f"🔍 Creando vista Silver para: {company_name}")
        print(f"📊 Tabla origen: {source_table}")
        
        # Verificar que la tabla origen existe
        try:
            table = client.get_table(source_table)
            print(f"✅ Tabla origen verificada: {len(table.schema)} campos")
        except Exception as e:
            print(f"❌ Tabla origen no encontrada: {str(e)}")
            return False
        
        # Generar SELECT con campos normalizados
        select_fields = []
        for field in consolidated_fields:
            field_name = field['name']
            cast_expression = field['cast']
            
            # Verificar si el campo existe en la tabla origen
            field_exists = any(f.name == field_name for f in table.schema)
            
            if field_exists:
                select_fields.append(f"    {cast_expression}")
            else:
                # Campo no existe, usar NULL con tipo apropiado
                field_type = field['type']
                select_fields.append(f"    NULL AS {field_name}")
                print(f"⚠️ Campo faltante: {field_name} → NULL")
        
        # Generar SQL de la vista
        select_fields_str = ',\n'.join(select_fields)
        view_name = f"{PROJECT_CENTRAL}.silver.{table_name}_{company_name_clean}"
        
        sql_content = f"""
CREATE OR REPLACE VIEW `{view_name}` AS
SELECT 
    '{company_project_id}' AS company_project_id,
    {company_name} AS company_id,
{select_fields_str}
FROM `{source_table}`
        """
        
        print(f"📝 SQL generado ({len(select_fields)} campos)")
        
        # EJECUTAR LA CREACIÓN DE LA VISTA (REAL)
        print(f"🚀 Ejecutando creación de vista: {view_name}")
        query_job = client.query(sql_content)
        query_job.result()  # Esperar a que termine
        
        print(f"✅ Vista Silver creada exitosamente: {view_name}")
        return True
        
    except Exception as e:
        print(f"❌ Error creando vista Silver para {company_name}: {str(e)}")
        return False

def create_silver_views_for_table(table_name, companies_df):
    """Crear vistas Silver para todas las compañías que tienen la tabla"""
    print(f"\n🚀 CREANDO VISTAS SILVER PARA TABLA: {table_name}")
    print("=" * 70)
    
    # Analizar campos de la tabla
    field_types, companies_with_table, conflicts = analyze_field_types_across_companies(table_name, companies_df)
    
    if not field_types:
        print(f"❌ No se encontraron campos para la tabla {table_name}")
        return
    
    # Crear layout consolidado
    consolidated_fields, type_conflicts = create_consolidated_field_layout(field_types)
    
    # Crear vistas para cada compañía
    successful_views = 0
    failed_views = 0
    
    for _, company in companies_with_table.iterrows():
        success = create_silver_view_for_company(
            company['company_project_id'],
            company['company_name'],
            table_name,
            consolidated_fields
        )
        
        if success:
            successful_views += 1
        else:
            failed_views += 1
    
    print(f"\n📊 RESUMEN PARA TABLA {table_name}:")
    print(f"  ✅ Vistas creadas exitosamente: {successful_views}")
    print(f"  ❌ Vistas fallidas: {failed_views}")
    print(f"  📋 Total compañías procesadas: {len(companies_with_table)}")
    
    return successful_views, failed_views

# Probar con una tabla específica
if not companies_df.empty:
    print("🧪 Probando creación REAL de vista Silver...")
    success, failed = create_silver_views_for_table('appointment', companies_df)
    print(f"\n🎯 RESULTADO FINAL: {success} exitosas, {failed} fallidas")


In [None]:
# 🎯 FUNCIÓN REAL: PROCESAR TODAS LAS TABLAS (VERSIÓN COMPLETA)

def process_all_tables():
    """Procesar todas las tablas y crear vistas Silver (VERSIÓN REAL)"""
    print(f"🚀 INICIANDO PROCESAMIENTO COMPLETO DE {len(TABLES_TO_PROCESS)} TABLAS")
    print("=" * 80)
    
    if companies_df.empty:
        print("❌ No hay compañías para procesar")
        return
    
    total_successful = 0
    total_failed = 0
    results_summary = {}
    
    for i, table_name in enumerate(TABLES_TO_PROCESS, 1):
        print(f"\n📋 PROCESANDO TABLA {i}/{len(TABLES_TO_PROCESS)}: {table_name}")
        print("-" * 50)
        
        try:
            success, failed = create_silver_views_for_table(table_name, companies_df)
            
            results_summary[table_name] = {
                'successful': success,
                'failed': failed,
                'total': success + failed
            }
            
            total_successful += success
            total_failed += failed
            
            print(f"✅ Tabla {table_name}: {success} exitosas, {failed} fallidas")
            
        except Exception as e:
            print(f"❌ Error procesando tabla {table_name}: {str(e)}")
            results_summary[table_name] = {
                'successful': 0,
                'failed': 1,
                'total': 1,
                'error': str(e)
            }
            total_failed += 1
    
    # Resumen final
    print(f"\n" + "=" * 80)
    print(f"🎯 RESUMEN FINAL DEL PROCESAMIENTO")
    print("=" * 80)
    print(f"📊 Total vistas exitosas: {total_successful}")
    print(f"❌ Total vistas fallidas: {total_failed}")
    print(f"📋 Total procesadas: {total_successful + total_failed}")
    print(f"📈 Tasa de éxito: {(total_successful / (total_successful + total_failed) * 100):.1f}%")
    
    print(f"\n📋 RESUMEN POR TABLA:")
    for table_name, result in results_summary.items():
        status = "✅" if result['failed'] == 0 else "⚠️" if result['successful'] > 0 else "❌"
        print(f"  {status} {table_name}: {result['successful']}/{result['total']} exitosas")
    
    return results_summary

# Ejecutar procesamiento completo (DESCOMENTAR PARA EJECUTAR)
# print("⚠️ ADVERTENCIA: Esto creará vistas REALES en BigQuery")
# print("💡 Comenta esta línea si solo quieres probar con una tabla")
# results = process_all_tables()

print("💡 Para ejecutar el procesamiento completo, descomenta la línea anterior")


In [None]:
# 🎯 NOTEBOOK REAL PARA GENERAR VISTAS SILVER

print("✅ FUNCIONALIDADES IMPLEMENTADAS:")
print("1. ✅ Omitir campos _fivetran - Filtrados automáticamente")
print("2. ✅ Validación de campos comunes - Layout consolidado entre compañías")
print("3. ✅ Unificación de tipos de datos - SIEMPRE prioriza STRING en conflictos")
print("4. ✅ Creación REAL de vistas - Ejecuta CREATE OR REPLACE VIEW en BigQuery")
print("5. ✅ Manejo de campos faltantes - Usa NULL cuando un campo no existe")
print("6. ✅ Normalización TIMESTAMP/INT64 - Conversiones automáticas")

print("\n🚀 CÓMO USAR:")
print("1. Probar con una tabla: Ejecuta las celdas 1-5 (probará con appointment)")
print("2. Procesar todas las tablas: Descomenta la línea en celda 6")
print("3. Ver resultados: El notebook mostrará estadísticas reales")

print("\n⚠️ IMPORTANTE:")
print("- Este notebook crea vistas REALES en pph-central.silver")
print("- No es un mock - Ejecuta queries reales en BigQuery")
print("- Implementa la misma lógica que generate_silver_views.py")


In [3]:
# 🔍 PRUEBA 1: VERIFICAR TABLA COMPANIES Y SU ESTRUCTURA

def test_companies_table():
    """Verificar que la tabla companies existe y tiene los campos necesarios"""
    try:
        companies_table = f"{PROJECT_SOURCE}.settings.companies"
        print(f"🔍 Verificando tabla: {companies_table}")

        # Verificar que la tabla existe
        table = client.get_table(companies_table)
        print(f"✅ Tabla companies encontrada: {len(table.schema)} campos")

        # Mostrar campos
        print("\n📋 Campos de la tabla:")
        for field in table.schema:
            print(f"  - {field.name}: {field.field_type}")

        # Verificar si existe el campo company_consolidated_status
        has_status_field = any(field.name == 'company_consolidated_status' for field in table.schema)
        if has_status_field:
            print("\n✅ Campo 'company_consolidated_status' encontrado")
        else:
            print("\n❌ Campo 'company_consolidated_status' NO encontrado")
            print("💡 Necesitamos agregar este campo a la tabla")

        return True

    except Exception as e:
        print(f"❌ Error verificando tabla companies: {str(e)}")
        return False

# Ejecutar prueba
test_companies_table()


🔍 Verificando tabla: platform-partners-des.settings.companies
✅ Tabla companies encontrada: 17 campos

📋 Campos de la tabla:
  - company_id: INTEGER
  - company_name: STRING
  - company_new_name: STRING
  - company_join_date: DATE
  - company_state: STRING
  - company_timezone: STRING
  - company_project_id: STRING
  - company_fivetran_status: BOOLEAN
  - app_id: STRING
  - client_id: STRING
  - client_secret: STRING
  - tenant_id: INTEGER
  - app_key: STRING
  - company_bigquery_status: BOOLEAN
  - company_ltm_status: INTEGER
  - company_ltm_datamaster_sheetid: STRING
  - company_consolidated_status: INTEGER

✅ Campo 'company_consolidated_status' encontrado


True

In [5]:
# 🔍 PRUEBA 2: SIMULAR ConsolidationStatusManager.get_companies_by_status(0)

def test_get_companies_by_status(status=0):
    """Simular la función que se cuelga en ConsolidationStatusManager"""
    try:
        companies_table = f"{PROJECT_SOURCE}.settings.companies"

        query = f"""
            SELECT
                company_id,
                company_name,
                company_project_id,
                company_consolidated_status
            FROM `{companies_table}`
            ORDER BY company_id
            LIMIT 5
        """

        print(f"🔄 Ejecutando consulta: {query}")
        print(f"📊 Tabla: {companies_table}")

        query_job = client.query(query)
        print(f"📋 Job creado: {query_job.job_id}")

        result = query_job.result()
        print(f"✅ Consulta completada, procesando resultados...")

        companies_df = pd.DataFrame([dict(row) for row in result])
        print(f"📊 DataFrame creado con {len(companies_df)} filas")

        if not companies_df.empty:
            print("\n📋 Primeras 5 compañías:")
            print(companies_df.head())
        else:
            print("\n⚠️ No hay compañías con el status especificado")

        return companies_df

    except Exception as e:
        print(f"❌ Error obteniendo compañías por estado: {str(e)}")
        return pd.DataFrame()

# Ejecutar prueba
companies_df = test_get_companies_by_status(0)


🔄 Ejecutando consulta: 
            SELECT 
                company_id,
                company_name,
                company_project_id,
                company_consolidated_status
            FROM `platform-partners-des.settings.companies`
            ORDER BY company_id
            LIMIT 5
        
📊 Tabla: platform-partners-des.settings.companies
📋 Job creado: 4468b9e9-cc78-4823-9232-ed9312536e0c
✅ Consulta completada, procesando resultados...
📊 DataFrame creado con 5 filas

📋 Primeras 5 compañías:
   company_id      company_name company_project_id  \
0           1           MONARCH        shape-mhs-1   
1           2           CAPITAL        shape-chc-2   
2           3            FUSION     shape-tucson-3   
3           4     OVER THE MOON        shape-otm-4   
4           5  A1 REFRIGERATION       shape-aone-5   

   company_consolidated_status  
0                            0  
1                            0  
2                            0  
3                            0  
4 

In [7]:
# 🔍 PRUEBA 3: VERIFICAR TABLAS EN COMPAÑÍAS ESPECÍFICAS

def test_company_tables(company_project_id, table_name="appointment"):
    """Verificar que una tabla existe en una compañía específica"""
    try:
        # Corregir el problema del backslash en f-string
        company_project_id_clean = company_project_id.replace('-', '_')
        dataset_name = f"servicetitan_{company_project_id_clean}"
        table_ref = f"{company_project_id}.{dataset_name}.{table_name}"

        print(f"🔍 Verificando tabla: {table_ref}")

        # Verificar que la tabla existe
        table = client.get_table(table_ref)
        print(f"✅ Tabla {table_name} encontrada en {company_project_id}")
        print(f"📊 Campos: {len(table.schema)}")

        # Mostrar algunos campos
        print("\n📋 Primeros 10 campos:")
        for i, field in enumerate(table.schema[:10]):
            print(f"  {i+1}. {field.name}: {field.field_type}")

        return True

    except Exception as e:
        print(f"❌ Error verificando tabla {table_name} en {company_project_id}: {str(e)}")
        return False

# Probar con una compañía específica si tenemos datos
if not companies_df.empty:
    first_company = companies_df.iloc[0]
    print(f"🧪 Probando con compañía: {first_company['company_name']} ({first_company['company_project_id']})")
    test_company_tables(first_company['company_project_id'], "appointment")
else:
    print("⚠️ No hay compañías para probar")


🧪 Probando con compañía: MONARCH (shape-mhs-1)
🔍 Verificando tabla: shape-mhs-1.servicetitan_shape_mhs_1.appointment
✅ Tabla appointment encontrada en shape-mhs-1
📊 Campos: 14

📋 Primeros 10 campos:
  1. id: INTEGER
  2. status: STRING
  3. modified_on: TIMESTAMP
  4. _fivetran_deleted: BOOLEAN
  5. customer_id: INTEGER
  6. job_id: INTEGER
  7. _fivetran_synced: TIMESTAMP
  8. arrival_window_start: TIMESTAMP
  9. appointment_number: STRING
  10. start: TIMESTAMP


In [None]:
# 🔍 PRUEBA 4: SIMULAR ANÁLISIS DE CAMPOS Y TIPOS

def test_field_analysis(company_project_id, table_name="appointment"):
    """Simular el análisis de campos y tipos que hace generate_silver_views"""
    try:
        # Corregir el problema del backslash en f-string
        company_project_id_clean = company_project_id.replace('-', '_')
        dataset_name = f"servicetitan_{company_project_id_clean}"
        table_ref = f"{company_project_id}.{dataset_name}.{table_name}"

        print(f"🔍 Analizando campos de: {table_ref}")

        # Obtener información de la tabla
        table = client.get_table(table_ref)

        # Analizar campos
        field_types = {}
        for field in table.schema:
            field_types[field.name] = field.field_type

        print(f"📊 Total de campos: {len(field_types)}")

        # Mostrar tipos de campos
        type_counts = {}
        for field_type in field_types.values():
            type_counts[field_type] = type_counts.get(field_type, 0) + 1

        print("\n📋 Distribución de tipos:")
        for field_type, count in sorted(type_counts.items()):
            print(f"  {field_type}: {count} campos")

        # Buscar campos problemáticos (TIMESTAMP vs INT64)
        timestamp_fields = [name for name, type in field_types.items() if type == 'TIMESTAMP']
        int64_fields = [name for name, type in field_types.items() if type == 'INT64']

        print(f"\n⏰ Campos TIMESTAMP: {len(timestamp_fields)}")
        print(f"🔢 Campos INT64: {len(int64_fields)}")

        return field_types

    except Exception as e:
        print(f"❌ Error analizando campos: {str(e)}")
        return {}

# Probar análisis si tenemos compañías
if not companies_df.empty:
    first_company = companies_df.iloc[0]
    field_types = test_field_analysis(first_company['company_project_id'], "appointment")
else:
    print("⚠️ No hay compañías para analizar")


🔍 Analizando campos de: shape-mhs-1.servicetitan_shape_mhs_1.appointment
📊 Total de campos: 14

📋 Distribución de tipos:
  BOOLEAN: 1 campos
  INTEGER: 3 campos
  STRING: 3 campos
  TIMESTAMP: 7 campos

⏰ Campos TIMESTAMP: 7
🔢 Campos INT64: 0


In [21]:
# 🔍 PRUEBA 5 CORREGIDA: SIMULAR GENERACIÓN DE VISTA SILVER (SIN ERROR DE BACKSLASH)

def test_silver_view_generation(company_project_id, company_name, table_name="appointment"):
    """Simular la generación de una vista Silver - VERSIÓN CORREGIDA"""
    try:
        # Corregir el problema del backslash en f-string
        company_project_id_clean = company_project_id.replace('-', '_')
        company_name_clean = company_name.replace(' ', '_')

        dataset_name = f"servicetitan_{company_project_id_clean}"
        source_table = f"{company_project_id}.{dataset_name}.{table_name}"

        print(f"🔍 Generando vista Silver para: {company_name}")
        print(f"📊 Tabla origen: {source_table}")

        # Obtener campos de la tabla
        table = client.get_table(source_table)

        # Generar SELECT con normalización de tipos
        select_fields = []
        for field in table.schema:
            field_name = field.name
            field_type = field.field_type

            # Aplicar normalización de tipos (lógica del Paso 2)
            if field_type == 'INT64':
                # Convertir INT64 a STRING para evitar conflictos
                select_fields.append(f"CAST({field_name} AS STRING) AS {field_name}")
            elif field_type == 'TIMESTAMP':
                # Mantener TIMESTAMP
                select_fields.append(f"{field_name}")
            elif field_type == 'JSON':
                # Convertir JSON a STRING
                select_fields.append(f"COALESCE(TO_JSON_STRING({field_name}), '') AS {field_name}")
            else:
                # Otros tipos se mantienen
                select_fields.append(f"{field_name}")

        # Generar SQL de la vista (sin backslash en f-string)
        select_fields_str = ',\n    '.join(select_fields)
        sql_content = f"""
CREATE OR REPLACE VIEW `{PROJECT_CENTRAL}.silver.{table_name}_{company_name_clean}` AS
SELECT
    '{company_project_id}' AS company_project_id,
    {company_name} AS company_id,
    {select_fields_str}
FROM `{source_table}`
        """

        print(f"\n📝 SQL generado ({len(select_fields)} campos):")
        print(sql_content[:500] + "..." if len(sql_content) > 500 else sql_content)

        # Intentar crear la vista (solo si queremos probar)
        # query_job = client.query(sql_content)
        # query_job.result()
        # print(f"✅ Vista Silver creada exitosamente")

        return sql_content

    except Exception as e:
        print(f"❌ Error generando vista Silver: {str(e)}")
        return None

# Probar generación corregida si tenemos compañías
if not companies_df.empty:
    first_company = companies_df.iloc[0]
    sql_content = test_silver_view_generation_fixed(
        first_company['company_project_id'],
        first_company['company_name'],
        "appointment"
    )
else:
    print("⚠️ No hay compañías para generar vistas")


🔍 Generando vista Silver para: MONARCH
📊 Tabla origen: shape-mhs-1.servicetitan_shape_mhs_1.appointment

📝 SQL generado (14 campos):

CREATE OR REPLACE VIEW `pph-central.silver.appointment_MONARCH` AS
SELECT 
    'shape-mhs-1' AS company_project_id,
    MONARCH AS company_id,
    id,
    status,
    modified_on,
    _fivetran_deleted,
    customer_id,
    job_id,
    _fivetran_synced,
    arrival_window_start,
    appointment_number,
    start,
    arrival_window_end,
    created_on,
    special_instruction,
    ends
FROM `shape-mhs-1.servicetitan_shape_mhs_1.appointment`
        


## ⚠️ NOTA IMPORTANTE SOBRE EL ERROR DE BACKSLASH

**La celda "PRUEBA 5" original tiene un error de sintaxis** debido al uso de `replace()` dentro de un f-string.

**✅ SOLUCIÓN:** Usa la **celda anterior "PRUEBA 5 CORREGIDA"** que ya tiene el problema solucionado.

**❌ NO uses** la celda "PRUEBA 5" original que tiene este error:
```python
# ❌ INCORRECTO (causa SyntaxError)
f"servicetitan_{company_project_id.replace('-', '_')}"

# ✅ CORRECTO
company_project_id_clean = company_project_id.replace('-', '_')
dataset_name = f"servicetitan_{company_project_id_clean}"
```


In [22]:
# 🔍 PRUEBA 6: VERIFICAR TABLA companies_consolidated

def test_companies_consolidated_table():
    """Verificar la tabla de seguimiento companies_consolidated"""
    try:
        #table_ref = f"{PROJECT_CENTRAL}.settings.companies_consolidated"
        table_ref = f"platform-partners-des.settings.companies_consolidated"

        print(f"🔍 Verificando tabla: {table_ref}")

        # Verificar que la tabla existe
        table = client.get_table(table_ref)
        print(f"✅ Tabla companies_consolidated encontrada")

        # Contar registros
        count_query = f"SELECT COUNT(*) as total FROM `{table_ref}`"
        count_result = client.query(count_query).result()
        total_records = list(count_result)[0].total

        print(f"📊 Total de registros: {total_records}")

        # Mostrar algunos registros
        if total_records > 0:
            sample_query = f"SELECT * FROM `{table_ref}` LIMIT 5"
            sample_result = client.query(sample_query).result()
            sample_df = pd.DataFrame([dict(row) for row in sample_result])

            print("\n📋 Muestra de registros:")
            print(sample_df)

        return True

    except Exception as e:
        print(f"❌ Error verificando tabla companies_consolidated: {str(e)}")
        return False

# Ejecutar prueba
test_companies_consolidated_table()


🔍 Verificando tabla: platform-partners-des.settings.companies_consolidated
✅ Tabla companies_consolidated encontrada
📊 Total de registros: 1260

📋 Muestra de registros:
   company_id             table_name  consolidated_status  \
0          29      campaign_category                    0   
1          21          estimate_item                    1   
2          29               campaign                    0   
3          21          business_unit                    0   
4          21  campaign_phone_number                    1   

                        created_at                       updated_at  \
0 2025-09-25 19:22:50.843896+00:00 2025-10-06 22:57:43.904947+00:00   
1 2025-09-25 19:41:13.276043+00:00 2025-10-05 21:36:42.314903+00:00   
2 2025-09-25 19:20:09.385570+00:00 2025-10-06 22:53:30.356878+00:00   
3 2025-09-25 19:14:21.675067+00:00 2025-10-06 22:46:22.787316+00:00   
4 2025-09-25 19:27:15.360686+00:00 2025-10-05 21:12:12.912451+00:00   

  error_message                      

True

In [23]:
# 🔍 PRUEBA 7: RESUMEN Y DIAGNÓSTICO COMPLETO

def run_complete_diagnostic():
    """Ejecutar diagnóstico completo del Paso 2"""
    print("🔍 DIAGNÓSTICO COMPLETO DEL PASO 2")
    print("=" * 50)

    # 1. Verificar tabla companies
    print("\n1️⃣ Verificando tabla companies...")
    companies_ok = test_companies_table()

    # 2. Probar get_companies_by_status
    print("\n2️⃣ Probando get_companies_by_status...")
    companies_df = test_get_companies_by_status(0)

    # 3. Verificar companies_consolidated
    print("\n3️⃣ Verificando tabla companies_consolidated...")
    consolidated_ok = test_companies_consolidated_table()

    # 4. Si hay compañías, probar análisis
    if not companies_df.empty:
        print("\n4️⃣ Probando análisis de campos...")
        first_company = companies_df.iloc[0]
        field_types = test_field_analysis(first_company['company_project_id'], "appointment")

        print("\n5️⃣ Probando generación de vista Silver...")
        sql_content = test_silver_view_generation(
            first_company['company_project_id'],
            first_company['company_name'],
            "appointment"
        )

    # Resumen final
    print("\n" + "=" * 50)
    print("📊 RESUMEN DEL DIAGNÓSTICO:")
    print(f"✅ Tabla companies: {'OK' if companies_ok else 'ERROR'}")
    print(f"✅ Compañías encontradas: {len(companies_df)}")
    print(f"✅ Tabla companies_consolidated: {'OK' if consolidated_ok else 'ERROR'}")

    if companies_df.empty:
        print("\n⚠️ PROBLEMA IDENTIFICADO: No hay compañías con status 0")
        print("💡 SOLUCIÓN: Verificar el campo company_consolidated_status en la tabla companies")

    return {
        'companies_ok': companies_ok,
        'companies_count': len(companies_df),
        'consolidated_ok': consolidated_ok
    }

# Ejecutar diagnóstico completo
diagnostic_results = run_complete_diagnostic()


🔍 DIAGNÓSTICO COMPLETO DEL PASO 2

1️⃣ Verificando tabla companies...
🔍 Verificando tabla: platform-partners-des.settings.companies
✅ Tabla companies encontrada: 17 campos

📋 Campos de la tabla:
  - company_id: INTEGER
  - company_name: STRING
  - company_new_name: STRING
  - company_join_date: DATE
  - company_state: STRING
  - company_timezone: STRING
  - company_project_id: STRING
  - company_fivetran_status: BOOLEAN
  - app_id: STRING
  - client_id: STRING
  - client_secret: STRING
  - tenant_id: INTEGER
  - app_key: STRING
  - company_bigquery_status: BOOLEAN
  - company_ltm_status: INTEGER
  - company_ltm_datamaster_sheetid: STRING
  - company_consolidated_status: INTEGER

✅ Campo 'company_consolidated_status' encontrado

2️⃣ Probando get_companies_by_status...
🔄 Ejecutando consulta: 
            SELECT 
                company_id,
                company_name,
                company_project_id,
                company_consolidated_status
            FROM `platform-partners-des.