# **Libraries**

In [26]:
import os
from datetime import datetime
from getpass import getpass
from pathlib import Path

import pandas as pd
import pymssql



In [20]:
# Folder holding the per-municipality wind CSV files, relative to the notebook's working directory.
DATA_DIR = Path("..") / "data" / "wind"

# **Database creation**

In [7]:
# Open a server-level connection (no database selected yet).
# autocommit=True is kept because this connection is used to run
# CREATE DATABASE, which SQL Server does not allow inside a transaction.
#
# Connection settings are read from the environment so that no credential is
# stored in the notebook. Host, port and user fall back to the local
# development defaults; the password is prompted for interactively when
# MSSQL_PASSWORD is not set.
conn = pymssql.connect(
    server=os.environ.get("MSSQL_SERVER", "localhost"),
    port=os.environ.get("MSSQL_PORT", "1433"),
    user=os.environ.get("MSSQL_USER", "sa"),
    password=os.environ.get("MSSQL_PASSWORD") or getpass("SQL Server password: "),
    autocommit=True,
)

cursor = conn.cursor()
print("‚úÖ Conectado al servidor SQL Server")

‚úÖ Conectado al servidor SQL Server


In [9]:
# Create the ClimateDB database only when the server does not list it yet,
# then release the server-level connection.
# The try/finally guarantees the connection is closed even when the lookup or
# the CREATE DATABASE statement raises.
cursor = conn.cursor()
try:
    cursor.execute("SELECT name FROM sys.databases WHERE name = 'ClimateDB'")
    exists = cursor.fetchone()  # None when no database with that name exists

    if not exists:
        # Create the database (only runs when it is missing)
        cursor.execute("CREATE DATABASE ClimateDB")
        print("‚úÖ Base de datos ClimateDB creada")
    else:
        print("‚úÖ Base de datos ClimateDB ya existe")
finally:
    conn.close()

‚úÖ Base de datos ClimateDB creada


In [10]:
# Connect to the ClimateDB database for the table-creation cells.
#
# autocommit=True matters here: pymssql otherwise runs every statement inside
# an implicit transaction, and closing the connection without commit() rolls
# that transaction back, discarding any CREATE TABLE executed on it.
#
# Connection settings are read from the environment so that no credential is
# stored in the notebook; the password is prompted for interactively when
# MSSQL_PASSWORD is not set.
conn = pymssql.connect(
    server=os.environ.get("MSSQL_SERVER", "localhost"),
    port=os.environ.get("MSSQL_PORT", "1433"),
    user=os.environ.get("MSSQL_USER", "sa"),
    password=os.environ.get("MSSQL_PASSWORD") or getpass("SQL Server password: "),
    database='ClimateDB',
    autocommit=True,
)

In [11]:
# Sanity check: ask the server which database the current session is bound to.
cursor = conn.cursor()
cursor.execute("SELECT DB_NAME()")
(db_name,) = cursor.fetchone()  # the query returns a single one-column row
print(f"‚úÖ Conectado a: {db_name}")

‚úÖ Conectado a: ClimateDB


# **Create tables**

In [12]:
# Main table of climate observations.
# One row per (municipio, datetime); the UQ_municipio_datetime constraint
# rejects a second row for the same pair. `id` is an auto-incrementing key and
# `created_at` defaults to the UTC time of insertion.
# The IF NOT EXISTS guard makes the statement safe to re-run.
# Note: the confirmation below is printed unconditionally, i.e. also when the
# table already existed and nothing was created.
cursor.execute("""
    IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'climate_observations')
    CREATE TABLE climate_observations (
        id BIGINT IDENTITY(1,1) PRIMARY KEY,
        municipio NVARCHAR(50) NOT NULL,
        datetime DATETIME2 NOT NULL,
        wind_speed_10m FLOAT,
        wind_direction_10m INT,
        temperature_2m FLOAT,
        relative_humidity_2m INT,
        precipitation FLOAT,
        created_at DATETIME2 DEFAULT GETUTCDATE(),
        
        CONSTRAINT UQ_municipio_datetime UNIQUE (municipio, datetime)
    )
""")
print("‚úÖ Tabla climate_observations creada")

‚úÖ Tabla climate_observations creada


In [13]:
# Table of model predictions: one predicted value per municipio, prediction
# timestamp, target variable and model version. All of those columns are
# required (NOT NULL); `created_at` defaults to the UTC time of insertion.
# The IF NOT EXISTS guard makes the statement safe to re-run.
# Note: the confirmation below is printed unconditionally, i.e. also when the
# table already existed and nothing was created.
cursor.execute("""
    IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'climate_predictions')
    CREATE TABLE climate_predictions (
        id BIGINT IDENTITY(1,1) PRIMARY KEY,
        municipio NVARCHAR(50) NOT NULL,
        prediction_datetime DATETIME2 NOT NULL,
        target_variable NVARCHAR(50) NOT NULL,
        predicted_value FLOAT NOT NULL,
        model_version NVARCHAR(50) NOT NULL,
        created_at DATETIME2 DEFAULT GETUTCDATE()
    )
""")
print("‚úÖ Tabla climate_predictions creada")

‚úÖ Tabla climate_predictions creada


In [14]:
# Registry of trained models: name, version, target variable, the path where
# the model artefact is stored and the time it was trained are required.
# `municipio` is nullable, `metrics` is free-form text (NVARCHAR(MAX)) and
# `is_active` defaults to 0.
# The IF NOT EXISTS guard makes the statement safe to re-run.
# Note: the confirmation below is printed unconditionally, i.e. also when the
# table already existed and nothing was created.
cursor.execute("""
    IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'model_registry')
    CREATE TABLE model_registry (
        id INT IDENTITY(1,1) PRIMARY KEY,
        model_name NVARCHAR(100) NOT NULL,
        model_version NVARCHAR(50) NOT NULL,
        target_variable NVARCHAR(50) NOT NULL,
        municipio NVARCHAR(50),
        model_path NVARCHAR(255) NOT NULL,
        metrics NVARCHAR(MAX),
        trained_at DATETIME2 NOT NULL,
        is_active BIT DEFAULT 0,
        created_at DATETIME2 DEFAULT GETUTCDATE()
    )
""")
print("‚úÖ Tabla model_registry creada")


‚úÖ Tabla model_registry creada


In [15]:
# Log table for ingestion runs: per municipio it records how many rows were
# inserted/updated, the start and end time, a status string and an optional
# error message. Only `municipio` is required; `created_at` defaults to the
# UTC time of insertion.
# The IF NOT EXISTS guard makes the statement safe to re-run.
# Note: the confirmation below is printed unconditionally, i.e. also when the
# table already existed and nothing was created.
cursor.execute("""
    IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'ingestion_logs')
    CREATE TABLE ingestion_logs (
        id BIGINT IDENTITY(1,1) PRIMARY KEY,
        municipio NVARCHAR(50) NOT NULL,
        records_inserted INT,
        records_updated INT,
        start_time DATETIME2,
        end_time DATETIME2,
        status NVARCHAR(20),
        error_message NVARCHAR(MAX),
        created_at DATETIME2 DEFAULT GETUTCDATE()
    )
""")
print("‚úÖ Tabla ingestion_logs creada")

‚úÖ Tabla ingestion_logs creada


# **Check tables**

In [17]:
# List the base tables of the current database, then release the connection.
cursor = conn.cursor()
try:
    # Persist the CREATE TABLE statements issued on this connection. When the
    # connection is not in autocommit mode they live in an open transaction
    # that close() would roll back; on an autocommit connection this is a no-op.
    conn.commit()

    cursor.execute("""
        SELECT TABLE_NAME
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_TYPE = 'BASE TABLE'
    """)

    print("üìä Tablas en ClimateDB:")
    for row in cursor.fetchall():
        print(f"   ‚Ä¢ {row[0]}")
finally:
    # Close even if the commit or the query fails, so the connection never leaks.
    conn.close()

üìä Tablas en ClimateDB:
   ‚Ä¢ climate_observations
   ‚Ä¢ climate_predictions
   ‚Ä¢ model_registry
   ‚Ä¢ ingestion_logs


# **Load historical dataset**

In [21]:
# Collect every CSV directly under DATA_DIR, in a deterministic (sorted) order.
csv_files = list(DATA_DIR.glob("*.csv"))
csv_files.sort()
print(f"üìÅ Encontrados {len(csv_files)} archivos CSV\n")

üìÅ Encontrados 13 archivos CSV



In [30]:
import os
from getpass import getpass
from pathlib import Path

import pandas as pd
import pymssql

# ============================================
# CONFIGURATION
# ============================================
DATA_DIR = Path("../data/wind")

# Connection settings are read from the environment so that no credential is
# stored in the notebook. Host, port and user fall back to the local
# development defaults; the password is prompted for when MSSQL_PASSWORD is
# not set.
SERVER = os.environ.get("MSSQL_SERVER", "localhost")
PORT = os.environ.get("MSSQL_PORT", "1433")
USER = os.environ.get("MSSQL_USER", "sa")
PASSWORD = os.environ.get("MSSQL_PASSWORD") or getpass("SQL Server password: ")
DATABASE = 'ClimateDB'

BATCH_SIZE = 5000    # rows sent per executemany() call, i.e. per transaction
PROGRESS_EVERY = 5   # print a progress line every N batches

# CSV columns, in the order the INSERT statement expects them after `municipio`.
OBSERVATION_COLUMNS = [
    'datetime',
    'wind_speed_10m',
    'wind_direction_10m',
    'temperature_2m',
    'relative_humidity_2m',
    'precipitation',
]

INSERT_SQL = """
    INSERT INTO climate_observations
    (municipio, datetime, wind_speed_10m, wind_direction_10m,
     temperature_2m, relative_humidity_2m, precipitation)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
"""


def _optional(cast, value):
    """Return ``cast(value)``, or ``None`` when ``value`` is missing (NaN/NA)."""
    return cast(value) if pd.notna(value) else None


def _build_records(municipio, df):
    """Turn one municipality's DataFrame into parameter tuples for INSERT_SQL.

    ``df`` must contain OBSERVATION_COLUMNS with ``datetime`` already parsed.
    Missing numeric values become ``None`` so they are stored as NULL.
    """
    # itertuples(name=None) yields plain tuples and avoids building one Series
    # per row, which is what makes iterrows() slow on large frames.
    rows = df[OBSERVATION_COLUMNS].itertuples(index=False, name=None)
    return [
        (
            municipio,
            timestamp.to_pydatetime(),
            _optional(float, wind_speed),
            _optional(int, wind_direction),
            _optional(float, temperature),
            _optional(int, humidity),
            _optional(float, precipitation),
        )
        for timestamp, wind_speed, wind_direction, temperature, humidity, precipitation in rows
    ]


def _is_duplicate_error(exc):
    """Tell whether ``exc`` reports a violation of the (municipio, datetime) unique key."""
    message = str(exc)
    return "UQ_municipio_datetime" in message or "UNIQUE KEY" in message


def _insert_rows_individually(conn, cursor, rows):
    """Insert ``rows`` one at a time, committing every row that succeeds.

    Returns ``(inserted, duplicates, failed, first_error)`` where
    ``first_error`` is the message of the first non-duplicate failure, or
    ``None`` when there was none.
    """
    inserted = duplicates = failed = 0
    first_error = None
    for record in rows:
        try:
            cursor.execute(INSERT_SQL, record)
            conn.commit()
            inserted += 1
        except Exception as row_err:  # not a bare except: Ctrl-C must still interrupt
            conn.rollback()
            if _is_duplicate_error(row_err):
                duplicates += 1
            else:
                failed += 1
                if first_error is None:
                    first_error = str(row_err)
    return inserted, duplicates, failed, first_error


# ============================================
# STEP 1: Open the connection (autocommit on for the DDL below)
# ============================================
conn = pymssql.connect(
    server=SERVER,
    port=PORT,
    user=USER,
    password=PASSWORD,
    database=DATABASE,
    autocommit=True  # DDL is persisted immediately
)
print("‚úÖ Conectado a ClimateDB")

total_inserted = 0

try:
    cursor = conn.cursor()

    # ============================================
    # STEP 2: Verify/create the table
    # ============================================
    cursor.execute("""
    IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'climate_observations')
    BEGIN
        CREATE TABLE climate_observations (
            id BIGINT IDENTITY(1,1) PRIMARY KEY,
            municipio NVARCHAR(50) NOT NULL,
            datetime DATETIME2 NOT NULL,
            wind_speed_10m FLOAT,
            wind_direction_10m INT,
            temperature_2m FLOAT,
            relative_humidity_2m INT,
            precipitation FLOAT,
            created_at DATETIME2 DEFAULT GETUTCDATE(),
            CONSTRAINT UQ_municipio_datetime UNIQUE (municipio, datetime)
        )
        PRINT 'Tabla creada'
    END
""")
    print("‚úÖ Tabla climate_observations verificada/creada")

    # Confirm the table really exists before loading anything into it.
    cursor.execute("SELECT COUNT(*) FROM sys.tables WHERE name = 'climate_observations'")
    if cursor.fetchone()[0] != 1:
        print("‚ùå ERROR: La tabla no se cre√≥ correctamente")
        raise RuntimeError("Tabla no existe")
    print("‚úÖ Confirmado: tabla climate_observations existe")

    # ============================================
    # STEP 3: Load the historical data
    # ============================================
    # From here on each batch is its own transaction. A batch that fails is
    # rolled back as a whole, so rows are never half-inserted and later
    # miscounted as duplicates by the row-by-row retry.
    conn.autocommit(False)

    csv_files = sorted(DATA_DIR.glob("*.csv"))
    print(f"\nüìÅ Encontrados {len(csv_files)} archivos CSV\n")

    for csv_file in csv_files:
        municipio = csv_file.stem  # the file name (without extension) is the municipality key
        print(f"üìç Procesando: {municipio}")

        # Read the CSV and parse timestamps
        df = pd.read_csv(csv_file)
        df['datetime'] = pd.to_datetime(df['datetime'])
        records = _build_records(municipio, df)

        inserted = 0
        duplicates = 0
        failed = 0

        for batch_no, start in enumerate(range(0, len(records), BATCH_SIZE)):
            batch = records[start:start + BATCH_SIZE]
            try:
                cursor.executemany(INSERT_SQL, batch)
                conn.commit()
            except Exception as batch_err:
                conn.rollback()  # discard whatever part of the batch went in
                if _is_duplicate_error(batch_err):
                    # Some rows already exist: retry one by one so the new
                    # rows are kept and only the real duplicates are skipped.
                    ok, dup, bad, first_error = _insert_rows_individually(conn, cursor, batch)
                    inserted += ok
                    duplicates += dup
                    failed += bad
                    if first_error is not None:
                        print(f"   ‚ö†Ô∏è Error en lote: {first_error[:100]}")
                else:
                    print(f"   ‚ö†Ô∏è Error en lote: {str(batch_err)[:100]}")
                    failed += len(batch)
            else:
                inserted += len(batch)
                # Progress line every PROGRESS_EVERY batches (never on the first one)
                if batch_no and batch_no % PROGRESS_EVERY == 0:
                    print(f"   ‚è≥ {inserted:,}/{len(records):,} registros...")

        print(f"   ‚úÖ {inserted:,} insertados, {duplicates:,} omitidos (duplicados)")
        if failed:
            # Reported separately so real failures are not mistaken for duplicates.
            print(f"   {failed:,} registros no insertados por errores")
        total_inserted += inserted
finally:
    # Always release the connection, including when a step above raises.
    conn.close()

print(f"\nüéâ Total: {total_inserted:,} registros cargados a la base de datos")

‚úÖ Conectado a ClimateDB
‚úÖ Tabla climate_observations verificada/creada
‚úÖ Confirmado: tabla climate_observations existe

üìÅ Encontrados 13 archivos CSV

üìç Procesando: albania
   ‚è≥ 30,000/87,624 registros...
   ‚è≥ 55,000/87,624 registros...
   ‚è≥ 80,000/87,624 registros...
   ‚úÖ 87,624 insertados, 0 omitidos (duplicados)
üìç Procesando: barrancas
   ‚è≥ 30,000/87,624 registros...
   ‚è≥ 55,000/87,624 registros...
   ‚è≥ 80,000/87,624 registros...
   ‚úÖ 87,624 insertados, 0 omitidos (duplicados)
üìç Procesando: distraccion
   ‚è≥ 30,000/87,624 registros...
   ‚è≥ 55,000/87,624 registros...
   ‚è≥ 80,000/87,624 registros...
   ‚úÖ 87,624 insertados, 0 omitidos (duplicados)
üìç Procesando: el_molino
   ‚è≥ 30,000/87,624 registros...
   ‚è≥ 55,000/87,624 registros...
   ‚è≥ 80,000/87,624 registros...
   ‚úÖ 87,624 insertados, 0 omitidos (duplicados)
üìç Procesando: fonseca
   ‚è≥ 30,000/87,624 registros...
   ‚è≥ 55,000/87,624 registros...
   ‚è≥ 80,000/87,624 registros.