In [None]:
import pymssql, os, warnings
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv

warnings.filterwarnings('ignore')

In [2]:
DB_CONFIG = {
    'server': os.getenv('DB_SERVER', 'localhost'),
    'port': os.getenv('DB_PORT', '1433'),
    'user': os.getenv('DB_USER'),
    'password': os.getenv('DB_PASSWORD'),
    'database': os.getenv('DB_NAME', 'ClimateDB')
}

In [3]:
def get_connection():
    """Obtiene una conexi√≥n a la base de datos."""
    return pymssql.connect(**DB_CONFIG)

# **Check connection**

In [None]:
try:
    conn = get_connection()
    cursor = conn.cursor()
    cursor.execute("SELECT DB_NAME()")
    db_name = cursor.fetchone()[0]
    print(f"‚úÖ Conectado a: {db_name}")
    print(f"   Server: {DB_CONFIG['server']}:{DB_CONFIG['port']}")
    conn.close()
except Exception as e:
    print(f"‚ùå Error de conexi√≥n: {e}")

‚úÖ Conectado a: ClimateDB
   Server: localhost:1433


In [5]:
conn = get_connection()
cursor = conn.cursor()

cursor.execute("""
    SELECT TABLE_NAME 
    FROM INFORMATION_SCHEMA.TABLES 
    WHERE TABLE_TYPE = 'BASE TABLE'
""")

print("üìä Tablas en ClimateDB:")
for row in cursor.fetchall():
    print(f"   ‚Ä¢ {row[0]}")

conn.close()

üìä Tablas en ClimateDB:
   ‚Ä¢ climate_observations


In [7]:
conn = get_connection()
cursor = conn.cursor()

tables = ['climate_observations', 'climate_predictions', 'model_registry', 'ingestion_logs']

print("üìà Conteo de registros:\n")
for table in tables:
    try:
        cursor.execute(f"SELECT COUNT(*) FROM {table}")
        count = cursor.fetchone()[0]
        print(f"   {table}: {count:,} registros")
    except:
        print(f"   {table}: ‚ùå No existe")

conn.close()

üìà Conteo de registros:

   climate_observations: 1,139,112 registros
   climate_predictions: ‚ùå No existe
   model_registry: ‚ùå No existe
   ingestion_logs: ‚ùå No existe


In [8]:
conn = get_connection()

query = """
    SELECT 
        municipio,
        COUNT(*) as registros,
        MIN(datetime) as fecha_inicio,
        MAX(datetime) as fecha_fin,
        AVG(wind_speed_10m) as avg_wind_speed,
        AVG(temperature_2m) as avg_temperature
    FROM climate_observations
    GROUP BY municipio
    ORDER BY municipio
"""

df_stats = pd.read_sql(query, conn)
conn.close()

print("üìä Resumen de datos por municipio:\n")
display(df_stats)

  df_stats = pd.read_sql(query, conn)


üìä Resumen de datos por municipio:



Unnamed: 0,municipio,registros,fecha_inicio,fecha_fin,avg_wind_speed,avg_temperature
0,albania,87624,2015-12-21,2025-12-18 23:00:00,9.339294,27.289533
1,barrancas,87624,2015-12-21,2025-12-18 23:00:00,11.830405,27.870796
2,distraccion,87624,2015-12-21,2025-12-18 23:00:00,13.237239,27.878198
3,el_molino,87624,2015-12-21,2025-12-18 23:00:00,12.437099,27.690288
4,fonseca,87624,2015-12-21,2025-12-18 23:00:00,12.658548,28.009259
5,hatonuevo,87624,2015-12-21,2025-12-18 23:00:00,9.336363,26.887922
6,la_jagua_del_pilar,87624,2015-12-21,2025-12-18 23:00:00,14.688101,28.374023
7,maicao,87624,2015-12-21,2025-12-18 23:00:00,15.349213,27.639091
8,manaure,87624,2015-12-21,2025-12-18 23:00:00,17.624694,28.551882
9,mingueo,87624,2015-12-21,2025-12-18 23:00:00,8.296277,26.424275


In [9]:
conn = get_connection()

query = """
    SELECT TOP 10 
        municipio, datetime, wind_speed_10m, 
        temperature_2m, precipitation
    FROM climate_observations
    ORDER BY datetime DESC
"""

df_recent = pd.read_sql(query, conn)
conn.close()

print("üïê √öltimos 10 registros:\n")
display(df_recent)

  df_recent = pd.read_sql(query, conn)


üïê √öltimos 10 registros:



Unnamed: 0,municipio,datetime,wind_speed_10m,temperature_2m,precipitation
0,maicao,2025-12-18 23:00:00,6.4,24.4,0.0
1,albania,2025-12-18 23:00:00,6.6,22.9,0.0
2,hatonuevo,2025-12-18 23:00:00,5.1,21.6,0.0
3,manaure,2025-12-18 23:00:00,13.4,27.4,0.0
4,mingueo,2025-12-18 23:00:00,5.0,25.8,0.0
5,barrancas,2025-12-18 23:00:00,5.1,21.8,0.0
6,riohacha,2025-12-18 23:00:00,12.8,26.6,0.0
7,distraccion,2025-12-18 23:00:00,7.3,24.0,0.0
8,san_juan_del_cesar,2025-12-18 23:00:00,8.6,25.7,0.0
9,el_molino,2025-12-18 23:00:00,8.6,25.4,0.0


In [10]:
conn = get_connection()

# Verificar valores nulos
query_nulls = """
    SELECT 
        SUM(CASE WHEN wind_speed_10m IS NULL THEN 1 ELSE 0 END) as null_wind,
        SUM(CASE WHEN temperature_2m IS NULL THEN 1 ELSE 0 END) as null_temp,
        SUM(CASE WHEN precipitation IS NULL THEN 1 ELSE 0 END) as null_precip,
        COUNT(*) as total
    FROM climate_observations
"""

df_nulls = pd.read_sql(query_nulls, conn)
conn.close()

print("üîç Verificaci√≥n de valores nulos:\n")
print(f"   Wind Speed NULL: {df_nulls['null_wind'].iloc[0]:,}")
print(f"   Temperature NULL: {df_nulls['null_temp'].iloc[0]:,}")
print(f"   Precipitation NULL: {df_nulls['null_precip'].iloc[0]:,}")
print(f"   Total registros: {df_nulls['total'].iloc[0]:,}")

üîç Verificaci√≥n de valores nulos:

   Wind Speed NULL: 0
   Temperature NULL: 0
   Precipitation NULL: 0
   Total registros: 1,139,112


  df_nulls = pd.read_sql(query_nulls, conn)


In [11]:
conn = get_connection()

query = """
    SELECT 
        MIN(wind_speed_10m) as min_wind,
        MAX(wind_speed_10m) as max_wind,
        AVG(wind_speed_10m) as avg_wind,
        MIN(temperature_2m) as min_temp,
        MAX(temperature_2m) as max_temp,
        AVG(temperature_2m) as avg_temp
    FROM climate_observations
"""

df_desc = pd.read_sql(query, conn)
conn.close()

print("üìà Estad√≠sticas descriptivas:\n")
print(f"   üå¨Ô∏è Wind Speed: min={df_desc['min_wind'].iloc[0]:.1f}, max={df_desc['max_wind'].iloc[0]:.1f}, avg={df_desc['avg_wind'].iloc[0]:.1f}")
print(f"   üå°Ô∏è Temperature: min={df_desc['min_temp'].iloc[0]:.1f}, max={df_desc['max_temp'].iloc[0]:.1f}, avg={df_desc['avg_temp'].iloc[0]:.1f}")

üìà Estad√≠sticas descriptivas:

   üå¨Ô∏è Wind Speed: min=0.0, max=45.4, avg=13.1
   üå°Ô∏è Temperature: min=18.2, max=40.5, avg=27.8


  df_desc = pd.read_sql(query, conn)
