# 📥 Climate Data Download - La Guajira (10 Years)

**Project:** GuajiraClimateAgents  
**Author:** Eder Arley León Gómez  
**GitHub:** https://github.com/ealeongomez  

This notebook downloads historical climate data from the last **10 years** for all municipalities in La Guajira using the Open-Meteo API.

Data is saved to `data/wind/{municipality}.csv`

## 1. Initial Setup

In [None]:
# Initial configuration
import sys
import logging
from pathlib import Path
from datetime import datetime, timedelta

import pandas as pd

# Put the parent of the current working directory first on the import path
# so that the `src` package imported below can be resolved
PROJECT_ROOT = Path.cwd().parent
sys.path.insert(0, str(PROJECT_ROOT))

# Emit INFO-and-above log records, each prefixed with timestamp and level,
# so download progress is visible while the notebook runs
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Project-local import: must come after the sys.path change above
from src.utils.climate_data import ClimateDataFetcher

print("‚úÖ Modules loaded successfully")
print(f"üìÅ Project: {PROJECT_ROOT}")

## 2. Download Parameters

In [None]:
# ===========================
# CONFIGURATION PARAMETERS
# ===========================
# Defines the download window (START_DATE..END_DATE), the output directory
# and the list of municipalities, then prints them for review.

# Time range: last 10 calendar years, ending now
YEARS_BACK = 10
END_DATE = datetime.now()
try:
    # Shift the year field directly so leap days are accounted for;
    # `YEARS_BACK * 365` days would start the window 2-3 days late.
    START_DATE = END_DATE.replace(year=END_DATE.year - YEARS_BACK)
except ValueError:
    # END_DATE is Feb 29 and the target year is not a leap year
    START_DATE = END_DATE.replace(year=END_DATE.year - YEARS_BACK, day=28)

# Output directory (created if missing, including parents)
OUTPUT_DIR = PROJECT_ROOT / "data" / "wind"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Municipalities to download (all)
MUNICIPIOS = ClimateDataFetcher.get_available_municipios()

# Show configuration
print("="*60)
print("üìã DOWNLOAD CONFIGURATION")
print("="*60)
print(f"üìÖ Period: {START_DATE.strftime('%Y-%m-%d')} ‚Üí {END_DATE.strftime('%Y-%m-%d')}")
# Rough estimate only; presumably one record per hour — confirm against the fetcher
print(f"üìÜ Total: {YEARS_BACK} years (~{YEARS_BACK * 365 * 24:,} records per municipality)")
print(f"üìÇ Directory: {OUTPUT_DIR}")
print(f"üó∫Ô∏è  Municipalities: {len(MUNICIPIOS)}")
print()
for i, m in enumerate(MUNICIPIOS, 1):
    print(f"   {i:2d}. {m}")

## 3. Data Download (10 Years per Municipality)

⚠️ **Note:** This operation may take several minutes due to:
- API rate limiting (pauses between requests)
- Data volume (10 years × 13 municipalities)

Estimated time: ~15-30 minutes

In [None]:
# ===========================
# DATA DOWNLOAD
# ===========================
# For every municipality: fetch the configured period, write it to
# OUTPUT_DIR/<municipality>.csv and append one summary row to `results`.
# A failure for one municipality is printed and recorded as a failed row;
# the loop then continues with the next municipality.

results = []
total_records = 0

print("üöÄ Starting climate data download...")
print("="*60)

for idx, municipio in enumerate(MUNICIPIOS, 1):
    print(f"\n[{idx}/{len(MUNICIPIOS)}] üìç {municipio.upper()}")
    print("-"*40)

    try:
        # Create fetcher with custom directory
        fetcher = ClimateDataFetcher(
            municipio=municipio,
            start_date=START_DATE,
            end_date=END_DATE,
            data_dir=OUTPUT_DIR,
            wind_only=False  # Download all variables
        )

        # Execute download
        df = fetcher.fetch(block_days=180)  # 6-month blocks

        # Save with municipality name
        output_path = OUTPUT_DIR / f"{municipio}.csv"
        df.to_csv(output_path, index=False)

        # Build the summary row; nothing is committed to `results` or
        # `total_records` until every step that can raise has succeeded
        records = len(df)
        result = {
            "municipality": municipio,
            "records": records,
            "start": df["datetime"].min() if not df.empty else None,
            "end": df["datetime"].max() if not df.empty else None,
            "file": str(output_path),
            "status": "‚úÖ OK"
        }

    except Exception as e:
        # Best-effort boundary: record the failure and move on
        results.append({
            "municipality": municipio,
            "records": 0,
            "start": None,
            "end": None,
            "file": None,
            "status": f"‚ùå Error: {str(e)[:50]}"  # truncated for the summary table
        })
        print(f"   ‚ùå Error: {e}")

    else:
        # Success path: commit the row and the running total together so
        # the total never counts a municipality that is reported as failed
        results.append(result)
        total_records += records

        print(f"   ‚úÖ {records:,} records downloaded")
        print(f"   üíæ Saved: {output_path.name}")

print("\n" + "="*60)
print("‚úÖ DOWNLOAD COMPLETED")
print("="*60)

## 4. Download Summary

In [None]:
# Tabulate the per-municipality outcomes collected during the download
df_results = pd.DataFrame(results)

# Partition rows on whether the status is exactly the success marker
ok_mask = df_results["status"] == "‚úÖ OK"
successful = df_results[ok_mask]
failed = df_results[~ok_mask]

# Headline figures, followed by one blank line
summary_lines = [
    "üìä FINAL SUMMARY",
    "="*60,
    f"‚úÖ Successful municipalities: {len(successful)}/{len(MUNICIPIOS)}",
    f"‚ùå Failed municipalities: {len(failed)}",
    f"üìà Total records: {total_records:,}",
    f"üìÇ Location: {OUTPUT_DIR}",
    "",
]
print("\n".join(summary_lines))

# Last expression of the cell: rendered by the notebook as a table
df_results

## 5. File Verification

In [None]:
# List generated files
import os

# Report every CSV in the output directory with its size, then a total line
BYTES_PER_MB = 1024 * 1024

print("üìÅ Files in data/wind/")
print("="*60)

# Alphabetical listing of the CSV files
files = list(OUTPUT_DIR.glob("*.csv"))
files.sort()

total_size = 0
for csv_path in files:
    megabytes = csv_path.stat().st_size / BYTES_PER_MB
    print(f"   üìÑ {csv_path.name:30s} {megabytes:6.2f} MB")
    total_size += megabytes

print("-"*60)
print(f"   üì¶ Total: {len(files)} files, {total_size:.2f} MB")

## 6. Data Preview

In [None]:
# Preview one municipality's CSV: size, columns, covered period, first 10 rows
example_file = OUTPUT_DIR / "riohacha.csv"

if not example_file.exists():
    print("‚ö†Ô∏è Example file not found")
else:
    # Read the file back and turn the datetime column into real timestamps
    df_example = pd.read_csv(example_file)
    df_example["datetime"] = pd.to_datetime(df_example["datetime"])

    print(f"üìä Preview: {example_file.name}")

    row_count = len(df_example)
    print(f"   Records: {row_count:,}")

    column_names = list(df_example.columns)
    print(f"   Columns: {column_names}")

    timestamps = df_example["datetime"]
    period_start = timestamps.min()
    period_end = timestamps.max()
    print(f"   Period: {period_start} ‚Üí {period_end}")

    print()
    display(df_example.head(10))

In [None]:
# Descriptive statistics
# Shown only when the preview cell has defined `df_example`.
# The result is passed to display() explicitly: a notebook auto-renders a
# bare expression only when it is the last top-level statement of the cell,
# so a bare `df_example.describe()` inside this `if` would show nothing.
if "df_example" in globals():
    display(df_example.describe())