### Schema for Downloading USD Rates and Stock Market Securities

The objective of this module is to build a web-scraping pipeline that retrieves the USD exchange rate and selected Colombian state-owned stocks (as well as sovereign debt). This data will serve as the foundation for rigorous statistical analysis and variance modeling.

In [1]:
###################
#### Libraries ####
###################


from dataclasses import dataclass
from datetime import datetime
from typing import Optional
import requests
from bs4 import BeautifulSoup

In [2]:
###########
## Dolar ##
###########


# Exploratory fetch of the Google Finance quote page for the dollar (USD-COP),
# matching this cell's "Dolar" banner. The URL carries no click-tracking query
# parameters so the request does not depend on a stale search-session token.
url = 'https://www.google.com/finance/quote/USD-COP'
# Bound the wait so a stalled connection cannot hang the cell indefinitely.
response = requests.get(url, timeout=10)
soup = BeautifulSoup(response.content, 'html.parser')

### Refactored Code - Dollar Case

In [3]:

@dataclass
class DollarQuote:
    """One price observation for the dollar, displayed in COP.

    Attributes:
        price: Quoted price as a float.
        timestamp: Capture time, stored as an already-formatted string.
        source: Label of the data provider; defaults to "Google Finance".
    """

    price: float
    timestamp: str
    source: str = "Google Finance"

    @property
    def formatted_price(self) -> str:
        """Price with thousands separators and two decimals, wrapped as "$... COP"."""
        amount = format(self.price, ",.2f")
        return "$" + amount + " COP"

    def to_dict(self) -> dict:
        """Return the three fields as a plain dictionary, in declaration order."""
        field_names = ("price", "timestamp", "source")
        return {key: getattr(self, key) for key in field_names}


class DollarScraperError(Exception):
    """Error type raised by the dollar scraper for fetch and parse failures."""


class DollarScraper:
    """Scrapes the USD-COP quote from its Google Finance page.

    The page at ``URL`` is downloaded with ``requests`` and parsed with
    BeautifulSoup; the price is read from a single ``div`` identified by its
    CSS classes. Every failure is reported as ``DollarScraperError``.
    """

    URL = "https://www.google.com/finance/quote/USD-COP"

    # Browser-like User-Agent sent with every request.
    HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
    }

    # CSS selector for the price element. A selector matches a div carrying both
    # classes regardless of their order or of extra classes, whereas
    # find(class_="YMlKec fxKbKc") only matches that exact attribute string.
    PRICE_SELECTOR = "div.YMlKec.fxKbKc"

    def __init__(self, timeout: int = 10):
        """Store the request timeout.

        Args:
            timeout: Seconds passed as ``timeout`` to ``requests.get``.
        """
        self.timeout = timeout

    def _fetch_page(self) -> BeautifulSoup:
        """Download ``URL`` and return the parsed document.

        Raises:
            DollarScraperError: If the request fails or returns an HTTP error
                status. The underlying ``requests`` exception is chained as
                ``__cause__``.
        """
        try:
            response = requests.get(
                self.URL,
                headers=self.HEADERS,
                timeout=self.timeout
            )
            response.raise_for_status()
            return BeautifulSoup(response.content, "html.parser")

        except requests.RequestException as e:
            # Chain explicitly so the original network error stays attached.
            raise DollarScraperError(f"Error al obtener la pagina: {e}") from e

    def _parse_price(self, soup: BeautifulSoup) -> float:
        """Extract the price from the parsed page.

        Args:
            soup: Parsed quote page.

        Returns:
            The price as a float, after removing "$" and thousands commas.

        Raises:
            DollarScraperError: If the price element is missing or its text
                cannot be converted to a float.
        """
        element = soup.select_one(self.PRICE_SELECTOR)

        if element is None:
            raise DollarScraperError("No se encontro el elemento del precio")

        price_text = element.text.strip()
        price_clean = price_text.replace("$", "").replace(",", "")

        try:
            return float(price_clean)
        except ValueError as e:
            raise DollarScraperError(
                f"No se pudo convertir el precio: {price_text}"
            ) from e

    def get_quote(self) -> DollarQuote:
        """Fetch the page, parse the price and return it as a ``DollarQuote``.

        The timestamp is the local wall-clock time at which the quote object is
        built, formatted as ``YYYY-MM-DD HH:MM:SS``.

        Raises:
            DollarScraperError: Propagated from fetching or parsing.
        """
        soup = self._fetch_page()
        price = self._parse_price(soup)

        return DollarQuote(
            price=price,
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        )

    def get_price(self) -> float:
        """Return only the numeric price of a freshly fetched quote."""
        return self.get_quote().price


def get_dollar_price() -> float:
    """Return the price obtained from a DollarScraper built with default arguments."""
    scraper = DollarScraper()
    return scraper.get_price()


def get_dollar_quote() -> DollarQuote:
    """Return the DollarQuote obtained from a DollarScraper built with default arguments."""
    scraper = DollarScraper()
    return scraper.get_quote()


def get_dollar_dict() -> dict:
    """Return a freshly fetched dollar quote converted with ``to_dict()``."""
    quote = DollarScraper().get_quote()
    return quote.to_dict()


if __name__ == "__main__":
    # Demo run: fetch one dollar quote and print it; scraper failures are
    # reported on stdout instead of propagating.
    print("Obteniendo cotizacion del dolar...")
    print("=" * 50)

    try:
        scraper = DollarScraper()
        quote = scraper.get_quote()
    except DollarScraperError as err:
        print(f"Error: {err}")
    else:
        # Only reached when the quote was fetched and parsed successfully.
        print(f"Precio:    {quote.formatted_price}")
        print(f"Timestamp: {quote.timestamp}")

Obteniendo cotizacion del dolar...
Precio:    $3,729.50 COP
Timestamp: 2025-12-29 15:43:22


#### Euro


In [4]:

@dataclass
class EuroQuote:
    """One price observation for the euro, displayed in COP.

    Attributes:
        price: Quoted price as a float.
        timestamp: Capture time, stored as an already-formatted string.
        source: Label of the data provider; defaults to "Google Finance".
    """

    price: float
    timestamp: str
    source: str = "Google Finance"

    @property
    def formatted_price(self) -> str:
        """Price with thousands separators and two decimals, wrapped as "$... COP"."""
        amount = format(self.price, ",.2f")
        return "$" + amount + " COP"

    def to_dict(self) -> dict:
        """Return the three fields as a plain dictionary, in declaration order."""
        field_names = ("price", "timestamp", "source")
        return {key: getattr(self, key) for key in field_names}


class EuroScraperError(Exception):
    """Error type raised by the euro scraper for fetch and parse failures."""


class EuroScraper:
    """Scrapes the EUR-COP quote from its Google Finance page.

    The page at ``URL`` is downloaded with ``requests`` and parsed with
    BeautifulSoup; the price is read from a single ``div`` identified by its
    CSS classes. Every failure is reported as ``EuroScraperError``.
    """

    URL = "https://www.google.com/finance/quote/EUR-COP"

    # Browser-like User-Agent sent with every request.
    HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
    }

    # CSS selector for the price element. A selector matches a div carrying both
    # classes regardless of their order or of extra classes, whereas
    # find(class_="YMlKec fxKbKc") only matches that exact attribute string.
    PRICE_SELECTOR = "div.YMlKec.fxKbKc"

    def __init__(self, timeout: int = 10):
        """Store the request timeout.

        Args:
            timeout: Seconds passed as ``timeout`` to ``requests.get``.
        """
        self.timeout = timeout

    def _fetch_page(self) -> BeautifulSoup:
        """Download ``URL`` and return the parsed document.

        Raises:
            EuroScraperError: If the request fails or returns an HTTP error
                status. The underlying ``requests`` exception is chained as
                ``__cause__``.
        """
        try:
            response = requests.get(
                self.URL,
                headers=self.HEADERS,
                timeout=self.timeout
            )
            response.raise_for_status()
            return BeautifulSoup(response.content, "html.parser")

        except requests.RequestException as e:
            # Chain explicitly so the original network error stays attached.
            raise EuroScraperError(f"Error al obtener la pagina: {e}") from e

    def _parse_price(self, soup: BeautifulSoup) -> float:
        """Extract the price from the parsed page.

        Args:
            soup: Parsed quote page.

        Returns:
            The price as a float, after removing "$" and thousands commas.

        Raises:
            EuroScraperError: If the price element is missing or its text
                cannot be converted to a float.
        """
        element = soup.select_one(self.PRICE_SELECTOR)

        if element is None:
            raise EuroScraperError("No se encontro el elemento del precio")

        price_text = element.text.strip()
        price_clean = price_text.replace("$", "").replace(",", "")

        try:
            return float(price_clean)
        except ValueError as e:
            raise EuroScraperError(
                f"No se pudo convertir el precio: {price_text}"
            ) from e

    def get_quote(self) -> EuroQuote:
        """Fetch the page, parse the price and return it as a ``EuroQuote``.

        The timestamp is the local wall-clock time at which the quote object is
        built, formatted as ``YYYY-MM-DD HH:MM:SS``.

        Raises:
            EuroScraperError: Propagated from fetching or parsing.
        """
        soup = self._fetch_page()
        price = self._parse_price(soup)

        return EuroQuote(
            price=price,
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        )

    def get_price(self) -> float:
        """Return only the numeric price of a freshly fetched quote."""
        return self.get_quote().price


def get_euro_price() -> float:
    """Return the price obtained from a EuroScraper built with default arguments."""
    scraper = EuroScraper()
    return scraper.get_price()


def get_euro_quote() -> EuroQuote:
    """Return the EuroQuote obtained from a EuroScraper built with default arguments."""
    scraper = EuroScraper()
    return scraper.get_quote()


def get_euro_dict() -> dict:
    """Return a freshly fetched euro quote converted with ``to_dict()``."""
    quote = EuroScraper().get_quote()
    return quote.to_dict()


if __name__ == "__main__":
    # Demo run: fetch one euro quote and print it; scraper failures are
    # reported on stdout instead of propagating.
    print("Obteniendo cotizacion del euro...")
    print("=" * 50)

    try:
        scraper = EuroScraper()
        quote = scraper.get_quote()
    except EuroScraperError as err:
        print(f"Error: {err}")
    else:
        # Only reached when the quote was fetched and parsed successfully.
        print(f"Precio:    {quote.formatted_price}")
        print(f"Timestamp: {quote.timestamp}")

Obteniendo cotizacion del euro...
Precio:    $4,350.60 COP
Timestamp: 2025-12-29 15:43:35


### Datos del mercado 

In [78]:
#!pip install yfinance

In [None]:


from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Optional
from enum import Enum
import yfinance as yf
import pandas as pd


class IndicatorTier(Enum):
    """Importance classification for market indicators."""

    # Direct impact on USD/COP.
    CRITICAL = 1
    # Highly relevant to the analysis.
    IMPORTANT = 2
    # Additional context.
    CONTEXT = 3


@dataclass
class MarketIndicator:
    """A single market indicator reading, or the error that prevented it.

    Attributes:
        name: Indicator name.
        symbol: Ticker symbol used to look the indicator up.
        value: Latest value, or None when unavailable.
        previous_close: Previous closing value, or None.
        change_value: Absolute change, or None.
        change_pct: Percentage change, or None.
        tier: Importance tier; defaults to ``IndicatorTier.CONTEXT``.
        timestamp: Capture time as a string, or None.
        error: Error message when the reading failed, otherwise None.
    """

    name: str
    symbol: str
    value: Optional[float] = None
    previous_close: Optional[float] = None
    change_value: Optional[float] = None
    change_pct: Optional[float] = None
    tier: IndicatorTier = IndicatorTier.CONTEXT
    timestamp: Optional[str] = None
    error: Optional[str] = None

    @property
    def is_valid(self) -> bool:
        """True when a value is present and no error was recorded."""
        if self.value is None:
            return False
        return self.error is None

    def to_dict(self) -> dict:
        """Return the fields as a dictionary, with the tier given by its enum name."""
        field_names = (
            "name",
            "symbol",
            "value",
            "previous_close",
            "change_value",
            "change_pct",
            "tier",
            "timestamp",
            "error",
        )
        payload = {key: getattr(self, key) for key in field_names}
        # Replace the enum member with its name; the key keeps its position.
        payload["tier"] = self.tier.name
        return payload


@dataclass
class MarketSnapshot:
    """A set of indicator readings together with a snapshot timestamp.

    Attributes:
        timestamp: Snapshot time as a string.
        indicators: Mapping from indicator name to its ``MarketIndicator``.
    """

    timestamp: str
    indicators: dict[str, MarketIndicator] = field(default_factory=dict)

    @property
    def critical_indicators(self) -> dict[str, MarketIndicator]:
        """Subset of indicators whose tier is ``IndicatorTier.CRITICAL``."""
        selected = {}
        for key, indicator in self.indicators.items():
            if indicator.tier == IndicatorTier.CRITICAL:
                selected[key] = indicator
        return selected

    @property
    def valid_indicators(self) -> dict[str, MarketIndicator]:
        """Subset of indicators whose ``is_valid`` property is true."""
        selected = {}
        for key, indicator in self.indicators.items():
            if indicator.is_valid:
                selected[key] = indicator
        return selected

    def to_dict(self) -> dict:
        """Return the snapshot as a dictionary with per-indicator dicts and summary counts."""
        serialized = {key: indicator.to_dict() for key, indicator in self.indicators.items()}
        summary = {
            "total": len(self.indicators),
            "valid": len(self.valid_indicators),
            "critical": len(self.critical_indicators),
        }
        return {
            "timestamp": self.timestamp,
            "indicators": serialized,
            "summary": summary,
        }


class YahooFinanceClient:
    """Client that reads market indicators through ``yfinance``.

    The set of indicators is fixed by ``INDICATORS_CONFIG``, which maps an
    indicator name to its ticker symbol, importance tier and a description.
    """

    # Indicator configuration
    INDICATORS_CONFIG = {
        # Tier 1 - Critical (direct impact on USD/COP)
        "petroleo_brent": {
            "symbol": "BZ=F",
            "tier": IndicatorTier.CRITICAL,
            "description": "Petroleo Brent - Principal exportacion de Colombia"
        },
        "dxy": {
            "symbol": "DX-Y.NYB",
            "tier": IndicatorTier.CRITICAL,
            "description": "Dollar Index - Fortaleza global del dolar"
        },
        "usd_cop": {
            "symbol": "COP=X",
            "tier": IndicatorTier.CRITICAL,
            "description": "Dolar en Colombia"
        },

        # Tier 2 - Important
        "vix": {
            "symbol": "^VIX",
            "tier": IndicatorTier.IMPORTANT,
            "description": "Indice de volatilidad - Aversion al riesgo"
        },
        "treasury_10y": {
            "symbol": "^TNX",
            "tier": IndicatorTier.IMPORTANT,
            "description": "Bono Tesoro EEUU 10 anos"
        },
        # NOTE(review): ^IRX appears to be the 13-week T-bill yield index, not a
        # 2-year Treasury yield - verify the intended symbol for this entry.
        "treasury_2y": {
            "symbol": "^IRX",
            "tier": IndicatorTier.IMPORTANT,
            "description": "Bono Tesoro EEUU 2 anos"
        },
        "sp500": {
            "symbol": "^GSPC",
            "tier": IndicatorTier.IMPORTANT,
            "description": "S&P 500 - Sentimiento de mercado"
        },

        # Tier 3 - Context
        "petroleo_wti": {
            "symbol": "CL=F",
            "tier": IndicatorTier.CONTEXT,
            "description": "Petroleo WTI"
        },
        "oro": {
            "symbol": "GC=F",
            "tier": IndicatorTier.CONTEXT,
            "description": "Oro - Activo refugio"
        },
        "cafe": {
            "symbol": "KC=F",
            "tier": IndicatorTier.CONTEXT,
            "description": "Cafe - Exportacion colombiana"
        },
        "usd_mxn": {
            "symbol": "MXN=X",
            "tier": IndicatorTier.CONTEXT,
            "description": "Dolar en Mexico - Comparacion regional"
        },
        "usd_brl": {
            "symbol": "BRL=X",
            "tier": IndicatorTier.CONTEXT,
            "description": "Dolar en Brasil - Comparacion regional"
        },
        "usd_clp": {
            "symbol": "CLP=X",
            "tier": IndicatorTier.CONTEXT,
            "description": "Dolar en Chile - Comparacion regional"
        },
        "eur_usd": {
            "symbol": "EURUSD=X",
            "tier": IndicatorTier.CONTEXT,
            "description": "Euro vs Dolar"
        },
    }

    def __init__(self, timeout: int = 10):
        """
        Initialize the client.

        Args:
            timeout: Timeout in seconds. It is stored on the instance; no
                method of this class currently forwards it to yfinance.
        """
        self.timeout = timeout

    def _fetch_single_indicator(self, name: str, config: dict) -> MarketIndicator:
        """
        Fetch the current reading of one indicator.

        Any exception raised while querying yfinance is captured and returned
        in the ``error`` field rather than propagated, so one failing symbol
        does not abort a multi-indicator fetch.

        Args:
            name: Indicator name.
            config: Indicator configuration (reads ``"symbol"`` and ``"tier"``).

        Returns:
            MarketIndicator carrying either the data or an error message.
        """
        symbol = config["symbol"]
        tier = config["tier"]

        try:
            ticker = yf.Ticker(symbol)
            info = ticker.fast_info

            price = info.last_price
            prev_close = info.previous_close

            if price is None:
                return MarketIndicator(
                    name=name,
                    symbol=symbol,
                    tier=tier,
                    error="No price data available",
                    # Stamp this failure like the exception path does.
                    timestamp=datetime.now().isoformat(),
                )

            change_value = None
            change_pct = None

            # A missing or non-positive previous close cannot serve as a
            # denominator, so the change fields stay None in that case.
            if prev_close and prev_close > 0:
                change_value = price - prev_close
                change_pct = (change_value / prev_close) * 100

            return MarketIndicator(
                name=name,
                symbol=symbol,
                value=round(price, 4),
                # A previous close of 0 or None is treated as missing.
                previous_close=round(prev_close, 4) if prev_close else None,
                # Compare against None explicitly: a computed change of exactly
                # 0.0 is a real value and must not be collapsed to None.
                change_value=round(change_value, 4) if change_value is not None else None,
                change_pct=round(change_pct, 2) if change_pct is not None else None,
                tier=tier,
                timestamp=datetime.now().isoformat(),
            )

        except Exception as e:
            # Deliberate best-effort: report the failure inside the result.
            return MarketIndicator(
                name=name,
                symbol=symbol,
                tier=tier,
                error=str(e),
                timestamp=datetime.now().isoformat(),
            )

    def get_indicator(self, name: str) -> MarketIndicator:
        """
        Fetch one specific indicator.

        Args:
            name: Indicator name (e.g. 'petroleo_brent', 'dxy').

        Returns:
            MarketIndicator with the data (or an error message).

        Raises:
            ValueError: If the indicator is not in ``INDICATORS_CONFIG``.
        """
        if name not in self.INDICATORS_CONFIG:
            raise ValueError(f"Indicador '{name}' no encontrado. "
                           f"Disponibles: {list(self.INDICATORS_CONFIG.keys())}")

        config = self.INDICATORS_CONFIG[name]
        return self._fetch_single_indicator(name, config)

    def get_indicators_by_tier(self, tier: IndicatorTier) -> dict[str, MarketIndicator]:
        """
        Fetch every configured indicator that belongs to one tier.

        Args:
            tier: Importance level to select.

        Returns:
            Dictionary of indicators keyed by name, in configuration order.
        """
        return {
            name: self._fetch_single_indicator(name, config)
            for name, config in self.INDICATORS_CONFIG.items()
            if config["tier"] == tier
        }

    def get_critical_indicators(self) -> dict[str, MarketIndicator]:
        """Fetch only the critical (Tier 1) indicators."""
        return self.get_indicators_by_tier(IndicatorTier.CRITICAL)

    def get_all_indicators(self) -> dict[str, MarketIndicator]:
        """Fetch every configured indicator, keyed by name in configuration order."""
        return {
            name: self._fetch_single_indicator(name, config)
            for name, config in self.INDICATORS_CONFIG.items()
        }

    def get_market_snapshot(self) -> MarketSnapshot:
        """
        Build a snapshot containing every configured indicator.

        Returns:
            MarketSnapshot with all indicators, stamped after they are fetched.
        """
        indicators = self.get_all_indicators()

        return MarketSnapshot(
            timestamp=datetime.now().isoformat(),
            indicators=indicators
        )

    def get_historical_data(
        self,
        name: str,
        days: int = 30
    ) -> Optional[pd.DataFrame]:
        """
        Fetch historical data for one indicator.

        Args:
            name: Indicator name.
            days: Number of days to look back from now.

        Returns:
            DataFrame with the Open/High/Low/Close/Volume columns, or None when
            the history is empty or fetching it raised an exception (the error
            is printed in that case).

        Raises:
            ValueError: If the indicator is not in ``INDICATORS_CONFIG``.
        """
        if name not in self.INDICATORS_CONFIG:
            raise ValueError(f"Indicador '{name}' no encontrado.")

        symbol = self.INDICATORS_CONFIG[name]["symbol"]

        try:
            ticker = yf.Ticker(symbol)
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)

            history = ticker.history(start=start_date, end=end_date)

            if history.empty:
                return None

            return history[["Open", "High", "Low", "Close", "Volume"]]

        except Exception as e:
            print(f"Error obteniendo historico de {name}: {e}")
            return None

    def get_multiple_historical(
        self,
        names: list[str],
        days: int = 30
    ) -> dict[str, pd.DataFrame]:
        """
        Fetch historical data for several indicators.

        Indicators whose history comes back as None are left out of the result.

        Args:
            names: List of indicator names.
            days: Number of days to look back.

        Returns:
            Dictionary of DataFrames keyed by indicator name.

        Raises:
            ValueError: If any name is not in ``INDICATORS_CONFIG``.
        """
        result = {}

        for name in names:
            df = self.get_historical_data(name, days)
            if df is not None:
                result[name] = df

        return result


def get_market_context() -> dict:
    """
    Helper that fetches a market snapshot and returns a formatted summary.

    Only indicators reported as valid by the snapshot are included. Each entry
    holds the raw value, the percentage change, a display string such as
    ``"61.36 (+0.85%)"`` and the tier name.

    Returns:
        Dictionary with the snapshot timestamp and the formatted indicators.
    """
    snapshot = YahooFinanceClient().get_market_snapshot()

    formatted_indicators = {}
    for name, indicator in snapshot.valid_indicators.items():
        pct = indicator.change_pct

        if pct is None:
            suffix = ""
        else:
            # Negative numbers already carry their sign; only add "+".
            prefix = "+" if pct >= 0 else ""
            suffix = f" ({prefix}{pct}%)"

        formatted_indicators[name] = {
            "value": indicator.value,
            "change_pct": pct,
            "formatted": f"{indicator.value}{suffix}",
            "tier": indicator.tier.name,
        }

    return {
        "timestamp": snapshot.timestamp,
        "indicators": formatted_indicators,
    }


if __name__ == "__main__":
    # Demo run: print the critical indicators, then summary counts for a full
    # snapshot. `client`, `critical` and `snapshot` stay at module level.
    client = YahooFinanceClient()

    print("=" * 60)
    print("INDICADORES CRITICOS")
    print("=" * 60)

    critical = client.get_critical_indicators()
    for name, indicator in critical.items():
        if indicator.is_valid:
            # Test against None rather than truthiness so a 0.0 change is
            # still shown as "(+0.00%)" instead of being silently dropped.
            change = (
                f"({indicator.change_pct:+.2f}%)"
                if indicator.change_pct is not None
                else ""
            )
            print(f"{name}: {indicator.value} {change}")
        else:
            print(f"{name}: ERROR - {indicator.error}")

    print("\n" + "=" * 60)
    print("SNAPSHOT COMPLETO")
    print("=" * 60)

    snapshot = client.get_market_snapshot()
    print(f"Total indicadores: {len(snapshot.indicators)}")
    print(f"Validos: {len(snapshot.valid_indicators)}")
    print(f"Criticos: {len(snapshot.critical_indicators)}")

INDICADORES CRITICOS
petroleo_brent: 61.36 (+0.85%)
dxy: 98.038 (-0.03%)
usd_cop: 3742.47 (+0.69%)

SNAPSHOT COMPLETO
Total indicadores: 14
Validos: 14
Criticos: 3


In [7]:
# Print the full dataclass repr of `snapshot` (every indicator with all its fields).
print(snapshot)

MarketSnapshot(timestamp='2025-12-29T15:44:14.936888', indicators={'petroleo_brent': MarketIndicator(name='petroleo_brent', symbol='BZ=F', value=61.36, previous_close=60.84, change_value=0.52, change_pct=0.85, tier=<IndicatorTier.CRITICAL: 1>, timestamp='2025-12-29T15:44:07.660465', error=None), 'dxy': MarketIndicator(name='dxy', symbol='DX-Y.NYB', value=98.038, previous_close=98.069, change_value=-0.031, change_pct=-0.03, tier=<IndicatorTier.CRITICAL: 1>, timestamp='2025-12-29T15:44:08.008971', error=None), 'usd_cop': MarketIndicator(name='usd_cop', symbol='COP=X', value=3742.47, previous_close=3716.95, change_value=25.52, change_pct=0.69, tier=<IndicatorTier.CRITICAL: 1>, timestamp='2025-12-29T15:44:08.346559', error=None), 'vix': MarketIndicator(name='vix', symbol='^VIX', value=14.01, previous_close=13.6, change_value=0.41, change_pct=3.01, tier=<IndicatorTier.IMPORTANT: 2>, timestamp='2025-12-29T15:44:08.948359', error=None), 'treasury_10y': MarketIndicator(name='treasury_10y', sym