In [15]:
import requests
from io import BytesIO
import pandas as pd
import yaml
import os

def get_variables_from_yaml():
    """
    Load the project variables from the YAML secrets file.

    The container path ``/opt/secrets/variables.yaml`` is preferred; when it
    does not exist, the local development copy at
    ``./airflow/secrets/variables.yaml`` is read instead.

    Returns:
        Whatever ``yaml.safe_load`` parses from the selected file.

    Raises:
        FileNotFoundError: If the container path is absent and the local
            fallback cannot be opened either.
    """
    yaml_path = "/opt/secrets/variables.yaml"
    if not os.path.exists(yaml_path):
        # Container path is missing: fall back to the local development copy.
        yaml_path = "./airflow/secrets/variables.yaml"

    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's locale default encoding.
    with open(yaml_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

def load_and_get_df(url: str, alias: str, timeout: float = 60.0) -> pd.DataFrame:
    '''
    Download a gzip-compressed CSV from a URL and return it as a DataFrame.

    The file is fetched into memory, parsed with pandas, and tagged with a
    "city" column that holds ``alias`` on every row.

    Args:
        url: Location of the ``.csv.gz`` file to download.
        alias: Label used in the progress messages and stored in "city".
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block the cell indefinitely.

    Returns:
        The parsed DataFrame with the extra "city" column.

    Raises:
        Exception: If the server responds with a status code other than 200.
        requests.exceptions.RequestException: If the connection fails or the
            timeout elapses.
    '''
    print(f"Descargando datos de {alias}...")

    # Download the file from the URL straight into memory
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Include the status code so a failed download can be diagnosed.
        raise Exception(
            f"No se pudo descargar el archivo de {alias} "
            f"(HTTP {response.status_code})"
        )

    # Parse the gzip-compressed payload without writing it to disk
    df = pd.read_csv(BytesIO(response.content), compression='gzip')

    # Tag every row with the city label
    df["city"] = alias

    print(f"Registros en {alias}: {len(df)}")
    return df

In [16]:
# Dataset sources: city label -> download URL of its listings file
urls = {
    "Buenos Aires": "https://data.insideairbnb.com/argentina/ciudad-aut%C3%B3noma-de-buenos-aires/buenos-aires/2025-01-29/data/listings.csv.gz",
}

# Download one frame per city, then stack them under a fresh 0..n-1 index
dfs = [
    load_and_get_df(source, label)
    for label, source in urls.items()
]
dataframe = pd.concat(objs=dfs, ignore_index=True)

Descargando datos de Buenos Aires...
Registros en Buenos Aires: 35172


In [17]:
# Preview the first rows of the combined listings frame as a quick sanity check.
dataframe.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,city
0,11508,https://www.airbnb.com/rooms/11508,20250129142212,2025-01-30,city scrape,Amazing Luxurious Apt-Palermo Soho,LUXURIOUS 1 BDRM APT- POOL/ GYM/ 24-HR SECURIT...,AREA: PALERMO SOHO<br /><br />Minutes walking ...,https://a0.muscache.com/pictures/19357696/b1de...,42762,...,4.93,4.86,,f,1,1,0,0,0.29,Buenos Aires
1,14222,https://www.airbnb.com/rooms/14222,20250129142212,2025-01-30,city scrape,"RELAX IN HAPPY HOUSE - PALERMO, BUENOS AIRES",Beautiful cozy apartment in excellent location...,Palermo is such a perfect place to explore the...,https://a0.muscache.com/pictures/4695637/bbae8...,87710233,...,4.87,4.75,,f,6,6,0,0,0.8,Buenos Aires
2,15074,https://www.airbnb.com/rooms/15074,20250129142212,2025-01-30,previous scrape,ROOM WITH RIVER SIGHT,,,https://a0.muscache.com/pictures/91166/c0fdcb4...,59338,...,,,,f,1,0,1,0,,Buenos Aires
3,16695,https://www.airbnb.com/rooms/16695,20250129142212,2025-01-30,city scrape,DUPLEX LOFT 2 - SAN TELMO,,San Telmo is one of the best neighborhoods in ...,https://a0.muscache.com/pictures/619c33a9-0618...,64880,...,4.39,4.41,,t,9,9,0,0,0.27,Buenos Aires
4,20062,https://www.airbnb.com/rooms/20062,20250129142212,2025-01-30,city scrape,PENTHOUSE /Terrace & pool /City views /2bedrooms,,,https://a0.muscache.com/pictures/165679/2eb448...,75891,...,4.93,4.79,,f,4,4,0,0,1.84,Buenos Aires


In [21]:
# Shell escape: request the root path of whatever is listening on localhost:8800.
# NOTE(review): the recorded output shows the connection being refused, so this
# presumably needs a local service to be started first — confirm or remove.
!curl http://localhost:8800/

curl: (7) Failed to connect to localhost port 8800 after 0 ms: Couldn't connect to server
