In [1]:
#!pip freeze > requirements.txt

In [2]:
import pandas as pd
import polars as pl
import time
import numpy as np
import yfinance as yf

In [3]:
#!pip install yfinance

In [4]:
# Benchmark configuration.
TICKERS = [
    "AAPL", "MSFT", "AMZN", "GOOGL",
    "META", "NVDA", "TSLA", "NFLX",
]
PERIOD = "5y"       # five years of history
INTERVAL = "1d"     # daily bars
REPEATS = 3         # timed runs per benchmark; the best (lowest) time is kept

In [5]:
# Download the price history for every configured ticker in one call.
print("Descargando datos de Yahoo…")
raw = yf.download(
    tickers=" ".join(TICKERS),  # symbols passed as one space-separated string
    interval=INTERVAL,
    period=PERIOD,
    auto_adjust=False,          # the "Adj Close" column is renamed further down
    group_by="ticker",          # ticker sits on column level 0 (stacked later)
    progress=False,
    threads=True,
)

Descargando datos de Yahoo…


In [6]:
# Reshape the wide download (ticker on column level 0) into a long table with
# one row per (Date, Ticker).
#
# `future_stack=True` opts in to the stack implementation introduced in
# pandas 2.1 and silences the FutureWarning raised by the legacy one. The new
# implementation neither drops all-NaN rows nor sorts the resulting index, so
# both steps are spelled out to keep the same rows in the same order.
# NOTE(review): column order may now follow the download's column order;
# nothing below selects columns by position.
pdf = (
    raw.stack(level=0, future_stack=True)
       .dropna(how="all")
       .sort_index()
       .rename_axis(['Date', 'Ticker'])
       .reset_index()
       .rename(columns={"Adj Close": "Adj_Close"})
)

print("Filas totales:", len(pdf))

Filas totales: 10040


  raw.stack(level=0)


In [16]:
def resample_pandas(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-ticker rows into monthly OHLCV bars using pandas.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format frame with a datetime ``Date`` column, a ``Ticker``
        column and ``Open``/``High``/``Low``/``Close``/``Volume`` columns.

    Returns
    -------
    pd.DataFrame
        One row per (Ticker, month) with columns ``Ticker``, ``Date``,
        ``Open`` (first), ``High`` (max), ``Low`` (min), ``Close`` (last)
        and ``Volume`` (sum). ``Date`` holds the month-end bin label.
        Rows containing any NaN after aggregation are dropped.
    """
    monthly_ohlcv = {
        "Open": "first",
        "High": "max",
        "Low": "min",
        "Close": "last",
        "Volume": "sum",
    }
    return (
        df.set_index("Date")
          .groupby("Ticker")
          # An offset object instead of the "M" string alias: same month-end
          # bins, but no deprecation warning on pandas >= 2.2 and no
          # dependency on the "M"/"ME" alias spelling of a given release.
          .resample(pd.offsets.MonthEnd())
          .agg(monthly_ohlcv)
          .dropna()
          .reset_index()
    )

def resample_polars(df_pl: pl.DataFrame) -> pl.DataFrame:
    """Aggregate per-ticker rows into monthly OHLCV bars using polars.

    Parameters
    ----------
    df_pl : pl.DataFrame
        Long-format frame with ``Date``, ``Ticker``, ``Open``, ``High``,
        ``Low``, ``Close`` and ``Volume`` columns.
        NOTE(review): ``group_by_dynamic`` presumably needs ``Date`` sorted
        ascending within each ticker -- confirm against the caller.

    Returns
    -------
    pl.DataFrame
        One row per (Ticker, monthly window) with ``Open`` (first),
        ``High`` (max), ``Low`` (min), ``Close`` (last) and ``Volume`` (sum).
        NOTE(review): windows look to be labelled by their start, whereas
        the pandas version labels by month end -- confirm before comparing
        the two outputs row by row.
    """
    # Cast Date to a nanosecond-precision Datetime before windowing.
    df_pl = df_pl.with_columns(
        pl.col("Date").cast(pl.Datetime).dt.cast_time_unit("ns")
    )

    return (
        df_pl
        .group_by_dynamic(
            index_column="Date",
            every="1mo",
            # `by` was renamed to `group_by`; the old keyword is only kept
            # as a deprecated alias and triggers a warning.
            group_by=["Ticker"],
            closed="left"
        )
        .agg([
            pl.col("Open").first(),
            pl.col("High").max(),
            pl.col("Low").min(),
            pl.col("Close").last(),
            pl.col("Volume").sum()
        ])
    )

In [17]:
def best_time(fn, *args, repeats=REPEATS, **kw):
    """Return the shortest wall-clock duration, in seconds, of ``fn(*args, **kw)``.

    The callable is invoked ``repeats`` times. Each call is timed with
    ``time.perf_counter`` and its return value is discarded.
    """
    def timed_call():
        started = time.perf_counter()
        fn(*args, **kw)
        return time.perf_counter() - started

    return min(timed_call() for _ in range(repeats))

# Time both implementations on the same data. The pandas -> polars conversion
# is evaluated while building the argument, so it is not part of the timed call.
t_pandas = best_time(resample_pandas, pdf)
t_polars = best_time(resample_polars, pl.from_pandas(pdf))

# Report the best run of each library in milliseconds, plus their ratio.
print(
    "\n--- RESULTADOS ---",
    f"Pandas  : {t_pandas*1000:7.1f} ms",
    f"Polars  : {t_polars*1000:7.1f} ms",
    f"Speed-up: {t_pandas / t_polars:7.2f} ×",
    sep="\n",
)

  .resample("M")



--- RESULTADOS ---
Pandas  :    29.9 ms
Polars  :     1.3 ms
Speed-up:   22.35 ×


  .resample("M")
  .resample("M")
  df_pl
