# Lecture 2 Finance Practice
## Data finance (Peru / US)
- **BCRPData API (Peru)** (official): https://estadisticas.bcrp.gob.pe/estadisticas/series/ayuda/api
- **Yahoo Finance** via `yfinance` (US market data): https://ranaroussi.github.io/yfinance/
- **INEI open data (Peru)** (ENAPREF sample CSV): https://www.datosabiertos.gob.pe/


In [85]:
import numpy as np
import pandas as pd

pd.set_option("display.max_columns", 120)

In [86]:
START = "2022-01-01"
END = "2025-12-18"

In [87]:
# --- BCRP: daily PEN/USD buy & sell (official API) ---
# Codes:
# - PD04637PD: USD/PEN (buy)
# - PD04638PD: USD/PEN (sell)

import requests

bcrp_url = f"https://estadisticas.bcrp.gob.pe/estadisticas/series/api/PD04637PD-PD04638PD/json/{START}/{END}/esp"
try:
    r = requests.get(bcrp_url, timeout=30)
    r.raise_for_status()
    bcrp_obj = r.json()
except Exception as e:
    bcrp_obj = {"periods": []}
    print("BCRP request failed:", type(e).__name__, str(e))

periods = bcrp_obj.get("periods", [])
rows = []
for p in periods:
    name = p.get("name")
    vals = p.get("values", [])
    if isinstance(vals, str):
        vals = [vals]
    if name is None or not isinstance(vals, list) or len(vals) < 2:
        continue
    rows.append([name, vals[0], vals[1]])

fx = pd.DataFrame(rows, columns=["date_raw", "PENUSD_buy", "PENUSD_sell"])

# Convert numeric
if fx.shape[0] > 0:
    fx["PENUSD_buy"] = pd.to_numeric(fx["PENUSD_buy"].replace({"n.d.": np.nan, "nd": np.nan, "N.D.": np.nan}), errors="coerce")
    fx["PENUSD_sell"] = pd.to_numeric(fx["PENUSD_sell"].replace({"n.d.": np.nan, "nd": np.nan, "N.D.": np.nan}), errors="coerce")

# Parse dates (BCRP labels can be ISO-like or ddMonYY or Mon.YYYY)
if fx.shape[0] > 0:
    s = fx["date_raw"].astype(str).str.strip()

    # ISO parse
    dt_iso = pd.to_datetime(s, errors="coerce")

    # Monthly like Mar.2020
    mask_monthly = s.str.match(r"^[A-Za-zÁÉÍÓÚÑñ]{3}\.[0-9]{4}$", na=False)
    mon_map = {
        "Ene":"Jan","Feb":"Feb","Mar":"Mar","Abr":"Apr","May":"May","Jun":"Jun",
        "Jul":"Jul","Ago":"Aug","Set":"Sep","Sep":"Sep","Oct":"Oct","Nov":"Nov","Dic":"Dec"
    }
    mon_es = s.where(mask_monthly).str.slice(0,3)
    year4 = s.where(mask_monthly).str.slice(4,8)
    mon_en = mon_es.map(mon_map)
    dt_monthly = pd.to_datetime(mon_en + "." + year4, format="%b.%Y", errors="coerce")

    # Daily like 18Dic25
    mask_daily = s.str.match(r"^[0-9]{2}[A-Za-zÁÉÍÓÚÑñ]{3}[0-9]{2}$", na=False)
    day = s.where(mask_daily).str.slice(0,2)
    mon_es2 = s.where(mask_daily).str.slice(2,5)
    yy = pd.to_numeric(s.where(mask_daily).str.slice(5,7), errors="coerce")
    mon_en2 = mon_es2.map(mon_map)
    year = np.where(yy <= 69, 2000 + yy, 1900 + yy)
    year_s = pd.Series(year).astype("Int64").astype(str)
    dt_daily = pd.to_datetime(day + mon_en2 + year_s, format="%d%b%Y", errors="coerce")

    dt = dt_iso.copy()
    dt = dt.fillna(dt_monthly)
    dt = dt.fillna(dt_daily)

    fx["date"] = dt
    fx = fx.dropna(subset=["date"]).sort_values("date").reset_index(drop=True)

fx.head(), fx.tail(), fx.shape

  dt_iso = pd.to_datetime(s, errors="coerce")


(    date_raw  PENUSD_buy  PENUSD_sell       date
 0  01.Feb.22    3.871333     3.877667 2022-02-01
 1  02.Feb.22    3.852000     3.857000 2022-02-02
 2  03.Feb.22    3.858500     3.860833 2022-02-03
 3  04.Feb.22    3.863000     3.867833 2022-02-04
 4  07.Feb.22    3.838500     3.845833 2022-02-07,
       date_raw  PENUSD_buy  PENUSD_sell       date
 579  24.Nov.25    3.382857     3.385286 2025-11-24
 580  25.Nov.25    3.375143     3.377214 2025-11-25
 581  26.Nov.25    3.365000     3.366857 2025-11-26
 582  27.Nov.25    3.364143     3.366429 2025-11-27
 583  28.Nov.25    3.360714     3.362500 2025-11-28,
 (584, 4))

In [88]:
# --- Yahoo Finance via yfinance: US tickers (real market data) ---
tickers = ["SPY", "QQQ", "TLT", "GLD", "EEM"]

try:
    import yfinance as yf
except Exception as e:
    yf = None
    print("Could not import yfinance:", type(e).__name__, str(e))

if yf is not None:
    try:
        data = yf.download(tickers, start=START, end=END, auto_adjust=True, progress=False)
    except Exception as e:
        data = pd.DataFrame()
        print("yfinance download failed:", type(e).__name__, str(e))
else:
    data = pd.DataFrame()

# Convert to long format: date, ticker, close, volume
if isinstance(data, pd.DataFrame) and data.shape[0] > 0:
    if isinstance(data.columns, pd.MultiIndex):
        close = data["Close"].copy()
        vol = data["Volume"].copy()
    else:
        close = data[["Close"]].rename(columns={"Close": tickers[0]})
        vol = data[["Volume"]].rename(columns={"Volume": tickers[0]})

    close.index.name = "date"
    vol.index.name = "date"

    us_close_long = close.reset_index().melt(id_vars="date", var_name="ticker", value_name="close")
    us_vol_long = vol.reset_index().melt(id_vars="date", var_name="ticker", value_name="volume")
    us_mkt = us_close_long.merge(us_vol_long, on=["date","ticker"], how="inner").dropna(subset=["close"])
else:
    us_mkt = pd.DataFrame(columns=["date","ticker","close","volume"])

us_mkt.head(), us_mkt.shape

(        date ticker      close    volume
 0 2022-01-03    EEM  44.624969  27572700
 1 2022-01-04    EEM  44.470764  24579500
 2 2022-01-05    EEM  43.745163  46425100
 3 2022-01-06    EEM  43.944714  34288700
 4 2022-01-07    EEM  44.343792  32640900,
 (4970, 4))

In [89]:
# INEI: read correctly using '|' as delimiter (pipe-separated file)
inei_url = "https://www.datosabiertos.gob.pe/sites/default/files/Cap.%20100_Vivienda-Hogar_Muestra.csv"

inei = pd.read_csv(
    inei_url,
    sep="|",            # <-- key fix: the file is pipe-separated
    nrows=3000,
    low_memory=False,
    encoding_errors="replace"
)

inei.shape, list(inei.columns)[:5], inei.head()


((200, 286),
 ['ANIO', 'MES', 'CONGLOME', 'VIVIENDA', 'HOGAR'],
    ANIO  MES  CONGLOME  VIVIENDA  HOGAR  UBIGEO  DOMINIO  ESTRATO  PERIODO  \
 0  2008    6         1        15     11   10101        4        4        3   
 1  2008    6         1        46     11   10101        4        4        3   
 2  2008    6         1        62     11   10101        4        4        3   
 3  2008    6         1        88     11   10101        4        4        3   
 4  2008    6         1        99     11   10101        4        4        3   
 
    TIPO  RFINAL  REEMPLAZO  P101  P102  P102_A  P103  P104_1  P104_A1  \
 0     1       4          0   NaN   NaN     NaN   NaN     NaN      NaN   
 1     1       1          0   1.0   2.0     NaN  40.0     2.0      NaN   
 2     1       1          0   1.0   2.0     NaN  50.0     2.0      NaN   
 3     1       1          0   1.0   2.0     NaN  30.0     2.0      NaN   
 4     1       1          0   1.0   2.0     NaN  20.0     2.0      NaN   
 
    P104_B1  P

## 3.2 Pandas Series

### 3.2.1 From lists to Series exercise

Using `fx`:

1. Create `PENUSD_mid = (PENUSD_buy + PENUSD_sell) / 2`.
2. Take the **last 15 mid values** as a Python list.
3. Create a `pd.Series` from that list.
4. Name it `PENUSD_mid_last15`.

In [90]:
fx["PENUSD_mid"] = (fx["PENUSD_buy"] + fx["PENUSD_sell"]) / 2

PENUSD_mid_last15 = pd.Series(
    fx["PENUSD_mid"].dropna().tail(15).tolist(),
    name="PENUSD_mid_last15"
)

PENUSD_mid_last15

0     3.370750
1     3.359500
2     3.368214
3     3.364071
4     3.370393
5     3.362321
6     3.367964
7     3.376429
8     3.379857
9     3.388214
10    3.384071
11    3.376179
12    3.365929
13    3.365286
14    3.361607
Name: PENUSD_mid_last15, dtype: float64

### 3.2.2 From NumPy array to Series 

Using `us_mkt`:

1. Filter to `ticker == "SPY"`.
2. Take `close` as a NumPy array.
3. Create a Series indexed by `date` named `SPY_close_series`.
4. Compute the mean/min/max with Series methods.

In [91]:

# 1 Filter rows where ticker == "SPY" and keep only the columns we need.
spy_df = us_mkt.loc[us_mkt["ticker"].eq("SPY"), ["date", "close"]].dropna(subset=["date", "close"])

# 2 Convert the 'close' column to a NumPy array (this is what the exercise asks for)
spy_close_np = spy_df["close"].to_numpy()

# 3 Convert the 'date' column to datetime and use it as the Series index.
#    Name the Series exactly as required: "SPY_close_series".
SPY_close_series = pd.Series(
    spy_close_np,
    index=pd.to_datetime(spy_df["date"]),
    name="SPY_close_series"
)

# 4 Compute summary statistics using pandas Series methods.
summary_stats = {
    "mean": SPY_close_series.mean(),
    "min": SPY_close_series.min(),
    "max": SPY_close_series.max(),
}

SPY_close_series.head(), summary_stats

(date
 2022-01-03    451.875183
 2022-01-04    451.723785
 2022-01-05    443.049744
 2022-01-06    442.633545
 2022-01-07    440.883545
 Name: SPY_close_series, dtype: float64,
 {'mean': np.float64(485.6085832008653),
  'min': 341.1820983886719,
  'max': 687.1395263671875})

### 3.2.3 From Dictionary to Series 

Using `us_mkt`:

1. Compute the **last available close** for each ticker in `tickers`.
2. Store it in a dict `{ticker: last_close}`.
3. Convert to a Series and sort descending.

In [92]:
# Ensure we have only the relevant columns, remove missing closes, and sort by date
tmp = us_mkt.loc[:, ["date", "ticker", "close"]].dropna(subset=["date", "close"]).copy()
tmp["date"] = pd.to_datetime(tmp["date"])
tmp = tmp.sort_values(["ticker", "date"])

# Step 1 + 2: build dict {ticker: last_close}
last_close_by_ticker = (
    tmp.groupby("ticker")["close"]
       .last()                 # last row per ticker after sorting by date
       .to_dict()              # convert to dict
)

# Step 3: convert dict -> Series and sort descending
last_close_series = (
    pd.Series(last_close_by_ticker, dtype=float)
      .sort_values(ascending=False)
)

last_close_series

SPY    669.421936
QQQ    600.409973
GLD    399.290009
TLT     87.459633
EEM     52.599998
dtype: float64

### 3.2.4 Series vs NumPy 

Goal: show why pandas alignment matters.

1. Create two Series indexed by date:
   - FX mid-rate from `fx`
   - SPY close from `us_mkt`
2. Combine them into a DataFrame (pandas aligns on dates).
3. Separately, build two NumPy arrays by truncating to the same length.
4. In markdown: explain why alignment is safer.

In [93]:
# --- FX mid-rate Series (indexed by date) ---
fx_mid = (
    fx.loc[:, ["date", "PENUSD_buy", "PENUSD_sell"]]
      .dropna(subset=["date", "PENUSD_buy", "PENUSD_sell"])
      .assign(PENUSD_mid=lambda d: (d["PENUSD_buy"] + d["PENUSD_sell"]) / 2)
      .set_index("date")["PENUSD_mid"]
      .sort_index()
)

# --- SPY close Series (indexed by date) ---
spy_close = (
    us_mkt.loc[us_mkt["ticker"].eq("SPY"), ["date", "close"]]
         .dropna(subset=["date", "close"])
)

spy_close["date"] = pd.to_datetime(spy_close["date"])
spy_close_series = spy_close.set_index("date")["close"].sort_index()

# --- pandas alignment: align on the date index automatically ---
aligned_df = pd.DataFrame({"FX_mid": fx_mid, "SPY_close": spy_close_series}).dropna()

# --- NumPy approach: truncate arrays to the same length (no date alignment) ---
# NOTE: This can compare different dates if the series have missing days or different calendars.
n = min(len(fx_mid), len(spy_close_series))
fx_np = fx_mid.to_numpy()[:n]
spy_np = spy_close_series.to_numpy()[:n]

aligned_df.head(), fx_np.shape, spy_np.shape

(              FX_mid   SPY_close
 date                            
 2022-02-01  3.874500  428.454163
 2022-02-02  3.854500  432.616180
 2022-02-03  3.859667  422.447632
 2022-02-04  3.865417  424.434082
 2022-02-07  3.842167  423.071960,
 (584,),
 (584,))

### 3.2.5 Indexing 

1. From `last_close_series`, use `.iloc` to select the top 3 tickers.
2. Use `.loc` to select the SPY value.

In [94]:
# 1) Top 3 by position (first three entries)
top3 = last_close_series.iloc[:3]

# 2) SPY value by label (safe lookup: returns NaN if SPY is missing)
spy_value = last_close_series.get("SPY", np.nan)

top3, spy_value

(SPY    669.421936
 QQQ    600.409973
 GLD    399.290009
 dtype: float64,
 np.float64(669.4219360351562))

## 3.3 DataFrame

### 3.3.1 DataFrame Generation 

1. Build a DataFrame with daily FX mid-rate and SPY close aligned by date.
2. Create columns:
   - `FX_ret` and `SPY_ret` using `pct_change()`
   - `FX_abs_change` = absolute day-to-day change in FX mid
3. Keep columns in this order:
   `["PENUSD_mid","SPY_close","FX_ret","SPY_ret","FX_abs_change"]`

In [95]:
# --- Step 1: Create the FX mid-rate series indexed by date ---
# We compute the mid-rate as the average of buy and sell, then keep it as a clean, date-indexed Series.
fx_mid = (
    fx.loc[:, ["date", "PENUSD_buy", "PENUSD_sell"]]
      .dropna(subset=["date", "PENUSD_buy", "PENUSD_sell"])                 # remove rows with missing pieces
      .assign(PENUSD_mid=lambda d: (d["PENUSD_buy"] + d["PENUSD_sell"]) / 2) # mid = (buy + sell)/2
      .set_index("date")["PENUSD_mid"]                                      # turn into a Series indexed by date
      .sort_index()                                                         # sort so time-based operations make sense
)

# --- Step 2: Create the SPY close series indexed by date ---
# We filter us_mkt to SPY only, then build a clean Series with date as the index.
spy_close_series = (
    us_mkt.loc[us_mkt["ticker"].eq("SPY"), ["date", "close"]]   # keep only SPY rows and the needed columns
         .dropna(subset=["date", "close"])                      # drop missing dates/closes
)
spy_close_series["date"] = pd.to_datetime(spy_close_series["date"])          # ensure dates are true datetimes
spy_close_series = (
    spy_close_series.set_index("date")["close"]                # make it a Series indexed by date
                  .sort_index()                                # sort by date
)

# --- Step 3: Align FX and SPY by date in a single DataFrame ---
# Pandas aligns automatically by the index (date). dropna() keeps only dates present in BOTH series.
df = pd.DataFrame({
    "PENUSD_mid": fx_mid,
    "SPY_close": spy_close_series
}).dropna()

# --- Step 4: Create return and change features ---
# pct_change() computes (today / yesterday - 1), i.e., simple daily returns.
df_features = df.copy()
df_features["FX_ret"] = df_features["PENUSD_mid"].pct_change()               # daily FX return
df_features["SPY_ret"] = df_features["SPY_close"].pct_change()               # daily SPY return

# Absolute day-to-day change in FX mid-rate (in PEN per USD).
df_features["FX_abs_change"] = df_features["PENUSD_mid"].diff().abs()        # |mid_t - mid_{t-1}|

# --- Step 5: Keep columns in the required order ---
df_features = df_features[["PENUSD_mid", "SPY_close", "FX_ret", "SPY_ret", "FX_abs_change"]]

df_features.head()

Unnamed: 0_level_0,PENUSD_mid,SPY_close,FX_ret,SPY_ret,FX_abs_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-02-01,3.8745,428.454163,,,
2022-02-02,3.8545,432.61618,-0.005162,0.009714,0.02
2022-02-03,3.859667,422.447632,0.00134,-0.023505,0.005167
2022-02-04,3.865417,424.434082,0.00149,0.004702,0.00575
2022-02-07,3.842167,423.07196,-0.006015,-0.003209,0.02325


### 3.3.2 Indexing 

1. Use `.iloc` to select first 10 rows of returns only.
2. Use `.loc` to select a date range in 2024.

In [96]:
# 1) First 10 rows of returns only (position-based)
first10_rets = df_features.loc[:, ["FX_ret", "SPY_ret"]].iloc[:10]

# 2) Date range selection in 2024 (label-based, using the datetime index)
range_2024 = df_features.loc["2024-01-01":"2024-12-31"]

first10_rets, range_2024.head()

(              FX_ret   SPY_ret
 date                          
 2022-02-01       NaN       NaN
 2022-02-02 -0.005162  0.009714
 2022-02-03  0.001340 -0.023505
 2022-02-04  0.001490  0.004702
 2022-02-07 -0.006015 -0.003209
 2022-02-08 -0.000564  0.008228
 2022-02-09 -0.004970  0.014636
 2022-02-10 -0.017252 -0.017965
 2022-02-11 -0.000222 -0.019719
 2022-02-14  0.008835 -0.003269,
             PENUSD_mid   SPY_close    FX_ret   SPY_ret  FX_abs_change
 date                                                                 
 2024-02-01    3.805250  477.398163  0.018104  0.076213       0.067667
 2024-02-02    3.832000  482.423981  0.007030  0.010528       0.026750
 2024-02-05    3.855667  480.667389  0.006176 -0.003641       0.023667
 2024-02-06    3.856250  482.062866  0.000151  0.002903       0.000583
 2024-02-07    3.864667  486.083527  0.002183  0.008341       0.008417)

### 3.3.3 General Methods 
1. Use `.describe()` for return columns.
2. Find the 5 highest SPY daily returns and the dates.
3. Create `FX_direction` = "up" if `FX_ret > 0` else "down".
4. Count how many "up" days per calendar year.

In [97]:
# Make sure FX_direction exists (from the previous step)
df_features["FX_direction"] = np.where(df_features["FX_ret"] > 0, "up", "down")

# Extract years only for "up" days (this returns an array of years, one per "up" day)
up_years = df_features.index[df_features["FX_direction"].eq("up")].year

# Count occurrences of each year (no groupby used)
up_days_per_year = pd.Series(up_years).value_counts().sort_index()

up_days_per_year

date
2022    66
2023    71
2024    72
2025    57
Name: count, dtype: int64

### 3.3.4 Importing Data 

Using INEI sample `inei`:

1. Display shape and first 5 columns.
2. Pick 2 columns and rename to snake_case.
3. Keep only those 2 columns in a new DataFrame `inei_small`.

In [98]:
# 3.3.4 Importing Data (solution after reading with sep="|")
#
# 1) Show shape and first 5 columns
inei_shape = inei.shape
inei_first5_cols = list(inei.columns)[:5]

# 2) Pick two columns and rename to snake_case
inei_small = (
    inei.loc[:, ["ANIO", "MES"]]
        .rename(columns={"ANIO": "year", "MES": "month"})
)

inei_shape, inei_first5_cols, inei_small.head()


((200, 286),
 ['ANIO', 'MES', 'CONGLOME', 'VIVIENDA', 'HOGAR'],
    year  month
 0  2008      6
 1  2008      6
 2  2008      6
 3  2008      6
 4  2008      6)

### 3.3.5 Filtering Data 

Using `df_features`:

1. Filter days where `SPY_ret < -0.02` (large negative days).
2. Filter days where `FX_abs_change` is in the top 1%.
3. Compare counts.

In [99]:
# 1) Days where SPY had a large negative return (< -2%)
neg_spy_days = df_features.loc[df_features["SPY_ret"] < -0.02]

# 2) Days where FX_abs_change is in the top 1% (>= 99th percentile)
fx_threshold = df_features["FX_abs_change"].quantile(0.99)
fx_jump_days = df_features.loc[df_features["FX_abs_change"] >= fx_threshold]

len(neg_spy_days), len(fx_jump_days)

(24, 6)

### 3.3.6 Dealing with Nulls 
Using `us_mkt`:

1. Copy `us_mkt` to `us_mkt_nan`.
2. Set 1% of `close` to NaN (fixed random seed).
3. Create:
   - `us_drop`: drop NaNs
   - `us_fill`: fill NaNs with ticker-specific median close
4. Compare shapes.

In [100]:
# 1 Copy
us_mkt_nan = us_mkt.copy()

# 2 Set ~1% of close to NaN 
idx = us_mkt_nan.sample(frac=0.01, random_state=42).index
us_mkt_nan.loc[idx, "close"] = np.nan

# 3 Drop NaNs
us_drop = us_mkt_nan.dropna(subset=["close"]).copy()

# Fill NaNs with ticker-specific median (NO groupby)
tickers_unique = us_mkt_nan["ticker"].dropna().unique()

median_by_ticker = {
    t: us_mkt_nan.loc[us_mkt_nan["ticker"].eq(t), "close"].median()
    for t in tickers_unique
}

us_fill = us_mkt_nan.copy()
us_fill["close"] = us_fill["close"].fillna(us_fill["ticker"].map(median_by_ticker))

# 4 Compare shapes
us_mkt.shape, us_mkt_nan.shape, us_drop.shape, us_fill.shape

((4970, 4), (4970, 4), (4920, 4), (4970, 4))

### 3.3.7 Duplicates 

1. Create `dup_df` by stacking the last 5 rows of `us_mkt` twice.
2. Detect duplicates using `.duplicated()`.
3. Remove them using `.drop_duplicates()`.

In [101]:
# 1Stack the last 5 rows twice
last5 = us_mkt.tail(5)
dup_df = pd.concat([last5, last5], axis=0, ignore_index=True)

# 2 Detect duplicates (True means "this row is a duplicate of a previous row")
dup_mask = dup_df.duplicated()

# 3 Remove duplicates (keep the first occurrence by default)
dedup_df = dup_df.drop_duplicates().reset_index(drop=True)

dup_df.shape, dedup_df.shape


((10, 4), (5, 4))

### 3.3.8 Groupby 


Using `us_mkt`:

1. Group by `ticker` and compute:
   - mean close
   - median close
   - max volume
2. Rename columns clearly.
3. Sort by mean close descending.

In [102]:
ticker_summary = (
    us_mkt
    .dropna(subset=["ticker", "close"])              # ensure close exists for the stats
    .groupby("ticker")
    .agg(
        mean_close=("close", "mean"),                # average close per ticker
        median_close=("close", "median"),            # median close per ticker
        max_volume=("volume", "max")                 # max volume per ticker (NaN-safe)
    )
    .sort_values("mean_close", ascending=False)      # highest mean close first
)

ticker_summary

Unnamed: 0_level_0,mean_close,median_close,max_volume
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SPY,485.608583,460.73999,256611400
QQQ,411.806857,400.730164,198685800
GLD,220.130422,187.864998,62025000
TLT,91.395622,88.549709,131353500
EEM,40.46235,39.107407,134225700


### 3.3.9 Reshape 

1. Create a 1-row wide DataFrame with last closes per ticker.
2. Convert it to long format with `melt()` into columns: `ticker`, `last_close`.
3. Pivot `us_mkt` into a wide table: index=`date`, columns=`ticker`, values=`close` (keep first 50 dates).

In [103]:
# 1  1-row wide DataFrame with last closes per ticker ---
# We sort by ticker and date so "last" means the most recent observation for each ticker.
tmp = us_mkt.loc[:, ["date", "ticker", "close"]].dropna(subset=["date", "ticker", "close"]).copy()
tmp["date"] = pd.to_datetime(tmp["date"])
tmp = tmp.sort_values(["ticker", "date"])

# Get last close per ticker (Series indexed by ticker), then convert to a 1-row DataFrame
last_close_series = tmp.groupby("ticker")["close"].last()
wide_last = last_close_series.to_frame().T
wide_last.index = ["last_close"]  # optional: label the single row

# 2 Convert wide -> long using melt() ---
# Reset index so melt has a normal DataFrame to work with.
long_last = (
    wide_last.reset_index(drop=True)
            .melt(var_name="ticker", value_name="last_close")
)

# 3 Pivot long us_mkt into a wide close table (date x ticker) ---
# Pivot creates a table where each ticker is a column and each row is a date.
wide_close = (
    tmp.pivot(index="date", columns="ticker", values="close")
       .sort_index()
       .head(50)
)

wide_last, long_last.head(), wide_close.head()

(ticker            EEM         GLD         QQQ         SPY        TLT
 last_close  52.599998  399.290009  600.409973  669.421936  87.459633,
   ticker  last_close
 0    EEM   52.599998
 1    GLD  399.290009
 2    QQQ  600.409973
 3    SPY  669.421936
 4    TLT   87.459633,
 ticker            EEM         GLD         QQQ         SPY         TLT
 date                                                                 
 2022-01-03  44.624969  168.330002  392.184082  451.875183  125.295334
 2022-01-04  44.470764  169.570007  387.097321  451.723785  124.774353
 2022-01-05  43.745163  169.059998  375.205231  443.049744  124.097099
 2022-01-06  43.944714  166.990005  374.941620  442.633545  124.418365
 2022-01-07  44.343792  167.750000  370.879974  440.883545  123.524025)

### 3.3.10 Merge (Assignment)

Goal: merge Peru macro data (BCRP) with US market data (Yahoo) at monthly frequency.

1. Fetch BCRP monthly policy rate: code `PD12301MD`.
2. Build a monthly SPY average close from `us_mkt`.
3. Merge the two tables.
4. Save to `outputs/lecture2_policy_spy_monthly.csv`.

In [104]:
# Fetch BCRP monthly policy rate (official API)
policy_url = f"https://estadisticas.bcrp.gob.pe/estadisticas/series/api/PD12301MD/json/{START}/{END}/esp"
try:
    r = requests.get(policy_url, timeout=30)
    r.raise_for_status()
    pol_obj = r.json()
except Exception as e:
    pol_obj = {"periods": []}
    print("BCRP policy request failed:", type(e).__name__, str(e))

rows = []
for p in pol_obj.get("periods", []):
    name = p.get("name")
    vals = p.get("values", [])
    if isinstance(vals, str):
        vals = [vals]
    if name is None or not isinstance(vals, list) or len(vals) < 1:
        continue
    rows.append([name, vals[0]])

policy = pd.DataFrame(rows, columns=["date_raw", "policy_rate"])
if policy.shape[0] > 0:
    policy["policy_rate"] = pd.to_numeric(policy["policy_rate"].replace({"n.d.": np.nan, "nd": np.nan, "N.D.": np.nan}), errors="coerce")

    s = policy["date_raw"].astype(str).str.strip()
    dt_iso = pd.to_datetime(s, errors="coerce")

    mask_monthly = s.str.match(r"^[A-Za-zÁÉÍÓÚÑñ]{3}\.[0-9]{4}$", na=False)
    mon_map = {
        "Ene":"Jan","Feb":"Feb","Mar":"Mar","Abr":"Apr","May":"May","Jun":"Jun",
        "Jul":"Jul","Ago":"Aug","Set":"Sep","Sep":"Sep","Oct":"Oct","Nov":"Nov","Dic":"Dec"
    }
    mon_es = s.where(mask_monthly).str.slice(0,3)
    year4 = s.where(mask_monthly).str.slice(4,8)
    mon_en = mon_es.map(mon_map)
    dt_monthly = pd.to_datetime(mon_en + "." + year4, format="%b.%Y", errors="coerce")

    policy["date"] = dt_iso.fillna(dt_monthly)
    policy = policy.dropna(subset=["date"]).sort_values("date").reset_index(drop=True)

policy.head(), policy.tail()

  dt_iso = pd.to_datetime(s, errors="coerce")


(    date_raw  policy_rate       date
 0  01.Feb.22          3.0 2022-02-01
 1  02.Feb.22          3.0 2022-02-02
 2  03.Feb.22          3.0 2022-02-03
 3  04.Feb.22          3.0 2022-02-04
 4  07.Feb.22          3.0 2022-02-07,
       date_raw  policy_rate       date
 589  24.Nov.25         4.25 2025-11-24
 590  25.Nov.25         4.25 2025-11-25
 591  26.Nov.25         4.25 2025-11-26
 592  27.Nov.25         4.25 2025-11-27
 593  28.Nov.25         4.25 2025-11-28)

In [105]:
from pathlib import Path
Path("outputs").mkdir(exist_ok=True)

spy_monthly = (
    us_mkt.loc[us_mkt["ticker"].eq("SPY"), ["date", "close"]]
         .dropna(subset=["date", "close"])
         .assign(date=lambda d: pd.to_datetime(d["date"]))
         .set_index("date")["close"]
         .resample("M")
         .mean()
         .rename("spy_close_avg")
         .reset_index()
)

policy_monthly = (
    policy.loc[:, ["date", "policy_rate"]]
          .dropna(subset=["date", "policy_rate"])
          .assign(date=lambda d: pd.to_datetime(d["date"]))
          .sort_values("date")
)

merged_monthly = policy_monthly.merge(spy_monthly, on="date", how="inner")

out_path = Path("outputs/lecture_2_finance.csv")
merged_monthly.to_csv(out_path, index=False)

  .resample("M")
