# 4. If and Loops — Finance Practice (Assignments Only)

This notebook follows the **same topics and order** as the original Lecture 3 (`If and Loops`). All tasks use **real financial/economic data** from Peru and the US.

**Student rule:** fill only the `TODO` blocks. Do *not* paste full solutions from elsewhere.

## Data sources used

- **BCRPData (Peru, official)**: BCRP statistical series API.
  - API guide: https://estadisticas.bcrp.gob.pe/estadisticas/series/ayuda/api
  - Example daily series list includes BVL indexes and FX.
- **Yahoo Finance (US market data)** via `yfinance` (community wrapper for Yahoo Finance).
  - Docs: https://ranaroussi.github.io/yfinance/
- **SEC EDGAR Data APIs (US, official)**: company facts JSON.
  - Docs: https://www.sec.gov/search-filings/edgar-application-programming-interfaces

If any download fails (internet/firewall), this notebook is designed to **keep running** with empty data, so you can still practice control flow syntax.

In [1]:
import numpy as np
import pandas as pd
# Let pandas display up to 120 columns before truncating a wide DataFrame.
pd.set_option('display.max_columns', 120)


In [2]:
from __future__ import annotations

from pathlib import Path
import hashlib
import re
import warnings

import numpy as np
import pandas as pd

# Directory (relative to the current working directory) where downloaded data is cached.
CACHE_DIR = Path(".cache")
CACHE_DIR.mkdir(exist_ok=True)

# NOTE(review): this silences *every* warning for the rest of the session, including
# pandas deprecation warnings that may signal behavior changes — consider narrowing it.
warnings.filterwarnings("ignore")

# Spanish three-letter month abbreviations mapped to their English equivalents.
# Both "Set" and "Sep" are accepted for September.
_ES_TO_EN_MONTH = {
    "Ene": "Jan", "Feb": "Feb", "Mar": "Mar", "Abr": "Apr", "May": "May", "Jun": "Jun",
    "Jul": "Jul", "Ago": "Aug", "Set": "Sep", "Sep": "Sep", "Oct": "Oct", "Nov": "Nov", "Dic": "Dec"
}

def _hash_key(*parts: str) -> str:
    h = hashlib.sha256()
    for p in parts:
        h.update(str(p).encode("utf-8"))
        h.update(b"|")
    return h.hexdigest()[:24]

def _normalize_period(code: str, period: str | None) -> str | None:
    if period is None:
        return None
    period = str(period).strip()
    freq = code[-2:].upper() if len(code) >= 2 else ""

    if freq == "PD":  # daily
        if re.fullmatch(r"\d{4}-\d{1,2}", period):
            y, m = period.split("-")
            return f"{int(y):04d}-{int(m):02d}-01"
        if re.fullmatch(r"\d{4}", period):
            return f"{int(period):04d}-01-01"
        return period

    if freq == "PM":  # monthly
        m = re.fullmatch(r"(\d{4})-(\d{1,2})-(\d{1,2})", period)
        if m:
            y, mo, _ = m.groups()
            return f"{int(y):04d}-{int(mo)}"
        m = re.fullmatch(r"(\d{4})-(\d{1,2})", period)
        if m:
            y, mo = m.groups()
            return f"{int(y):04d}-{int(mo)}"
        if re.fullmatch(r"\d{4}", period):
            return f"{int(period):04d}-1"
        return period

    if freq == "MD":  # daily index
        return period

    return period

def _parse_bcrp_period_name(name: str) -> pd.Timestamp:
    s = str(name).strip()

    # ISO-ish
    try:
        if re.fullmatch(r"\d{4}(-\d{1,2}){0,2}", s):
            return pd.to_datetime(s, errors="raise")
    except Exception:
        pass

    # Monthly like "Mar.2020"
    m = re.fullmatch(r"([A-Za-zÁÉÍÓÚÑñ]{3})\.(\d{4})", s)
    if m:
        mon_es, y = m.groups()
        mon = _ES_TO_EN_MONTH.get(mon_es[:3], mon_es[:3])
        return pd.to_datetime(f"{mon}.{y}", format="%b.%Y", errors="coerce")

    # Daily like "18Nov25" or "02Ene97"
    m = re.fullmatch(r"(\d{2})([A-Za-zÁÉÍÓÚÑñ]{3})(\d{2})", s)
    if m:
        d, mon_es, yy = m.groups()
        mon = _ES_TO_EN_MONTH.get(mon_es[:3], mon_es[:3])
        year = 2000 + int(yy) if int(yy) <= 69 else 1900 + int(yy)
        return pd.to_datetime(f"{d}{mon}{year}", format="%d%b%Y", errors="coerce")

    return pd.to_datetime(s, errors="coerce")

def bcrp_get(series_codes, start: str | None = None, end: str | None = None, lang: str = "esp") -> pd.DataFrame:
    """Fetch BCRPData series (official API) into a DataFrame.

    Parameters
    ----------
    series_codes : str | list | tuple
        A single code, several codes joined with ``-``, or a list/tuple of codes.
    start, end : str | None
        Range limits, normalised with ``_normalize_period`` using the first
        code. They are only added to the URL when *both* are given.
    lang : str
        Language segment appended to the URL when non-empty.

    Returns
    -------
    pd.DataFrame
        Columns ``["date", <code1>, <code2>, ...]`` sorted by date. The frame is
        empty (but keeps those columns) when the response has no usable periods
        or when ``requests`` is not installed.

    Raises
    ------
    Exception
        Network, HTTP-status and JSON-decoding errors from ``requests`` are not
        caught here; use ``bcrp_get_cached_or_empty`` for best-effort access.
    """
    if isinstance(series_codes, (list, tuple)):
        codes_list = [str(c).strip() for c in series_codes]
        codes = "-".join(codes_list)
    else:
        codes = str(series_codes).strip()
        codes_list = codes.split("-")
    freq_code = codes_list[0] if codes_list else ""
    columns = ["date"] + codes_list

    try:
        import requests
    except Exception:
        # Keep the documented schema even when nothing can be downloaded.
        return pd.DataFrame(columns=columns)

    start_n = _normalize_period(freq_code, start)
    end_n = _normalize_period(freq_code, end)

    # "v2" is a cache-format version: bump it whenever the parsing of the API
    # payload changes, so frames pickled by older logic are not reused.
    key = _hash_key("bcrp", "v2", codes, start_n or "", end_n or "", lang)
    cache_path = CACHE_DIR / f"bcrp_{key}.pkl"
    if cache_path.exists():
        try:
            return pd.read_pickle(cache_path)
        except Exception:
            pass  # unreadable cache file: fall through and download again

    base_url = "https://estadisticas.bcrp.gob.pe/estadisticas/series/api"
    parts = [base_url, codes, "json"]
    # NOTE(review): a one-sided range (only start or only end) is silently ignored,
    # and lang is appended even without a range — confirm against the API guide.
    if start_n and end_n:
        parts += [start_n, end_n]
    if lang:
        parts += [lang]
    url = "/".join(parts)

    r = requests.get(url, timeout=30)
    r.raise_for_status()
    obj = r.json()

    n_codes = len(codes_list)
    rows = []
    for p in obj.get("periods", []):
        name = p.get("name")
        vals = p.get("values", [])
        if isinstance(vals, str):
            vals = [vals]
        if name is None or not isinstance(vals, list):
            continue
        # Pad or truncate so every row has exactly one value per requested code.
        vals = (vals + [None] * n_codes)[:n_codes]
        rows.append([name] + vals)

    df = pd.DataFrame(rows, columns=columns)
    if df.shape[0] == 0:
        return pd.DataFrame(columns=columns)

    df["date"] = df["date"].apply(_parse_bcrp_period_name)
    for c in codes_list:
        # Missing-value placeholders become NaN; any other non-numeric text is coerced too.
        df[c] = df[c].replace({"n.d.": np.nan, "nd": np.nan, "N.D.": np.nan})
        df[c] = pd.to_numeric(df[c], errors="coerce")

    df = df.dropna(subset=["date"]).sort_values("date").reset_index(drop=True)
    try:
        df.to_pickle(cache_path)
    except Exception:
        pass  # caching is only an optimisation; still return the downloaded data
    return df

def bcrp_get_cached_or_empty(series_codes, start: str, end: str) -> pd.DataFrame:
    """Best-effort wrapper around ``bcrp_get``.

    Returns whatever ``bcrp_get`` returns; if it raises for any reason, an
    empty DataFrame with columns ``["date", <code1>, <code2>, ...]`` is
    returned instead so downstream cells can keep running.
    """
    try:
        return bcrp_get(series_codes, start=start, end=end)
    except Exception:
        if isinstance(series_codes, (list, tuple)):
            codes_list = [str(c).strip() for c in series_codes]
        else:
            # Same convention as bcrp_get: one string may hold several codes joined by "-".
            codes_list = str(series_codes).strip().split("-")
        return pd.DataFrame(columns=["date"] + codes_list)

def yf_download_wide(tickers, start: str, end: str) -> pd.DataFrame:
    """Download Close and Volume using yfinance.

    Parameters
    ----------
    tickers : str | list | tuple
        One ticker or a list/tuple of tickers.
    start, end : str
        Date range passed straight to ``yf.download``.

    Returns
    -------
    pd.DataFrame
        Indexed by date with columns ``Close_<TICKER>`` and ``Volume_<TICKER>``.
        An empty DataFrame is returned when ``yfinance`` is not installed, the
        download yields no rows, or anything fails while downloading/reshaping.

    Results are cached as a pickle under ``CACHE_DIR``; a cache file that cannot
    be read is ignored, and a failure to write the cache does not discard the
    downloaded data.
    """
    try:
        import yfinance as yf
    except Exception:
        return pd.DataFrame()

    cols = list(tickers) if isinstance(tickers, (list, tuple)) else [tickers]
    key = _hash_key("yf_wide", ",".join(str(c) for c in cols), start, end)
    cache_path = CACHE_DIR / f"yf_wide_{key}.pkl"
    if cache_path.exists():
        try:
            return pd.read_pickle(cache_path)
        except Exception:
            pass  # unreadable cache file: fall through and download again

    try:
        data = yf.download(cols, start=start, end=end, auto_adjust=True, progress=False)
        if data.empty:
            return pd.DataFrame()
        if isinstance(data.columns, pd.MultiIndex):
            # Multi-level columns: the first level is the field, the second the ticker.
            close = data["Close"].copy()
            vol = data["Volume"].copy()
        else:
            # Flat columns: label the single series with the requested ticker.
            close = data[["Close"]].rename(columns={"Close": cols[0]})
            vol = data[["Volume"]].rename(columns={"Volume": cols[0]})
        close = close.add_prefix("Close_")
        vol = vol.add_prefix("Volume_")
        out = close.join(vol, how="outer")
        out.index.name = "date"
    except Exception:
        return pd.DataFrame()

    try:
        out.to_pickle(cache_path)
    except Exception:
        pass  # caching is only an optimisation; still return the downloaded data
    return out

def sec_companyfacts(cik10: str, user_agent: str) -> dict:
    """Fetch SEC company facts (official EDGAR Data APIs).

    Endpoint:
      https://data.sec.gov/api/xbrl/companyfacts/CIK##########.json

    ``cik10`` is left-padded with zeros to ten characters. The request sends
    ``user_agent`` as the User-Agent header, which the SEC requires to
    identify the requester. A successful response is cached as JSON under
    ``CACHE_DIR`` and served from there on later calls; cache read/write
    problems are ignored.

    Returns {} on failure (missing ``requests``, network/HTTP error, bad JSON).
    """
    try:
        import requests, json
    except Exception:
        return {}

    padded_cik = str(cik10).zfill(10)
    cache_name = _hash_key("sec_companyfacts", padded_cik)
    cache_path = CACHE_DIR / f"sec_companyfacts_{cache_name}.json"

    if cache_path.exists():
        try:
            return json.loads(cache_path.read_text(encoding="utf-8"))
        except Exception:
            pass  # unreadable cache: fetch again

    try:
        response = requests.get(
            f"https://data.sec.gov/api/xbrl/companyfacts/CIK{padded_cik}.json",
            headers={
                "User-Agent": user_agent,
                "Accept-Encoding": "gzip, deflate",
                "Host": "data.sec.gov",
            },
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
    except Exception:
        return {}

    try:
        cache_path.write_text(json.dumps(payload), encoding="utf-8")
    except Exception:
        pass  # failing to cache must not hide a successful download
    return payload

def safe_head(df: pd.DataFrame, n: int = 5) -> pd.DataFrame:
    """Return the first ``n`` rows of ``df``, or an empty DataFrame if ``df`` is not a DataFrame."""
    if not isinstance(df, pd.DataFrame):
        return pd.DataFrame()
    return df.head(n)

### Load datasets (used across exercises)

We load:
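- Peru: **BVL General Index** (BCRP series `PD38026MD`) and **CPI Non-Tradables** (monthly, `PN01282PM`). Note: the sample values printed below (e.g. 0.83, −0.42) look like monthly percentage changes rather than index levels; check the series description on BCRPData before interpreting them.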
- US: prices/volumes for selected tickers via `yfinance`.
- US fundamentals: SEC company facts (optional) for Apple (CIK 0000320193).


In [3]:
START = "2021-01-01"
END = "2025-12-18"

# Peru — BCRPData (official): download each series, then give its column a readable name.
bvl = bcrp_get_cached_or_empty("PD38026MD", start=START, end=END)
bvl = bvl.rename(columns={"PD38026MD": "BVL_index"})

cpi_nt = bcrp_get_cached_or_empty("PN01282PM", start=START, end=END)
cpi_nt = cpi_nt.rename(columns={"PN01282PM": "CPI_non_tradables"})

# US — Yahoo Finance (real market data): one Close_/Volume_ column pair per ticker.
tickers = ["SPY", "AAPL", "MSFT", "JPM", "NVDA"]
us = yf_download_wide(tickers, start=START, end=END)

# Shapes first (an empty frame means the download failed), then a preview of each frame.
print("BVL:", bvl.shape, "CPI NT:", cpi_nt.shape, "US:", us.shape)
safe_head(bvl), safe_head(cpi_nt), safe_head(us)

BVL: (729, 2) CPI NT: (59, 2) US: (1246, 10)


(        date  BVL_index
 0 2021-02-01   21610.03
 1 2021-02-02   21451.89
 2 2021-02-03   21632.78
 3 2021-02-04   21552.20
 4 2021-02-05   21783.37,
         date  CPI_non_tradables
 0 2021-01-01           0.832155
 1 2021-02-01          -0.424668
 2 2021-03-01           1.026397
 3 2021-04-01          -0.361899
 4 2021-05-01           0.158005,
 Ticker      Close_AAPL   Close_JPM  Close_MSFT  Close_NVDA   Close_SPY  \
 date                                                                     
 2021-01-04  125.974487  110.548325  208.882187   13.076726  345.273926   
 2021-01-05  127.531975  111.149849  209.083710   13.367159  347.651978   
 2021-01-06  123.239067  116.369003  203.662308   12.579123  349.730438   
 2021-01-07  127.444382  120.190437  209.457947   13.306580  354.926575   
 2021-01-08  128.544388  120.323151  210.734116   13.239517  356.948822   
 
 Ticker      Volume_AAPL  Volume_JPM  Volume_MSFT  Volume_NVDA  Volume_SPY  
 date                                         

## 4.1. <a id='def'> If condition </a>

In this section you will practice `if / elif / else` using real data.

### 4.1.1.  <a id='4.1.1.'> The structure of the code </a>

**Exercise 4.1.1 — Classify a day (single `if`)**

Using the **BVL index**:
1. Compute daily returns: `ret = pct_change()`.
2. Take the most recent non-missing return.
3. Write a single `if` statement that prints a message only when the last return is **positive**.

Keep it minimal: one `if`, one `print`.

In [4]:
# Solution
# Hint: bvl has columns ["date","BVL_index"]
last_ret = np.nan

if isinstance(bvl, pd.DataFrame) and (not bvl.empty) and ("BVL_index" in bvl.columns):
    # Drop missing prices *before* computing returns. pct_change's implicit
    # forward-fill of NaN is deprecated in recent pandas, so leaving NaN prices in
    # would make the result depend on the pandas version (spurious 0% returns on
    # one, NaN returns on another).
    prices = bvl.set_index("date")["BVL_index"].dropna()
    ret = prices.pct_change()
    valid_ret = ret.dropna()  # removes the first observation, which has no previous price
    if not valid_ret.empty:
        last_ret = float(valid_ret.iloc[-1])

# Single IF (prints only when positive)
if pd.notna(last_ret) and last_ret > 0:
    print("BVL last return is positive:", last_ret)

# Optional self-check (does not enforce correctness)
print("Last return value (may be NaN if data missing):", last_ret)


BVL last return is positive: 0.01196384727960198
Last return value (may be NaN if data missing): 0.01196384727960198


### 4.1.2.   <a id='4.1.2.'> If condition with more than 1 expression </a>

**Exercise 4.1.2 — Two conditions with `and` / `or`**

Using US data (`us`):
1. Compute daily returns for `SPY` from `Close_SPY`.
2. Define a threshold, e.g. `thr = 0.01` (1%).
3. Create two booleans for the last available day:
   - `big_move`: `abs(return) > thr`
   - `high_volume`: `Volume_SPY` above its median
4. Use `if / elif / else` to print one of three messages:
   - big move AND high volume
   - big move BUT not high volume
   - not a big move


In [5]:
# Solution
thr = 0.01  # a "big move" is an absolute daily return above 1%
big_move = False
high_volume = False

last_spy_ret = np.nan
last_spy_vol = np.nan

us_has_spy = isinstance(us, pd.DataFrame) and (not us.empty) and ("Close_SPY" in us.columns)
if us_has_spy:
    # Return condition: based on the most recent non-missing daily return.
    spy_ret = us["Close_SPY"].pct_change()
    valid_ret = spy_ret.dropna()
    if len(valid_ret) > 0:
        last_spy_ret = float(valid_ret.iloc[-1])
        big_move = abs(last_spy_ret) > thr

    # Volume condition: most recent non-missing volume versus the sample median.
    if "Volume_SPY" in us.columns:
        valid_vol = us["Volume_SPY"].dropna()
        if len(valid_vol) > 0:
            last_spy_vol = float(valid_vol.iloc[-1])
            vol_median = float(us["Volume_SPY"].median(skipna=True))
            high_volume = last_spy_vol > vol_median

if big_move and high_volume:
    print("SPY: big move AND high volume")
elif big_move and not high_volume:
    print("SPY: big move BUT not high volume")
else:
    print("SPY: not a big move")

# Optional self-check
print({"big_move": big_move, "high_volume": high_volume, "last_spy_ret": last_spy_ret, "last_spy_vol": last_spy_vol})


SPY: big move AND high volume
{'big_move': True, 'high_volume': True, 'last_spy_ret': -0.011003536401451242, 'last_spy_vol': 110625200.0}


### 4.1.3.   <a id='4.1.3.'> Logical Operators </a>

**Exercise 4.1.3 — Filter with logical operators**

Using CPI non-tradables (`cpi_nt`):
1. Compute monthly inflation as `% change` of the CPI index.
2. Create a filtered DataFrame keeping months where:
   - inflation is positive **AND** above its own median.
3. Create another filter where:
   - inflation is negative **OR** missing.

Store the results in `infl_above_median` and `infl_negative_or_missing`.

In [6]:
# Solution
infl_above_median = pd.DataFrame()
infl_negative_or_missing = pd.DataFrame()

if isinstance(cpi_nt, pd.DataFrame) and (not cpi_nt.empty) and ("CPI_non_tradables" in cpi_nt.columns):
    # sort_values returns a new frame, so cpi_nt itself is left untouched.
    tmp = cpi_nt.sort_values("date").reset_index(drop=True)

    # NOTE(review): the sample values shown when the data is loaded (e.g. 0.83, -0.42)
    # look like monthly % changes rather than index levels; if so, this pct_change is a
    # change of a change — confirm the series definition.
    tmp["inflation"] = tmp["CPI_non_tradables"].pct_change() * 100  # % monthly inflation
    med = tmp["inflation"].median(skipna=True)

    keep_cols = ["date", "CPI_non_tradables", "inflation"]
    is_positive = tmp["inflation"] > 0
    is_above_median = tmp["inflation"] > med
    is_negative = tmp["inflation"] < 0
    is_missing = tmp["inflation"].isna()

    # AND: both conditions must hold.  OR: either one is enough.
    infl_above_median = tmp.loc[is_positive & is_above_median, keep_cols].reset_index(drop=True)
    infl_negative_or_missing = tmp.loc[is_negative | is_missing, keep_cols].reset_index(drop=True)

# Optional self-check
print("infl_above_median:", getattr(infl_above_median, "shape", None))
print("infl_negative_or_missing:", getattr(infl_negative_or_missing, "shape", None))


infl_above_median: (17, 3)
infl_negative_or_missing: (42, 3)


### 4.1.4.   <a id='4.1.4.'> Python Identity Operators </a>

**Exercise 4.1.4 — `==` vs `is` with missing values**

Create a Python list of *observations* that mixes numbers and missing values:
```python
obs = [3.5, None, 3.6, np.nan, 3.55]
```
Tasks:
1. Use `== None` to build a mask for `None`.
2. Use `is None` inside a loop to count `None`.
3. Show that `np.nan == np.nan` is `False`.
4. Use `np.isnan` to count NaNs safely.

This is useful when cleaning financial time series with missing days.

In [7]:
# Solution
obs = [3.5, None, 3.6, np.nan, 3.55]

# 1) Equality test against None, one boolean per observation
mask_none = []
for x in obs:
    mask_none.append(x == None)  # noqa: E711

# 2) Identity test against None, counted in a loop
none_count = 0
for x in obs:
    if x is not None:
        continue
    none_count += 1

# 3) NaN is never equal to anything, not even to itself
nan_eq_nan = (np.nan == np.nan)

# 4) Count NaNs with np.isnan; the float check keeps None away from np.isnan
nan_count = 0
for x in obs:
    if isinstance(x, float) and np.isnan(x):
        nan_count += 1

# Optional self-check
print("mask_none:", mask_none)
print("none_count:", none_count)
print("np.nan == np.nan:", nan_eq_nan)
print("nan_count:", nan_count)


mask_none: [False, True, False, False, False]
none_count: 1
np.nan == np.nan: False
nan_count: 1


### 4.1.5.   <a id='4.1.5.'> Final IF condition structure </a>

**Exercise 4.1.5 — Write a small decision function**

Write a function `risk_label(x, low, high)`:
- returns `'LOW'` if `x < low`
- returns `'MEDIUM'` if `low <= x < high`
- returns `'HIGH'` if `x >= high`

Then apply it to the **last SPY daily return**.

In [8]:
# Solution
def risk_label(x: float, low: float, high: float) -> str:
    """Classify ``x`` against two thresholds.

    Returns
    -------
    str
        ``"LOW"`` if ``x < low``, ``"MEDIUM"`` if ``low <= x < high``,
        ``"HIGH"`` if ``x >= high``, and ``"NA"`` if ``x`` is missing
        (``None`` or NaN).

    A missing value needs its own branch: every comparison with NaN is False,
    so it would otherwise fall through to the final ``else`` and be labelled
    ``"HIGH"``.
    """
    if pd.isna(x):
        return "NA"
    elif x < low:
        return "LOW"
    elif x < high:  # x >= low is already guaranteed by the previous branch
        return "MEDIUM"
    else:
        return "HIGH"

last_spy_ret = np.nan
label = "NA"  # stays "NA" when no SPY return is available

us_has_spy = isinstance(us, pd.DataFrame) and (not us.empty) and ("Close_SPY" in us.columns)
if us_has_spy:
    spy_ret = us["Close_SPY"].pct_change()
    valid_ret = spy_ret.dropna()
    if not valid_ret.empty:
        last_spy_ret = float(valid_ret.iloc[-1])
        # The size of the move, whatever its sign, serves as a simple "risk" proxy
        label = risk_label(abs(last_spy_ret), low=0.002, high=0.01)

# Optional self-check
print("last_spy_ret:", last_spy_ret, "label:", label)


last_spy_ret: -0.011003536401451242 label: HIGH


### 4.1.6.   <a id='4.1.6.'> Python Nested if Statement </a>

**Exercise 4.1.6 — Nested if for a simple trading rule (logic only)**

Using BVL daily returns:
1. Compute `ret` and its rolling volatility proxy: rolling std over 20 days.
2. Build a **nested if** rule:
   - If `ret > 0`:
       - If `volatility` is high → print `'UP but volatile'`
       - else → print `'UP and calm'`
   - Else:
       - If `ret < 0` print `'DOWN'`
       - else print `'FLAT'`

Do not optimize anything; this is purely control-flow practice.

In [9]:
# Solution
ret = pd.Series(dtype=float)
vol20 = pd.Series(dtype=float)

if isinstance(bvl, pd.DataFrame) and (not bvl.empty) and ("BVL_index" in bvl.columns):
    # Drop missing prices *before* computing returns. pct_change's implicit
    # forward-fill of NaN is deprecated in recent pandas; without it a missing
    # price yields NaN returns, and each NaN blanks every 20-day window containing it.
    s = bvl.set_index("date")["BVL_index"].dropna().sort_index()
    ret = s.pct_change()
    vol20 = ret.rolling(20).std()  # rolling 20-observation standard deviation of returns

valid_ret = ret.dropna()
valid_vol = vol20.dropna()
last_r = valid_ret.iloc[-1] if not valid_ret.empty else np.nan
last_v = valid_vol.iloc[-1] if not valid_vol.empty else np.nan

# "High" volatility = latest rolling std above the median rolling std of the sample.
vol_high = False
if pd.notna(last_v):
    vol_high = last_v > vol20.median(skipna=True)

if pd.isna(last_r):
    print("No BVL return available.")
else:
    if last_r > 0:
        if vol_high:
            print("UP but volatile")
        else:
            print("UP and calm")
    else:
        if last_r < 0:
            print("DOWN")
        else:
            print("FLAT")


UP but volatile


## 4.2.   <a id='4.2.'> For Loops </a>

You will practice loops over arrays, lists, dictionaries, ranges, and DataFrames.

### 4.2.1. <a id='4.2.1.'> In numpy </a>

**Exercise 4.2.1 — Cumulative return with a loop (no vectorization)**

Using SPY daily returns (from `Close_SPY`):
1. Take the last 60 returns as a NumPy array.
2. Using a `for` loop, compute cumulative growth starting at 1.0:
   - update: `value *= (1 + r)`
3. Store the cumulative values in a list `path`.


In [10]:
# Solution
path = []
final_value = np.nan

if isinstance(us, pd.DataFrame) and (not us.empty) and ("Close_SPY" in us.columns):
    spy_ret = us["Close_SPY"].pct_change().dropna()
    # A negative-start slice simply returns everything when fewer than 60 returns exist.
    arr = spy_ret.to_numpy()[-60:]
    if arr.size > 0:
        value = 1.0
        for r in arr:
            # Compound one day at a time and remember every intermediate level.
            value = value * (1 + float(r))
            path.append(value)
        final_value = float(value)

# Optional self-check
print("len(path):", len(path))
print("final_value:", final_value)


len(path): 60
final_value: 1.0123490329911564


### 4.2.2 <a id='4.2.2.'> In List </a>

**Exercise 4.2.2 — Loop over tickers and compute last close**

Using the `tickers` list and the `us` DataFrame:
1. Loop over tickers.
2. For each ticker, pick its last non-missing `Close_<T>`.
3. Store results in a list of tuples: `(ticker, last_close)`.
4. If a ticker column is missing, use `continue`.


In [11]:
# Solution
last_close_list = []

if isinstance(us, pd.DataFrame) and (not us.empty):
    for t in tickers:
        col = "Close_" + t
        if col not in us.columns:
            continue  # this ticker was not downloaded
        series = us[col].dropna()
        if series.empty:
            continue  # column exists but holds no prices
        last_close = float(series.iloc[-1])
        last_close_list.append((t, last_close))

# Optional self-check
print("last_close_list (first 3):", last_close_list[:3])


last_close_list (first 3): [('SPY', 671.4000244140625), ('AAPL', 271.8399963378906), ('MSFT', 476.1199951171875)]


### 4.2.3  <a id='4.2.3.'>In Dictionary </a>

**Exercise 4.2.3 — Build a dictionary of risk flags**

1. Create a dict `risk_by_ticker = {}`.
2. Loop over `last_close_list` (from 4.2.2).
3. Assign `'HIGH_PRICE'` if the ticker's last close is above the median of all tickers' last closes, else `'LOW_PRICE'`.
4. Store: `risk_by_ticker[ticker] = label`.


In [12]:
# Solution
risk_by_ticker = {}

if isinstance(last_close_list, list) and len(last_close_list) > 0:
    # Median across tickers, ignoring missing prices (NaN when nothing is usable).
    vals = [v for (_, v) in last_close_list if pd.notna(v)]
    med = float(np.median(vals)) if len(vals) > 0 else np.nan

    for t, v in last_close_list:
        if pd.isna(v) or pd.isna(med):
            label = "UNKNOWN"
        elif v > med:
            label = "HIGH_PRICE"
        else:
            label = "LOW_PRICE"
        risk_by_ticker[t] = label

# Optional self-check
print(risk_by_ticker)


{'SPY': 'HIGH_PRICE', 'AAPL': 'LOW_PRICE', 'MSFT': 'HIGH_PRICE', 'JPM': 'LOW_PRICE', 'NVDA': 'LOW_PRICE'}


### 4.2.4 <a id = '4.2.4.'>  For loop using range </a>

**Exercise 4.2.4 — Simple monthly budgeting with `range`**

Goal: create a monthly savings path (control flow only).

1. Choose `months = 12` and `monthly_saving = 200`.
2. Use `for i in range(months)` to build a list with cumulative savings.
3. Add an `if` inside the loop: every 3 months add a bonus of 50.


In [13]:
# Solution
months = 12
monthly_saving = 200
savings_path = []

total = 0
for i in range(months):
    deposit = monthly_saving
    if i % 3 == 2:  # i is zero-based, so this hits months 3, 6, 9 and 12
        deposit += 50
    total += deposit
    savings_path.append(total)

# Optional self-check
print("savings_path:", savings_path)


savings_path: [200, 400, 650, 850, 1050, 1300, 1500, 1700, 1950, 2150, 2350, 2600]


### 4.2.5 <a id='4.2.5.'>  Nested For Loop </a>

**Exercise 4.2.5 — Pairwise comparison (nested loop)**

Using the tickers list:
1. Build all **unique pairs** `(i, j)` with `i < j`.
2. For each pair, compare their last close and store the ticker with the higher value.
3. Save outputs in a list `winners`.

This is intentionally simple to focus on nested loops.

In [14]:
# Solution
winners = []

# Ticker -> last close lookup built from the (ticker, last_close) pairs
last_close = dict(last_close_list) if isinstance(last_close_list, list) else {}

for i in range(len(tickers)):
    for j in range(i + 1, len(tickers)):  # j > i, so every pair is visited once
        ti, tj = tickers[i], tickers[j]
        vi = last_close.get(ti, np.nan)
        vj = last_close.get(tj, np.nan)
        missing_i, missing_j = pd.isna(vi), pd.isna(vj)

        if missing_i and missing_j:
            continue  # nothing to compare

        if missing_i:
            winner = tj
        elif missing_j:
            winner = ti
        elif vi >= vj:  # ties go to the first ticker of the pair
            winner = ti
        else:
            winner = tj
        winners.append(winner)

# Optional self-check
print("winners (first 5):", winners[:5])


winners (first 5): ['SPY', 'SPY', 'SPY', 'SPY', 'MSFT']


### 4.2.6. <a id = '4.2.6.'> Iterations over Pandas</a>

**Exercise 4.2.6 — Rename columns (finance version)**

Using `us`:
1. Copy `us` to `us2`.
2. Rename columns so that:
   - `Close_SPY` → `close_spy`
   - `Volume_SPY` → `volume_spy`
   - similarly for other tickers
3. Do this **with a loop**, not by writing each rename manually.

Store the final DataFrame in `us2`.

In [15]:
# Solution
us2 = us.copy()

rename_map = {}
for col in us2.columns:
    # Lower-casing the whole label handles both "Close_SPY" -> "close_spy"
    # and any label without an underscore.
    rename_map[col] = col.lower()

us2 = us2.rename(columns=rename_map)

# Optional self-check
print("Original cols (first 6):", list(us.columns)[:6])
print("New cols (first 6):", list(us2.columns)[:6])


Original cols (first 6): ['Close_AAPL', 'Close_JPM', 'Close_MSFT', 'Close_NVDA', 'Close_SPY', 'Volume_AAPL']
New cols (first 6): ['close_aapl', 'close_jpm', 'close_msft', 'close_nvda', 'close_spy', 'volume_aapl']


## 4.3. <a id = '4.3.'> Pass, Continue, Break, Try</a>

These statements help control loop execution and handle errors.

### 4.3.1 <a id = '4.3.1.'> Pass</a>

**Exercise 4.3.1 — Placeholder rule**

Write a loop over tickers that *would* apply a rule, but for now uses `pass` when the ticker is not `'SPY'`. When the ticker is `'SPY'`, print its last close.


In [16]:
# Solution
# Use last_close_list from 4.2.2 if available

# Ticker -> last close lookup
lookup = dict(last_close_list) if isinstance(last_close_list, list) else {}

for t in tickers:
    if t != "SPY":
        pass  # placeholder: a rule for the other tickers would go here
    else:
        v = lookup.get(t, np.nan)  # NaN when SPY has no recorded last close
        print("SPY last close:", v)


SPY last close: 671.4000244140625


### 4.3.2. <a id = '4.3.2.'>Continue</a>

**Exercise 4.3.2 — Skip short histories**

1. Loop over tickers.
2. For each ticker, count non-missing price observations.
3. If fewer than 500 observations, `continue`.
4. Otherwise, store the ticker in `enough_data`.


In [17]:
# Solution
enough_data = []

if isinstance(us, pd.DataFrame) and (not us.empty):
    for t in tickers:
        col = "Close_" + t
        if col not in us.columns:
            continue  # ticker not downloaded
        n = int(us[col].count())  # count() ignores missing values
        if n < 500:
            continue  # history too short
        enough_data.append(t)

# Optional self-check
print("enough_data:", enough_data)


enough_data: ['SPY', 'AAPL', 'MSFT', 'JPM', 'NVDA']


### 4.3.3. <a id = '4.3.3.'>Break</a>

**Exercise 4.3.3 — Stop when a drawdown threshold is hit**

Using SPY returns:
1. Walk forward day-by-day.
2. Track a running `peak` of the cumulative value.
3. Compute drawdown = `value / peak - 1`.
4. If drawdown falls below `-0.10` (−10%), `break`.
5. Record the date where you stop.


In [18]:
# Solution
stop_date = None  # stays None if the threshold is never hit

if isinstance(us, pd.DataFrame) and (not us.empty) and ("Close_SPY" in us.columns):
    spy_ret = us["Close_SPY"].pct_change().dropna()
    if not spy_ret.empty:
        value = 1.0
        peak = 1.0
        for dt, r in spy_ret.items():
            value = value * (1 + float(r))
            if value > peak:
                peak = value  # new running maximum
            dd = value / peak - 1.0  # drawdown relative to the running peak (<= 0)
            if dd < -0.10:
                stop_date = dt
                break

print("stop_date:", stop_date)


stop_date: 2022-02-22 00:00:00


### 4.3.4. <a id = '4.3.4.'> Try </a>

**Exercise 4.3.4 — Try/Except with a real API (SEC)**

The SEC requires a **User-Agent**. You can put your email in it.

Tasks:
1. Set `SEC_USER_AGENT`.
2. Fetch Apple company facts (`CIK 0000320193`).
3. In a `try` block, navigate the nested dict to find the section `facts`.
4. If something is missing, handle with `except` and set `facts = {}`.

Do not extract any final metric here—just practice safe access.

In [19]:
# Solution
SEC_USER_AGENT = "Python Finance Course (student@example.com)"  # change to your own email if you run this locally

apple = sec_companyfacts("0000320193", user_agent=SEC_USER_AGENT)

try:
    # Only a dict can hold a "facts" section; anything else means "no data".
    facts = apple.get("facts", {}) if isinstance(apple, dict) else {}
    if not isinstance(facts, dict):
        facts = {}
except Exception:
    facts = {}

print("facts keys (if available):", list(facts.keys())[:5] if isinstance(facts, dict) else None)


facts keys (if available): ['dei', 'us-gaap']


## 4.4. <a id='4.4.'> While Loop </a>

Practice `while` loops with a simple finance simulation using real-return samples.

### 4.4.1. <a id='4.4.1.'> Structure </a>

**Exercise 4.4.1 — Reach a target portfolio value**

Using SPY daily returns:
1. Create a list/array of historical daily returns.
2. Start with `value = 1.0`.
3. While `value < 1.2` (target +20%), repeatedly:
   - draw one return at random (fixed seed)
   - update value
   - increment `steps`
4. Add a safety stop: if `steps > 2000`, break.

Store the final `steps` and `value`.

In [20]:
# Solution
steps = 0
value = 1.0

if isinstance(us, pd.DataFrame) and (not us.empty) and ("Close_SPY" in us.columns):
    spy_ret = us["Close_SPY"].pct_change().dropna().to_numpy()
    if spy_ret.size > 0:
        rng = np.random.default_rng(123)  # fixed seed so the draw sequence is repeatable
        while value < 1.2:  # target: +20%
            r = float(rng.choice(spy_ret))  # one historical return drawn at random
            value = value * (1 + r)
            steps = steps + 1
            if steps > 2000:
                break  # safety stop in case the target is never reached

print("steps:", steps)
print("value:", value)


steps: 669
value: 1.2059536907831272


## 4.5. <a id = '4.5.'> References </a>

- BCRPData API: https://estadisticas.bcrp.gob.pe/estadisticas/series/ayuda/api
- yfinance docs: https://ranaroussi.github.io/yfinance/
- SEC EDGAR Data APIs: https://www.sec.gov/search-filings/edgar-application-programming-interfaces
- Python control flow: https://docs.python.org/3/tutorial/controlflow.html
- Pandas pct_change: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pct_change.html
