# Sandbox to explore TIAA-CREF fund holdings

## author:
- **David W. Hogg** (NYU)

## notes:
- Trying to scrape the SEC's Form N-PORT data sets for fund-holdings data.
- Leaning heavily on `claude.ai` for help.

In [None]:
# !pip install pandas requests   # datetime ships with the standard library

In [None]:
import requests
import zipfile
import io
import pandas as pd
from datetime import datetime

In [None]:
# Notebook configuration: the registrant / fund of interest, the quarterly
# data set to download, and the name of the CSV file to write.

# TIAA-CREF Funds trust CIK (the umbrella trust that contains TISCX)
# You can verify at: https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&company=tiaa-cref+funds&type=NPORT-P
# NOTE(review): TRUST_CIK and SERIES_NAME_FRAGMENT are not referenced by any
# cell visible in this notebook; the search cells use hard-coded strings.
TRUST_CIK = "0001084380"          # TIAA-CREF Funds
SERIES_NAME_FRAGMENT = "SOCIAL"   # intended for a case-insensitive match

# Which quarter to pull?  YEAR is a four-digit int and QUARTER an int 1-4;
# the download cell formats them into the file name as "{YEAR}q{QUARTER}"
# (e.g. 2024 and 3 -> "2024q3").
# The SEC posts data ~60 days after quarter end.
YEAR  = 2024
QUARTER = 3 # 1=Jan-Mar, 2=Apr-Jun, 3=Jul-Sep, 4=Oct-Dec

# Path the holdings table is written to (passed to DataFrame.to_csv).
OUTPUT_CSV = "tiscx_holdings.csv"

In [None]:
# Download the quarterly N-PORT data-set archive for YEAR/QUARTER and open it
# in memory.  `url`, `headers`, `r` (the HTTP response) and `z` (the open
# ZipFile) stay defined for the cells that follow.
headers = {"User-Agent": "your-name your@email.com"}   # SEC requires this
url = (
    "https://www.sec.gov/files/dera/data/form-n-port-data-sets/"
    f"{YEAR}q{QUARTER}_nport.zip"
)

print("Downloading: " + url)
r = requests.get(url, timeout=180, headers=headers)
r.raise_for_status()   # stop here on any HTTP error status

payload = r.content
megabytes = len(payload) / 1e6
print(f"Downloaded {megabytes:.1f} MB")

# Wrap the downloaded bytes in a file-like object so nothing touches disk.
z = zipfile.ZipFile(io.BytesIO(payload))
print("Files in zip:", z.namelist())

In [None]:
def read_tsv(z, name_fragment):
    """Load the first archive member whose name contains ``name_fragment``.

    Member names are taken in ``z.namelist()`` order and compared as a
    case-insensitive substring test; the first hit is parsed as a
    tab-separated table with every column read as ``str``.

    Args:
        z: An open ``zipfile.ZipFile``.
        name_fragment: Text that must occur somewhere in the member name.

    Returns:
        A ``pandas.DataFrame`` holding the member's contents.

    Raises:
        FileNotFoundError: If no member name contains ``name_fragment``.
    """
    for member in z.namelist():
        if name_fragment.upper() in member.upper():
            break
    else:
        # Loop finished without a match: report what the archive does hold.
        raise FileNotFoundError(f"No file matching '{name_fragment}'. Available: {z.namelist()}")

    print(f"  Reading {member} ...")
    with z.open(member) as handle:
        return pd.read_csv(handle, sep="\t", dtype=str, low_memory=False)

# Load the three lookup tables from the archive (in this order), then list
# the column names of each so the available fields are visible.
sub, reg, fund = (
    read_tsv(z, fragment)
    for fragment in ("SUBMISSION", "REGISTRANT", "FUND_REPORTED_INFO")
)

print("SUBMISSION columns:", list(sub.columns))
print("REGISTRANT columns:", list(reg.columns))
print("FUND_REPORTED_INFO columns:", list(fund.columns))

In [None]:
# Find every FUND_REPORTED_INFO row whose series name mentions "social choice".
# Names are upper-cased first so the match ignores case; missing names never match.
series_upper = fund["SERIES_NAME"].str.upper()
mask = series_upper.str.contains("SOCIAL CHOICE", na=False)

# Show each distinct (accession number, series name) pair once.
matches = fund.loc[mask, ["ACCESSION_NUMBER", "SERIES_NAME"]].drop_duplicates()
print(matches.to_string())

In [None]:
# Look through the registrant table for TIAA / CREF / Teachers entities.
# The pattern is a regex alternation applied to the upper-cased name.
registrant_upper = reg["REGISTRANT_NAME"].str.upper()
mask2 = registrant_upper.str.contains("TIAA|CREF|TEACHERS", na=False)

# One row per CIK (the first occurrence is kept).
registrants = reg.loc[mask2, ["CIK", "REGISTRANT_NAME", "ACCESSION_NUMBER"]]
print(registrants.drop_duplicates(subset="CIK").to_string())

In [None]:
# Export the holdings reported under one filing (`accession`) to OUTPUT_CSV.
#
# `accession` and `hold` are also assigned by a later cell.  They are created
# here only when missing, so the notebook runs top to bottom without a
# NameError, while values set by running that later cell first are still used.
if "accession" not in globals():
    accession = "0001752724-24-196431"
if "hold" not in globals():
    # Skip the read when the table is already loaded in this session.
    hold = read_tsv(z, "FUND_REPORTED_HOLDING")

# Keep only the rows belonging to the chosen filing.
holdings = hold[hold["ACCESSION_NUMBER"] == accession].copy()
print(f"Holdings rows: {len(holdings)}")

# Source column -> output header, in the order the columns should appear.
keep = {
    "ISSUER_NAME":      "Issuer",
    "ISSUE_TITLE":      "Security Title",
    "CUSIP":            "CUSIP",
    "ISIN":             "ISIN",
    "BALANCE":          "Shares/Par",
    "UNIT":             "Unit",
    "CUR_CD":           "Currency",
    "VALUATION":        "Value (USD)",
    "PCT_VAL":          "Pct of Net Assets",
    "PAYOFF_PROFILE":   "Long/Short",
    "ASSET_CAT":        "Asset Category",
    "ISSUER_TYPE":      "Issuer Type",
    "FAIR_VALUE_LEVEL": "Fair Value Level",
}
# Tolerate data sets that lack some of these columns.
existing = {k: v for k, v in keep.items() if k in holdings.columns}
out = holdings[list(existing.keys())].rename(columns=existing)

if "Pct of Net Assets" in out.columns:
    # Columns were read as strings; convert so the sort is numeric
    # (unparseable values become NaN).
    out["Pct of Net Assets"] = pd.to_numeric(out["Pct of Net Assets"], errors="coerce")
    out = out.sort_values("Pct of Net Assets", ascending=False)

out.to_csv(OUTPUT_CSV, index=False)
print(f"\n✅  Saved {len(out)} holdings to '{OUTPUT_CSV}'")
print(out.head(10).to_string())

In [None]:
# Pull the holdings for one filing out of FUND_REPORTED_HOLDING, tidy the
# column names, rank by share of net assets, and save the result to OUTPUT_CSV.
accession = "0001752724-24-196431"

hold = read_tsv(z, "FUND_REPORTED_HOLDING")
is_target_filing = hold["ACCESSION_NUMBER"] == accession
holdings = hold[is_target_filing].copy()
print(f"Holdings rows: {len(holdings)}")

# Source column -> output header, in the order the columns should appear.
keep = {
    "ISSUER_NAME": "Issuer",
    "ISSUE_TITLE": "Security Title",
    "CUSIP": "CUSIP",
    "ISIN": "ISIN",
    "BALANCE": "Shares/Par",
    "UNIT": "Unit",
    "CUR_CD": "Currency",
    "VALUATION": "Value (USD)",
    "PCT_VAL": "Pct of Net Assets",
    "PAYOFF_PROFILE": "Long/Short",
    "ASSET_CAT": "Asset Category",
    "ISSUER_TYPE": "Issuer Type",
    "FAIR_VALUE_LEVEL": "Fair Value Level",
}

# Restrict the mapping to columns this data set actually provides.
existing = {}
for source_col, header in keep.items():
    if source_col in holdings.columns:
        existing[source_col] = header

out = holdings[list(existing)].rename(columns=existing)

pct_header = "Pct of Net Assets"
if pct_header in out.columns:
    # Values were read as strings; make them numeric (bad values -> NaN)
    # so the largest positions sort to the top.
    out[pct_header] = pd.to_numeric(out[pct_header], errors="coerce")
    out = out.sort_values(by=pct_header, ascending=False)

out.to_csv(OUTPUT_CSV, index=False)
print(f"\n✅  Saved {len(out)} holdings to '{OUTPUT_CSV}'")
top_rows = out.head(10)
print(top_rows.to_string())