In [8]:
# install setuptools to provide pkg_resources (fix ModuleNotFoundError in Jupyter)
%pip install setuptools
# install pandas
%pip install pandas
# install vnstock
%pip install vnstock

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting vnstock
  Using cached vnstock-3.2.6-py3-none-any.whl.metadata (42 kB)
Collecting requests (from vnstock)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting beautifulsoup4 (from vnstock)
  Downloading beautifulsoup4-4.13.5-py3-none-any.whl.metadata (3.8 kB)
Collecting seaborn (from vnstock)
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting openpyxl (from vnstock)
  Using cached openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting pydantic (from vnstock)
  Downloading pydantic-2.11.9-py3-none-any.whl.metadata (68 kB)
Collecting fake_useragent (from vnstock)
  Using cached fake_useragent-2.2.0-py3-none-any.whl.metadata (17 kB)
Collecting vnstock_ezchart (from vnstock)
  Using cached vnstock_ezchart-0.0.2-py3-none-any.whl.metadata (6.6 kB)
Collecting click (from vnstock)
  Using cached 

In [10]:
import sys
import subprocess
import pkg_resources
import datetime
import pandas as pd
import numpy as np
import vnstock
import json
import os

# Cell 0: fetch past month VN30 + constituents data using vnstock and compute beta for each stock
# Install vnstock if not present

required = {"vnstock"}
installed = {pkg.key for pkg in pkg_resources.working_set}
missing = required - installed
if missing:
    subprocess.check_call([sys.executable, "-m", "pip", "install", *missing], stdout=subprocess.DEVNULL)


# Helpers that try several common vnstock function names (different versions expose different APIs)
def try_call(obj, fnames, *args, **kwargs):
    for fn in fnames:
        if hasattr(obj, fn):
            try:
                return getattr(obj, fn)(*args, **kwargs)
            except TypeError:
                # maybe different signature: try without kwargs
                return getattr(obj, fn)(*args)
    raise RuntimeError(f"None of {fnames} available on object {obj}")

# Note: vnstock does not provide a reliable get_vn30_constituents API across versions
# so we prefer a local `vn30.json` file. If it's not present, we raise an error
# instructing the user to provide the file.

def get_history_close(symbol, start_date, end_date):
    # Try various possible function names and output formats; return Series indexed by date with close prices
    callers = ["get_historical_data", "get_stock_historical", "get_history", "get_historical_prices", "get_price", "history"]
    out = try_call(vnstock, callers, symbol, start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))
    # Normalize outputs
    if isinstance(out, pd.DataFrame):
        for col in ("close", "Close", "ClosePrice", "closing_price"):
            if col in out.columns:
                s = out[col].copy()
                s.index = pd.to_datetime(out.index)
                s.name = symbol
                return s
        # if dataframe has price as last column
        s = out.iloc[:, -1].copy()
        s.index = pd.to_datetime(out.index)
        s.name = symbol
        return s
    if isinstance(out, pd.Series):
        out.index = pd.to_datetime(out.index)
        out.name = symbol
        return out
    # if API returned dict-like with 'historical' key
    if isinstance(out, dict) and "historical" in out:
        df = pd.DataFrame(out["historical"])
        for col in ("close", "Close"):
            if col in df.columns:
                s = df[col].copy()
                if "date" in df.columns:
                    s.index = pd.to_datetime(df["date"])
                s.name = symbol
                return s
    raise RuntimeError(f"Unable to parse historical data for {symbol}")

# date range: past calendar month (last 30 days)
end = pd.to_datetime("today").normalize()
start = end - pd.Timedelta(days=30)

# fetch VN30 constituents — prefer local vn30.json, fallback to vnstock
symbols = None
for p in ("quantvn/vn30.json", "vn30.json"):
    if os.path.exists(p):
        try:
            with open(p, "r", encoding="utf-8") as f:
                data = json.load(f)
            if isinstance(data, (list, tuple)):
                symbols = [str(x) for x in data]
            elif isinstance(data, dict):
                for key in ("symbols", "members", "constituents", "vn30", "VN30"):
                    if key in data and isinstance(data[key], (list, tuple)):
                        symbols = [str(x) for x in data[key]]
                        break
                if symbols is None:
                    symbols = [str(k) for k in data.keys()]
            if symbols:
                break
        except Exception:
            symbols = None
            continue
if not symbols:
    raise RuntimeError("VN30 symbols file not found or unreadable. Please add 'quantvn/vn30.json' with a list of symbols or a dict containing a 'symbols' list.")

# fetch VN30 index series (some vnstock versions use index code 'VN30' or 'VNINDEX' etc.)
try:
    idx_series = get_history_close("VN30", start, end)
except Exception:
    # try common VN30 index symbol variations
    for alt in ("VN30F1M", "VN30INDEX", "VNINDEX", "VN30.HNX"):
        try:
            idx_series = get_history_close(alt, start, end)
            break
        except Exception:
            idx_series = None
    if idx_series is None:
        raise RuntimeError("Could not fetch VN30 index series; adjust the index symbol for your vnstock version")

# fetch each stock close series and build DataFrame
close_df = pd.DataFrame({ "VN30": idx_series })
for s in symbols:
    try:
        s_series = get_history_close(s, start, end)
        close_df[s] = s_series
    except Exception:
        # skip symbols that fail to fetch
        continue

# align by date and forward-fill/backfill as reasonable (or drop NA)
close_df = close_df.sort_index().ffill().bfill()

# compute daily returns
returns = close_df.pct_change().dropna(how="all").dropna(axis=0, how="any")

# ensure VN30 present
if "VN30" not in returns.columns:
    raise RuntimeError("VN30 series not present in returns after fetching and cleaning")

index_returns = returns["VN30"]

# compute beta for each stock: cov(R_stock, R_index)/var(R_index)
betas = {}
for col in returns.columns:
    if col == "VN30":
        continue
    r = returns[col]
    # align
    common = r.index.intersection(index_returns.index)
    r_aligned = r.loc[common]
    i_aligned = index_returns.loc[common]
    if len(common) < 5:
        betas[col] = np.nan
        continue
    cov = np.cov(r_aligned.values, i_aligned.values, ddof=1)[0, 1]
    var = np.var(i_aligned.values, ddof=1)
    betas[col] = cov / var if var != 0 else np.nan

beta_df = pd.DataFrame.from_dict(betas, orient="index", columns=["beta"]).sort_values("beta", ascending=False)

# show results
print(beta_df)

RuntimeError: Could not fetch VN30 index series; adjust the index symbol for your vnstock version