In [1]:
pip install numpy pandas scipy statsmodels arch scikit-learn cvxpy pyportfolioopt numpy-financial matplotlib plotly seaborn empyrical quantstats yfinance pandas-datareader streamlit streamlit-option-menu streamlit-aggrid tqdm joblib linearmodels openpyxl

Collecting arch
  Downloading arch-8.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (13 kB)
Collecting pyportfolioopt
  Downloading pyportfolioopt-1.5.6-py3-none-any.whl.metadata (22 kB)
Collecting numpy-financial
  Downloading numpy_financial-1.0.0-py3-none-any.whl.metadata (2.2 kB)
Collecting empyrical
  Downloading empyrical-0.5.5.tar.gz (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.8/52.8 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metada

In [2]:
pip install ecos osqp

Collecting ecos
  Downloading ecos-2.0.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.0 kB)
Downloading ecos-2.0.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (222 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m222.1/222.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ecos
Successfully installed ecos-2.0.14


In [3]:
import os
import io
import requests
import zipfile
import numpy as np
import pandas as pd
import yfinance as yf
from pandas_datareader import data as pdr
from datetime import datetime



In [4]:
def download_price_data(tickers, start="2010-01-01", end=None):
    """
    Download daily adjusted close prices from Yahoo Finance.

    Parameters
    ----------
    tickers : str or list of str
        Symbol(s) handed unchanged to ``yf.download``.
    start : str, default "2010-01-01"
        First date requested, formatted ``YYYY-MM-DD``.
    end : str or None, default None
        Last date requested; ``None`` is replaced by today's date.

    Returns
    -------
    pandas.DataFrame
        Close prices (requested with ``auto_adjust=True``), one column per
        ticker, with rows that are missing for every ticker removed.
    """
    if end is None:
        end = datetime.today().strftime("%Y-%m-%d")

    data = yf.download(tickers, start=start, end=end, auto_adjust=True)

    if isinstance(data.columns, pd.MultiIndex) or "Close" in data.columns:
        # Both column layouts expose the close under "Close". The flat layout
        # used to be returned whole (Open/High/Low/Volume included), which
        # contradicted the "close prices" contract of this function.
        prices = data["Close"]
    else:
        # Nothing recognisable came back (e.g. an empty frame): pass it through.
        prices = data

    if isinstance(prices, pd.Series):
        # Keep the return type a DataFrame keyed by ticker in every case.
        if isinstance(tickers, str):
            column_name = tickers
        elif len(tickers) == 1:
            column_name = tickers[0]
        else:
            column_name = "Close"
        prices = prices.to_frame(name=column_name)

    return prices.dropna(how="all")


In [5]:
def download_risk_free_rate(series="TB3MS", start="2010-01-01"):
    """
    Fetch a rate series from FRED (by default the 3-month Treasury Bill
    series) and return it as decimals on a daily calendar.

    The raw values are divided by 100, then resampled to calendar days with
    each published value carried forward until the next one.
    """
    quoted_rate = pdr.DataReader(series, "fred", start)
    decimal_rate = quoted_rate / 100  # percent -> decimal
    return decimal_rate.resample("D").ffill()


In [6]:
def download_fama_french_factors():
    """
    Download Fama-French 5 factors daily data.

    The zipped CSV is fetched from Kenneth French's data library, the first
    archive member is parsed (skipping the three leading lines), and only rows
    whose first field is a purely numeric date are kept.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date`` (parsed as ``%Y%m%d``); every column is converted
        to float and divided by 100.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_daily_CSV.zip"

    # A timeout keeps a stalled server from hanging the notebook forever, and
    # the status check stops an HTML error page from reaching the zip parser.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Context managers make sure the archive and the member handle are closed.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        file_name = archive.namelist()[0]
        with archive.open(file_name) as handle:
            df = pd.read_csv(handle, skiprows=3)

    df = df.rename(columns={"Unnamed: 0": "Date"})

    # Work on a string view of the date column: pandas parses it as integers
    # when the file has no textual footer row, and `.str` would then fail.
    date_text = df["Date"].astype(str).str.strip()
    is_data_row = date_text.str.isnumeric()

    # Copy before assigning so the write does not target a slice of the parse.
    df = df.loc[is_data_row].copy()
    df["Date"] = pd.to_datetime(date_text[is_data_row], format="%Y%m%d")
    df = df.set_index("Date")

    return df.astype(float) / 100


In [7]:
def get_financial_statements(ticker):
    """
    Collect the three financial statements yfinance exposes for ``ticker``.

    Returns a dict with the keys ``income_statement``, ``balance_sheet`` and
    ``cash_flow``; each value is the corresponding yfinance table transposed.
    """
    company = yf.Ticker(ticker)

    # (result key, attribute on the yfinance Ticker object)
    statement_sources = (
        ("income_statement", "financials"),
        ("balance_sheet", "balance_sheet"),
        ("cash_flow", "cashflow"),
    )

    return {
        label: getattr(company, attribute).T
        for label, attribute in statement_sources
    }


In [8]:
def get_earnings_dates(ticker):
    """
    Return what yfinance's ``get_earnings_dates`` gives for ``ticker`` when
    called with ``limit=12``.
    """
    return yf.Ticker(ticker).get_earnings_dates(limit=12)


In [9]:
def download_carbon_price(start="2010-01-01"):
    """
    Download the carbon price series: the ``Close`` data of the Yahoo Finance
    symbol ``KRBN`` from ``start`` onwards, requested with ``auto_adjust=True``.
    """
    krbn_history = yf.download("KRBN", start=start, auto_adjust=True)
    return krbn_history["Close"]


In [10]:
def download_macro_data(start="2010-01-01"):
    """
    Download the macro series used by the notebook.

    Parameters
    ----------
    start : str, default "2010-01-01"
        First date requested from both data sources.

    Returns
    -------
    dict
        ``"CPI"`` and ``"FEDFUNDS"`` hold the FRED series ``CPIAUCSL`` and
        ``FEDFUNDS``; ``"VIX"`` holds the ``Close`` data of Yahoo's ``^VIX``.
    """
    series = {
        "CPI": "CPIAUCSL",
        "VIX": "^VIX",
        "FEDFUNDS": "FEDFUNDS"
    }

    macro = {}

    # CPI & Fed Funds from FRED
    for key in ("CPI", "FEDFUNDS"):
        macro[key] = pdr.DataReader(series[key], "fred", start)

    # VIX from Yahoo. auto_adjust is spelled out like every other yf.download
    # call in this notebook, so the result does not depend on the library
    # default and yfinance has no reason to warn about it.
    vix = yf.download(series["VIX"], start=start, auto_adjust=True)
    macro["VIX"] = vix["Close"]

    return macro


In [11]:
def generate_synthetic_esg(tickers, seed=42):
    """
    Generate reproducible synthetic ESG scores, one row per ticker.

    Each pillar is drawn from a normal distribution and clipped to [0, 100]:
    Environmental ~ N(60, 15), Social ~ N(58, 12), Governance ~ N(65, 10).
    ``Composite_ESG`` is 0.4 * Governance + 0.3 * Environmental + 0.3 * Social.

    Parameters
    ----------
    tickers : sequence of str
        Labels used as the index of the result.
    seed : int, default 42
        Seed for the generator; the same seed gives the same scores.

    Returns
    -------
    pandas.DataFrame
        Columns ``Environmental``, ``Social``, ``Governance``, ``Composite_ESG``.
    """
    # A private RandomState yields exactly the draws that np.random.seed(seed)
    # followed by np.random.normal would, but leaves the process-wide random
    # stream untouched for whatever else the notebook samples later.
    rng = np.random.RandomState(seed)
    n_assets = len(tickers)

    esg_data = pd.DataFrame(index=tickers)

    # Draw order (E, S, G) matters for reproducibility; do not reorder.
    esg_data["Environmental"] = np.clip(rng.normal(60, 15, n_assets), 0, 100)
    esg_data["Social"] = np.clip(rng.normal(58, 12, n_assets), 0, 100)
    esg_data["Governance"] = np.clip(rng.normal(65, 10, n_assets), 0, 100)

    esg_data["Composite_ESG"] = (
        0.4 * esg_data["Governance"] +
        0.3 * esg_data["Environmental"] +
        0.3 * esg_data["Social"]
    )

    return esg_data


In [12]:
def compute_log_returns(prices):
    """
    Convert a price table into one-period log returns.

    Each value is the natural log of the price divided by the previous row's
    price; rows containing any missing value (the first row included) are
    dropped.
    """
    previous_prices = prices.shift(1)
    log_changes = np.log(prices / previous_prices)
    return log_changes.dropna()


In [13]:
def save_to_csv(df, filename, folder="data/raw"):
    """
    Write ``df`` as CSV to ``folder/filename``, creating ``folder`` (and any
    missing parents) first if it does not exist yet.
    """
    destination = os.path.join(folder, filename)
    os.makedirs(folder, exist_ok=True)
    df.to_csv(destination)


In [15]:
# Universe used throughout the notebook.
tickers = ["AAPL", "MSFT", "JPM", "XOM", "NVDA"]

# Fetch every dataset (network access required).
prices = download_price_data(tickers)
returns = compute_log_returns(prices)
rf = download_risk_free_rate()
ff_factors = download_fama_french_factors()
carbon = download_carbon_price()
macro = download_macro_data()
esg = generate_synthetic_esg(tickers)

# Persist the raw inputs so later steps can run without re-downloading.
save_to_csv(prices, "prices.csv")
save_to_csv(returns, "returns.csv")
save_to_csv(rf, "risk_free.csv")
save_to_csv(ff_factors, "ff_factors.csv")
save_to_csv(carbon, "carbon_price.csv")
save_to_csv(esg, "esg_scores.csv")

# The macro series were downloaded but never written out; store each one
# next to the other raw files (macro_cpi.csv, macro_fedfunds.csv, macro_vix.csv).
for macro_name, macro_series in macro.items():
    save_to_csv(macro_series, f"macro_{macro_name.lower()}.csv")

print("Data extraction complete.")

[*********************100%***********************]  5 of 5 completed
[*********************100%***********************]  1 of 1 completed
  vix = yf.download(series["VIX"], start="2010-01-01")
[*********************100%***********************]  1 of 1 completed


Data extraction complete.


In [17]:
pip install arch

Collecting arch
  Using cached arch-8.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (13 kB)
Downloading arch-8.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (981 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.3/981.3 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: arch
Successfully installed arch-8.0.0


In [18]:
import numpy as np
import pandas as pd
from sklearn.covariance import LedoitWolf
from arch import arch_model

class RiskEngine:
    """
    Covariance and volatility estimators over a table of asset returns.

    ``returns`` is expected to be a DataFrame with one column per asset
    (the methods use ``.cov()``, ``.columns`` and column selection on it).
    """

    def __init__(self, returns):
        self.returns = returns

    def historical_covariance(self):
        """Sample covariance matrix of the returns (``DataFrame.cov``)."""
        return self.returns.cov()

    def ledoit_wolf_covariance(self):
        """
        Ledoit-Wolf shrinkage covariance, labelled with the asset names.

        Rows containing a missing value are dropped before fitting, because
        the estimator is fed a plain array and cannot skip gaps itself.
        """
        complete_rows = self.returns.dropna()

        lw = LedoitWolf()
        lw.fit(complete_rows.values)
        return pd.DataFrame(lw.covariance_,
                            index=complete_rows.columns,
                            columns=complete_rows.columns)

    def garch_volatility(self, asset):
        """
        Univariate GARCH(1,1) forecast

        Returns the one-step-ahead volatility forecast for ``asset`` in the
        same units as the input returns.
        """
        # Missing observations are removed so the model only sees real data.
        # The series is passed in percent (x100) and the forecast is scaled
        # back at the end -- presumably to help the optimiser; confirm
        # against the arch documentation.
        scaled_returns = self.returns[asset].dropna() * 100

        am = arch_model(scaled_returns, vol="Garch", p=1, q=1)
        res = am.fit(disp="off")
        forecast = res.forecast(horizon=1)

        # Last row, first column of the variance table: the horizon-1 forecast.
        variance = forecast.variance.iloc[-1, 0]
        return np.sqrt(variance) / 100


In [19]:
import statsmodels.api as sm
import pandas as pd

class FactorModel:
    """
    Factor-regression helper.

    ``returns`` holds one column per asset and ``factors`` one column per
    factor; both are expected to be indexed by date.
    """

    def __init__(self, returns, factors):
        self.returns = returns
        self.factors = factors

    def fama_french_expected_returns(self):
        """
        Estimate an expected return per asset from an OLS factor regression.

        For every asset the returns are regressed on all factor columns plus
        an intercept, using only the dates present in both tables. The
        estimate is ``intercept + sum(beta_k * mean(factor_k))`` where the
        factor means are taken over the whole ``factors`` table.

        Returns
        -------
        pandas.Series
            Indexed by asset name. An asset with no date in common with the
            factors gets NaN.

        NOTE(review): every column of ``factors`` is used as a regressor and
        the raw returns are the dependent variable; if the table carries a
        risk-free column the caller may want to drop it / use excess returns.
        """
        # Loop-invariant: computed once instead of once per asset.
        factor_means = self.factors.mean()
        usable_factors = self.factors.dropna()

        expected_returns = {}

        for asset in self.returns.columns:
            y = self.returns[asset].dropna()

            # Price history usually runs past the factor file; selecting the
            # factors with every return date would raise KeyError, so the
            # regression is restricted to the shared dates.
            shared_dates = y.index.intersection(usable_factors.index)
            if shared_dates.empty:
                expected_returns[asset] = float("nan")
                continue

            y = y.loc[shared_dates]
            # has_constant="add" guarantees the "const" column exists even if
            # one of the factors happens to be constant over the sample.
            X = sm.add_constant(usable_factors.loc[shared_dates], has_constant="add")

            betas = sm.OLS(y, X).fit().params

            # Select by label: integer keys on a label-indexed Series are
            # deprecated positional lookups.
            intercept = betas["const"]
            loadings = betas.drop("const")

            expected_returns[asset] = intercept + (loadings * factor_means).sum()

        return pd.Series(expected_returns)
