# Practical Exercise 7.03: Carhart 4-Factor Model

In [None]:
import pandas as pd
import requests
import zipfile
from io import BytesIO
import yfinance as yf
import statsmodels.api as sm

# Download Carhart factors (Fama-French 3 factors + Momentum)
def _fetch_french_daily_factors(url, column_names, timeout=30):
    """Download one zipped CSV from the Ken French data library and clean it.

    Args:
        url: Address of the ``*_CSV.zip`` archive to download.
        column_names: Names to give the value columns (the first CSV column
            is used as the index and is not named here).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        DataFrame indexed by date with one column per entry of
        ``column_names``; rows containing any missing value are removed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the archive contains no line starting with a digit.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on an HTTP error here rather than later with an opaque zip error.
    response.raise_for_status()

    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        member = archive.namelist()[0]  # only the first archive member is read
        with archive.open(member) as handle:
            lines = handle.read().decode('utf-8').splitlines()

    # Skip everything before the first non-blank line that starts with a digit.
    first_data_line = next(
        (i for i, line in enumerate(lines) if line.strip() and line[0].isdigit()),
        None,
    )
    if first_data_line is None:
        raise ValueError(f"No data rows found in {member!r} downloaded from {url}")

    table = pd.read_csv(
        BytesIO('\n'.join(lines[first_data_line:]).encode('utf-8')),
        index_col=0, header=None, names=column_names, skip_blank_lines=True
    )

    # Keep only rows keyed by exactly eight digits (YYYYMMDD); anything else
    # that survived parsing (e.g. trailing text) is discarded.
    table = table[table.index.astype(str).str.match(r'^\d{8}$')]
    table.index = pd.to_datetime(table.index, format='%Y%m%d', errors='coerce')
    return table.dropna()


def download_carhart_factors():
    """Download the daily Fama-French three factors and the momentum factor.

    Both archives are fetched from the Ken French data library, cleaned the
    same way, and inner-joined on their date index so only dates present in
    both files remain.

    Returns:
        DataFrame indexed by date with columns ``Mkt-RF``, ``SMB``, ``HML``,
        ``RF`` and ``MOM``.

    Raises:
        requests.HTTPError: If either download returns an error status.
        ValueError: If either archive contains no data rows.
    """
    ff_url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip'
    mom_url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_daily_CSV.zip'

    fama_french_factors = _fetch_french_daily_factors(ff_url, ['Mkt-RF', 'SMB', 'HML', 'RF'])
    momentum_factor = _fetch_french_daily_factors(mom_url, ['MOM'])

    carhart_factors = pd.merge(
        fama_french_factors, momentum_factor,
        left_index=True, right_index=True, how='inner'
    )

    return carhart_factors

def download_stock_data(symbols, start_date):
    """Fetch adjusted closing prices via yfinance and convert them to percent returns.

    Args:
        symbols: Ticker symbol(s) passed straight to ``yf.download``.
        start_date: First date to request, passed as ``start``.

    Returns:
        Period-over-period percentage changes of the ``'Adj Close'`` prices,
        multiplied by 100. Rows containing any missing value (including the
        first row, which has no previous price) are removed.
    """
    prices = yf.download(
        symbols,
        start=start_date,
        auto_adjust=False,
        actions=False,
    )
    adjusted_close = prices['Adj Close']

    # Fractional change between consecutive rows, rescaled to percent.
    percent_returns = adjusted_close.pct_change() * 100
    return percent_returns.dropna()

def calculate_betas(stock_returns, factors):
    """Estimate each stock's alpha and factor loadings by OLS.

    For every column of ``stock_returns`` the return in excess of ``RF`` is
    regressed on a constant plus every column of ``factors`` other than
    ``RF``. Only dates present in both inputs, with no missing values, enter
    the regression.

    Args:
        stock_returns: DataFrame of returns, one column per stock, in the
            same units as ``factors``.
        factors: DataFrame of factor returns that includes an ``RF`` column.

    Returns:
        Float DataFrame indexed by stock with an ``Alpha`` column followed by
        one column per factor. Stocks with no usable observations, or whose
        regression failed, are omitted.
    """
    factor_names = list(factors.columns.drop('RF'))
    betas = pd.DataFrame(index=stock_returns.columns, columns=['Alpha'] + factor_names)

    # A row with any missing factor value (RF included) cannot be used.
    usable_factors = factors.dropna()

    for stock in stock_returns.columns:
        observed = stock_returns[stock].dropna()

        # Restrict to shared dates so the two sides of the regression always
        # have identical rows; a label lookup on missing dates would raise.
        dates = observed.index.intersection(usable_factors.index)
        if dates.empty:
            continue

        # The Carhart specification explains returns in excess of the
        # risk-free rate, so RF is subtracted before fitting.
        Y = observed.loc[dates] - usable_factors.loc[dates, 'RF']
        X = usable_factors.loc[dates, factor_names]

        # Best effort per stock: report the failure and move on to the next.
        try:
            X = sm.add_constant(X)
            model = sm.OLS(Y, X).fit()
            betas.loc[stock, 'Alpha'] = model.params['const']
            for factor in factor_names:
                betas.loc[stock, factor] = model.params.get(factor, None)
        except Exception as e:
            print(f"Error processing {stock}: {e}")

    # The frame was filled cell by cell and is object-typed; hand back floats.
    return betas.dropna().astype(float)

def calculate_annualized_returns(betas, factors, trading_days=252):
    """Compute each stock's annualized return implied by its factor loadings.

    The result is the annualized mean of ``RF`` plus, for every loading
    column, the loading times the annualized mean of the matching factor.
    The first column of ``betas`` (the intercept) is not included.

    Args:
        betas: DataFrame indexed by stock whose first column is the
            intercept and whose remaining columns are named after columns
            of ``factors``.
        factors: DataFrame of per-period factor returns including ``RF``.
        trading_days: Number of periods per year used to annualize the
            per-period means.

    Returns:
        Float DataFrame indexed like ``betas`` with one column,
        ``'Total Return'``.

    Raises:
        KeyError: If a loading column has no matching column in ``factors``.
    """
    annualized_means = factors.mean() * trading_days

    # Everything after the first (intercept) column is a factor loading.
    # Casting also accepts loadings stored as Python objects.
    loadings = betas.iloc[:, 1:].astype(float)
    premia = annualized_means[loadings.columns]

    # Align loadings with premia by column label, then add up per stock.
    factor_contributions = loadings.mul(premia, axis=1).sum(axis=1)
    total_return = annualized_means['RF'] + factor_contributions

    return total_return.to_frame('Total Return')

def decompose_returns(betas, factors, trading_days=252):
    """Split each stock's annualized return into per-factor contributions.

    A factor's contribution is the stock's loading on it times the factor's
    annualized mean. ``Total`` is the annualized mean of ``RF`` plus the sum
    of all contributions. The first column of ``betas`` (the intercept) is
    not included.

    Args:
        betas: DataFrame indexed by stock whose first column is the
            intercept and whose remaining columns are named after columns
            of ``factors``.
        factors: DataFrame of per-period factor returns including ``RF``.
        trading_days: Number of periods per year used to annualize the
            per-period means.

    Returns:
        Float DataFrame indexed like ``betas`` with one column per non-RF
        factor (in the order they appear in ``factors``), followed by
        ``'RF'`` and ``'Total'``.

    Raises:
        KeyError: If a loading column has no matching column in ``factors``.
    """
    annualized_means = factors.mean() * trading_days
    rf_annualized = annualized_means['RF']

    # Everything after the first (intercept) column is a factor loading.
    # Casting also accepts loadings stored as Python objects.
    loadings = betas.iloc[:, 1:].astype(float)
    factor_contributions = loadings.mul(annualized_means[loadings.columns], axis=1)

    # Lay the columns out in the factors' own order; a factor with no
    # loading column shows up as NaN.
    factor_columns = factors.columns.drop('RF').tolist()
    decomposition = factor_contributions.reindex(columns=factor_columns)
    decomposition['RF'] = rf_annualized
    decomposition['Total'] = rf_annualized + factor_contributions.sum(axis=1)

    return decomposition

symbols = ['AAPL', 'AMZN', 'META', 'GOOGL', 'MSFT', 'NVDA', 'TSLA']
start_date = '2014-01-01'

carhart_factors = download_carhart_factors()
stock_returns = download_stock_data(symbols, start_date)

# Strip any timezone so the two date indexes can be compared directly.
stock_returns.index = stock_returns.index.tz_localize(None)
carhart_factors.index = carhart_factors.index.tz_localize(None)

# Coerce and drop incomplete rows BEFORE intersecting the dates. Dropping
# afterwards could remove a date from one frame only, leaving the two
# indexes out of step for the regressions below.
carhart_factors = carhart_factors.apply(pd.to_numeric, errors='coerce').dropna()
stock_returns = stock_returns.dropna()

# Keep only the dates for which both returns and factors are available.
common_dates = stock_returns.index.intersection(carhart_factors.index)
stock_returns = stock_returns.loc[common_dates]
carhart_factors = carhart_factors.loc[common_dates]

betas = calculate_betas(stock_returns, carhart_factors)
annualized_returns = calculate_annualized_returns(betas, carhart_factors)
returns_decomposition = decompose_returns(betas, carhart_factors)

# Print results
print("Betas (including Alpha):")
print(betas)

print("\nAnnualized Returns:")
print(annualized_returns)

print("\nReturns Decomposition:")
print(returns_decomposition)
