# $\text{Code (Vomit)} \; \mid \; \, : \text{P}$

### $ 1. \; \text{Collection (of Data)} $

In [1]:
import yfinance as yf
import pandas as pd

# List of ETF tickers
tickers = ['FXE', 'EWJ', 'GLD', 'QQQ', 'SPY', 'SHV', 'DBA', 'USO', 'XBI', 'ILF', 'EPP', 'FEZ']

# Download historical data for each ETF
start_date = '2007-03-01'
end_date = '2024-03-31'
data = yf.download(tickers, start=start_date, end=end_date)

# Only keep the adjusted close prices
adj_close = data['Adj Close']

# Save data to CSV
adj_close.to_csv('etf_prices.csv')

[*********************100%%**********************]  12 of 12 completed


### $ 2. \; \text{Construction (of)} \; \mathcal{THE} \; \, \textbf{Factor Model} $

In [6]:
import pandas as pd

# Load the Fama-French factors CSV
file_path = './F-F_Research_Data_Factors_daily.CSV'

# Read the CSV with specified delimiter and skip initial rows if necessary
try:
    # Check if there are any header rows to skip
    with open(file_path, 'r') as file:
        lines = file.readlines()
        for i, line in enumerate(lines[:10]):  # Inspect the first 10 lines
            print(f"Line {i + 1}: {line}")

    # Adjust the skiprows parameter based on the output
    ff_data = pd.read_csv(file_path, skiprows=4, index_col=0)
    ff_data.index = pd.to_datetime(ff_data.index, format='%Y%m%d')
    ff_data = ff_data.loc['2007-03-01':'2024-03-31']

    print(ff_data.head())  # Display the first few rows to verify
except Exception as e:
    print(f"Error reading the CSV file: {e}")

Line 1: This file was created by CMPT_ME_BEME_RETS_DAILY using the 202403 CRSP database.

Line 2: The Tbill return is the simple daily rate that, over the number of trading days

Line 3: in the month, compounds to 1-month TBill rate from Ibbotson and Associates Inc.

Line 4: 

Line 5: ,Mkt-RF,SMB,HML,RF

Line 6: 19260701,    0.10,   -0.25,   -0.27,   0.009

Line 7: 19260702,    0.45,   -0.33,   -0.06,   0.009

Line 8: 19260706,    0.17,    0.30,   -0.39,   0.009

Line 9: 19260707,    0.09,   -0.58,    0.02,   0.009

Line 10: 19260708,    0.21,   -0.38,    0.19,   0.009

Error reading the CSV file: time data "Copyright 2024 Kenneth R. French" doesn't match format "%Y%m%d", at position 25710. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You 

In [4]:
import statsmodels.api as sm

def fama_french_factors(start_date, end_date):
    # Load Fama-French factors (can be downloaded from Kenneth French's data library)
    ff_data = pd.read_csv('F-F_Research_Data_Factors_daily.csv', index_col=0)
    ff_data.index = pd.to_datetime(ff_data.index, format='%Y%m%d')
    ff_data = ff_data[(ff_data.index >= start_date) & (ff_data.index <= end_date)]
    return ff_data

# Load ETF data
etf_data = pd.read_csv('etf_prices.csv', index_col=0, parse_dates=True)
etf_returns = etf_data.pct_change().dropna()

# Load Fama-French factors
ff_factors = fama_french_factors(start_date, end_date)

# Merge ETF returns with Fama-French factors
merged_data = etf_returns.join(ff_factors, how='inner')

# Estimate factor loadings for each ETF
factor_loadings = {}
for ticker in tickers:
    model = sm.OLS(merged_data[ticker], sm.add_constant(merged_data[['Mkt-RF', 'SMB', 'HML']]))
    results = model.fit()
    factor_loadings[ticker] = results.params

# Convert factor loadings to a DataFrame
factor_loadings_df = pd.DataFrame(factor_loadings).T
factor_loadings_df.columns = ['Alpha', 'Mkt-RF', 'SMB', 'HML']
factor_loadings_df.to_csv('factor_loadings.csv')

ParserError: Error tokenizing data. C error: Expected 2 fields in line 5, saw 5
