# Practical Exercise 7.01: Factor Evolution

In [None]:
import pandas as pd
import numpy as np
import requests
import zipfile
from datetime import datetime
import matplotlib.pyplot as plt


In [None]:
# Fama-French Data Library: download the 5-factor (2x3) daily archive
# and extract the CSV file it contains into the working directory.

url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_daily_csv.zip'

# Local path where the downloaded ZIP archive is saved
zip_path1 = 'F-F_Research_Data_5_Factors_2x3_daily_csv.zip'

# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being saved as a ZIP.
response = requests.get(url, timeout=60)
response.raise_for_status()
with open(zip_path1, 'wb') as f:
    f.write(response.content)
print(f"File downloaded and saved as: {zip_path1}")

with zipfile.ZipFile(zip_path1, 'r') as z:
    # Locate the CSV member; the extension is matched case-insensitively
    # in case the archive member uses an upper-case '.CSV' suffix.
    csv_files = [name for name in z.namelist() if name.lower().endswith('.csv')]
    if not csv_files:
        # Fail with a clear message instead of an IndexError on csv_files[0]
        raise FileNotFoundError(f"No CSV file found inside '{zip_path1}'.")
    csv_file_name = csv_files[0]
    z.extract(csv_file_name, '.')
    print(f"File '{csv_file_name}' extracted correctly.")


In [None]:
# Read the extracted 5-factor CSV. The first three lines of the file are
# skipped, so the first remaining line supplies the column names.
filepath = 'F-F_Research_Data_5_Factors_2x3_daily.csv'
factors = pd.read_csv(filepath, skiprows=3, low_memory=False)

# Label the first column 'Date', whatever name it was parsed with
first_col = factors.columns[0]
factors = factors.rename(columns={first_col: 'Date'})
factors.tail()


In [None]:
# Momentum factor: download the daily momentum archive from the
# Fama-French Data Library and extract the CSV file it contains.
url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_daily_csv.zip'

# Local path where the downloaded ZIP archive is saved
zip_path2 = 'F-F_Momentum_Factor_daily_csv.zip'

# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being saved as a ZIP.
response = requests.get(url, timeout=60)
response.raise_for_status()
with open(zip_path2, 'wb') as f:
    f.write(response.content)
print(f"File downloaded and saved as: {zip_path2}")

with zipfile.ZipFile(zip_path2, 'r') as z:
    # Locate the CSV member; the extension is matched case-insensitively
    # in case the archive member uses an upper-case '.CSV' suffix.
    csv_files2 = [name for name in z.namelist() if name.lower().endswith('.csv')]
    if not csv_files2:
        # Fail with a clear message instead of an IndexError on csv_files2[0]
        raise FileNotFoundError(f"No CSV file found inside '{zip_path2}'.")
    csv_file_name2 = csv_files2[0]
    z.extract(csv_file_name2, '.')
    print(f"File '{csv_file_name2}' extracted correctly.")
Output:
File downloaded and saved as: F-F_Momentum_Factor_daily_csv.zip
File 'F-F_Momentum_Factor_daily.csv' extracted correctly.


In [None]:
def _with_parsed_dates(frame):
    """Parse the 'Date' column of *frame* and drop rows without a valid date.

    The first run of eight digits in each 'Date' value is read as YYYYMMDD;
    values that do not yield a valid date become NaT and their rows are
    removed. The 'Date' column of *frame* is overwritten in place and the
    filtered frame is returned.
    """
    digits = frame['Date'].astype(str).str.extract(r'(\d{8})', expand=False)
    frame['Date'] = pd.to_datetime(digits, format='%Y%m%d', errors='coerce')
    return frame.dropna(subset=['Date'])


# --- Momentum file: load, label the first column 'Date', parse dates ---
filepath2 = 'F-F_Momentum_Factor_daily.csv'

# The first 13 lines of the file are skipped before the header row
momentum = pd.read_csv(filepath2, skiprows=13, low_memory=False)
momentum = momentum.rename(columns={momentum.columns[0]: 'Date'})
momentum = _with_parsed_dates(momentum)


# --- Fama-French 5 Factors: work on a copy so `factors` stays untouched ---
df1 = pd.DataFrame(factors).copy()
df1.columns = df1.columns.str.strip()  # drop padding around column names

# Fall back to a case-insensitive match (e.g. 'DATE') for the date column
if 'Date' not in df1.columns:
    date_matches = [c for c in df1.columns if c.lower() == 'date']
    if date_matches:
        df1 = df1.rename(columns={date_matches[0]: 'Date'})

df1 = _with_parsed_dates(df1)

# --- Merge on 'Date', order chronologically, drop rows with any NaN ---
df_combined = (
    df1.merge(momentum, on='Date', how='outer')
    .sort_values(by='Date')
    .dropna()
)

# --- Display the result ---
print(df_combined.tail())


In [None]:
# Normalise the column labels first: the momentum column label can carry
# trailing padding (e.g. 'Mom   '), and matching on the exact padded text
# would silently skip the rename if the padding ever changed.
df_combined.columns = df_combined.columns.str.strip()

# Drop the 'RF' column; only the factor series are kept
df_combined.drop(columns='RF', inplace=True)

# Map the library's factor codes to descriptive headings
replacement = {
    'Mkt-RF': 'Market',
    'SMB': 'Size',
    'HML': 'Value',
    'RMW': 'Quality',
    'CMA': 'Investment',
    'Mom': 'Momentum',
}
# Rename headings
df_combined.rename(columns=replacement, inplace=True)

# See the new headings
df_combined.tail()


In [None]:
# Filter the data to the analysis window (both end dates inclusive)
start_date = pd.to_datetime('19800701', format='%Y%m%d')
end_date = pd.to_datetime('20240531', format='%Y%m%d')
try:
    in_window = (df_combined['Date'] >= start_date) & (df_combined['Date'] <= end_date)
    filtered_factors = df_combined[in_window]
except TypeError as e:
    # Show the dtypes of the frame being filtered, then re-raise: continuing
    # would only fail later on an undefined `filtered_factors`.
    print("TypeError encountered:", e)
    print("Re-checking the data types...")
    print(df_combined.dtypes)
    raise

# Work on a copy so the filtered data itself is left unchanged
accumulated_factors = filtered_factors.copy()

# Every column except 'Date' is treated as a factor series
factor_columns = [c for c in accumulated_factors.columns if c != 'Date']

# Start each factor at 100 and add the running sum of its daily values.
# NOTE: this is additive accumulation (cumsum), not compounding.
accumulated_factors[factor_columns] = 100 + accumulated_factors[factor_columns].cumsum()

# Graph the cumulative evolution of each factor.
plt.figure(figsize=(10, 6))
for column in factor_columns:
    plt.plot(accumulated_factors['Date'], accumulated_factors[column], label=column)

# Add title and axis labels
plt.title('Cumulative Evolution of Factors with Base 100')
plt.xlabel('Date')
plt.ylabel('Index(Base 100)')
plt.legend()
plt.grid(True)
