# Regression Analysis: PSKY vs. the Fama-French Market Model
Reference: Kenneth French Data Library<br>
U.S. Research Returns Data (Downloadable Files)<br>
https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html<br>
Daily Returns: July 1, 1926 - November 30, 2025<br>

## Single Factor Models with the Fama-French Market Portfolio

In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the Fama-French daily factor file from the working directory.
# - skiprows=4 / skipfooter=1 discard the first four lines and the last line of the file
#   (presumably non-data text in the download -- confirm against the CSV).
# - skipfooter is not supported by the C parser, hence engine='python'.
# - The first column is parsed as dates and becomes the index.
df = pd.read_csv(
    'F-F_Research_Data_Factors_daily.csv',
    engine='python',
    skiprows=4,
    skipfooter=1,
    index_col=0,
    parse_dates=[0],
)

In [3]:
# Preview the parsed factor table (dates as index, one column per factor).
df

Unnamed: 0,Mkt-RF,SMB,HML,RF
1926-07-01,0.09,-0.25,-0.27,0.01
1926-07-02,0.45,-0.33,-0.06,0.01
1926-07-06,0.17,0.30,-0.39,0.01
1926-07-07,0.09,-0.58,0.02,0.01
1926-07-08,0.22,-0.38,0.19,0.01
...,...,...,...,...
2025-11-21,1.03,1.66,0.74,0.02
2025-11-24,1.61,0.30,-0.96,0.02
2025-11-25,1.04,1.65,0.04,0.02
2025-11-26,0.69,-0.06,-0.07,0.02


In [4]:
# Check that the index parsed as a DatetimeIndex and that every column is numeric and fully populated.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 26129 entries, 1926-07-01 to 2025-11-28
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Mkt-RF  26129 non-null  float64
 1   SMB     26129 non-null  float64
 2   HML     26129 non-null  float64
 3   RF      26129 non-null  float64
dtypes: float64(4)
memory usage: 1020.7 KB


#### Single Factor Model

In [5]:
# Restrict the factor table to the 2010-2025 sample and to the two columns used by the
# single-factor model. Label slicing with .loc includes both endpoints.
sample_period = slice("2010", "2025")
model_columns = ['Mkt-RF', 'RF']
df = df.loc[sample_period, model_columns]

In [6]:
# Preview the 2010-2025 subset with only the Mkt-RF and RF columns.
df

Unnamed: 0,Mkt-RF,RF
2010-01-04,1.69,0.00
2010-01-05,0.31,0.00
2010-01-06,0.13,0.00
2010-01-07,0.40,0.00
2010-01-08,0.33,0.00
...,...,...
2025-11-21,1.03,0.02
2025-11-24,1.61,0.02
2025-11-25,1.04,0.02
2025-11-26,0.69,0.02


In [None]:
# Download PSKY daily prices from Yahoo Finance.
# - `end` is exclusive, so '2025-11-29' keeps the 2025-11-28 session as the last row.
# - multi_level_index=False (available from yfinance 0.2.48) returns flat column names
#   instead of a hierarchical column index.
# - auto_adjust=True is pinned explicitly: its default differs between yfinance releases,
#   and the returns computed from 'Close' should be based on split/dividend-adjusted
#   prices in every environment.
psky = yf.download(
    'PSKY',
    start='2010-01-01',
    end='2025-11-29',
    auto_adjust=True,
    multi_level_index=False,
)

[*********************100%***********************]  1 of 1 completed


In [11]:
# Preview the downloaded price frame.
psky

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,10.735095,10.848096,10.531693,10.704961,6710900
2010-01-05,10.682361,10.833028,10.584426,10.682361,5441000
2010-01-06,10.343358,10.727562,10.298158,10.554294,14566200
2010-01-07,10.546761,10.735096,10.237891,10.381026,9000300
2010-01-08,10.659763,10.885766,10.456362,10.539229,6732900
...,...,...,...,...,...
2025-11-21,15.829351,15.978779,15.400992,15.640076,5483100
2025-11-24,15.002520,15.839313,14.942748,15.829352,13558300
2025-11-25,15.580306,15.610191,15.042367,15.092176,4552500
2025-11-26,15.729733,15.789504,15.391030,15.620153,6666200


In [15]:
# Keep only the closing-price column, which turns the price DataFrame into a Series.
# NOTE: not re-runnable on its own -- once psky is a Series, selecting 'Close' again fails.
psky = psky['Close']

In [16]:
# Preview the closing-price Series.
psky

Date
2010-01-04    10.735095
2010-01-05    10.682361
2010-01-06    10.343358
2010-01-07    10.546761
2010-01-08    10.659763
                ...    
2025-11-21    15.829351
2025-11-24    15.002520
2025-11-25    15.580306
2025-11-26    15.729733
2025-11-28    15.958856
Name: Close, Length: 4002, dtype: float64

In [17]:
# Convert closing prices to simple daily returns (p_t / p_{t-1} - 1).
# fill_method=None disables pct_change's legacy forward-fill of missing prices, which
# would otherwise fabricate 0% returns across gaps (and is deprecated in recent pandas).
# dropna() then removes the undefined first observation and any return touching a gap.
psky = psky.pct_change(fill_method=None).dropna()

In [18]:
# Preview the daily return Series (still in decimal form at this point).
psky

Date
2010-01-05   -0.004912
2010-01-06   -0.031735
2010-01-07    0.019665
2010-01-08    0.010714
2010-01-11   -0.004241
                ...   
2025-11-21    0.013393
2025-11-24   -0.052234
2025-11-25    0.038513
2025-11-26    0.009591
2025-11-28    0.014566
Name: Close, Length: 4001, dtype: float64

In [None]:
# Rescale decimal returns to percent so PSKY is in the same units as the
# Fama-French Mkt-RF column.
# NOTE: this rebinding is not idempotent -- running the cell twice scales by 10,000.
psky = psky.mul(100)

In [20]:
# Attach the PSKY returns to the factor table as a new column. Values are aligned on
# the date index, so factor dates without a PSKY return are left as NaN.
df = df.assign(PSKY=psky)

In [21]:
# Discard every date that is missing a value in any column, so only days with
# both factor data and a PSKY return remain.
df = df.dropna(axis=0, how='any')

In [22]:
# Preview the merged frame after rows with missing values were dropped.
df

Unnamed: 0,Mkt-RF,RF,PSKY
2010-01-05,0.31,0.00,-0.491233
2010-01-06,0.13,0.00,-3.173480
2010-01-07,0.40,0.00,1.966504
2010-01-08,0.33,0.00,1.071445
2010-01-11,0.13,0.00,-0.424063
...,...,...,...
2025-11-21,1.03,0.02,1.339283
2025-11-24,1.61,0.02,-5.223409
2025-11-25,1.04,0.02,3.851263
2025-11-26,0.69,0.02,0.959073


In [23]:
# Confirm the merged frame's date range, row count and that no column has missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4001 entries, 2010-01-05 to 2025-11-28
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Mkt-RF  4001 non-null   float64
 1   RF      4001 non-null   float64
 2   PSKY    4001 non-null   float64
dtypes: float64(3)
memory usage: 125.0 KB


In [24]:
# Summary statistics (count, mean, std, quantiles, extremes) for each column.
df.describe()

Unnamed: 0,Mkt-RF,RF,PSKY
count,4001.0,4001.0,4001.0
mean,0.053319,0.005381,0.046576
std,1.124072,0.0079,2.707133
min,-12.01,0.0,-28.353
25%,-0.4,0.0,-1.215694
50%,0.08,0.0,0.045184
75%,0.59,0.01,1.226273
max,9.65,0.02,36.736547


In [26]:
# Give the market excess-return column ('Mkt-RF') the hyphen-free name 'MktPrem'.
df = df.rename({'Mkt-RF': 'MktPrem'}, axis='columns')

In [27]:
# Preview the frame with the renamed MktPrem column.
df

Unnamed: 0,MktPrem,RF,PSKY
2010-01-05,0.31,0.00,-0.491233
2010-01-06,0.13,0.00,-3.173480
2010-01-07,0.40,0.00,1.966504
2010-01-08,0.33,0.00,1.071445
2010-01-11,0.13,0.00,-0.424063
...,...,...,...
2025-11-21,1.03,0.02,1.339283
2025-11-24,1.61,0.02,-5.223409
2025-11-25,1.04,0.02,3.851263
2025-11-26,0.69,0.02,0.959073


In [28]:
# Recover the total market return by adding the risk-free rate back to the
# market premium: Mkt = (Mkt - RF) + RF.
df['Mkt'] = df['MktPrem'].add(df['RF'])

In [29]:
# Excess return of PSKY: its daily return minus the risk-free rate.
df['PSKYPrem'] = df['PSKY'].sub(df['RF'])

In [30]:
# Preview the frame with the derived Mkt and PSKYPrem columns.
df

Unnamed: 0,MktPrem,RF,PSKY,Mkt,PSKYPrem
2010-01-05,0.31,0.00,-0.491233,0.31,-0.491233
2010-01-06,0.13,0.00,-3.173480,0.13,-3.173480
2010-01-07,0.40,0.00,1.966504,0.40,1.966504
2010-01-08,0.33,0.00,1.071445,0.33,1.071445
2010-01-11,0.13,0.00,-0.424063,0.13,-0.424063
...,...,...,...,...,...
2025-11-21,1.03,0.02,1.339283,1.05,1.319283
2025-11-24,1.61,0.02,-5.223409,1.63,-5.243409
2025-11-25,1.04,0.02,3.851263,1.06,3.831263
2025-11-26,0.69,0.02,0.959073,0.71,0.939073
