## Setup

In [None]:
# import dependencies
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
# Configuration constants used by the cells below.
# Rolling-window length handed to pandas `rolling(window=...)` in calculate_kurtosis (1825 days = 5 * 365).
KURTOSIS_WINDOW = "1825D"
# CSV file read by the preprocessing cell.
DATA_FILE = 'crsp.csv'
# CSV file read by fama_french().
FAMA_FRENCH_FACTORS = 'F-F_Research_Data_Factors.csv'

In [None]:
# preprocessing: load the stock file and normalise the column types

# Only these columns are used by the rest of the notebook.
crsp_columns = ['permno', 'comnam', 'date', 'prc', 'ret', 'dlret']

# `usecols` avoids parsing columns that would be discarded anyway; the explicit
# re-selection keeps the columns in the order listed above.
src = pd.read_csv(DATA_FILE, usecols=crsp_columns)[crsp_columns]

# Any non-numeric entry in the return columns becomes NaN. `dlret` is coerced as
# well because get_return() uses it as a numeric fallback when `ret` is missing.
src['ret'] = pd.to_numeric(src['ret'], errors='coerce')
src['dlret'] = pd.to_numeric(src['dlret'], errors='coerce')

# Dates are stored as YYYYMMDD.
src['date'] = pd.to_datetime(src['date'], format='%Y%m%d')

In [None]:
def assign_quantiles(data: pd.DataFrame, field):
    """Rank the rows of ``data`` into ten equal-sized bins of ``field``.

    The input frame is not modified: ``sort_values`` produces a new frame
    ordered by ascending ``field``, and the integer bin label from
    ``pd.qcut(..., q=10, labels=False)`` is stored in its ``quantile`` column.

    NOTE(review): an identical ``assign_quantiles`` is defined again in a later
    cell and overrides this one once that cell has run.

    Args:
        data: frame containing the column named by ``field``.
        field: name of the column to rank on.

    Returns:
        A sorted copy of ``data`` with an added (or replaced) ``quantile`` column.
    """
    ordered = data.sort_values(by=field)
    bin_labels = pd.qcut(ordered[field], q=10, labels=False)
    return ordered.assign(quantile=bin_labels)

In [None]:
def calculate_kurtosis(frame: pd.DataFrame, window=None):
    """Add a per-stock rolling kurtosis of ``ret`` and drop rows without one.

    For every ``permno`` the returns are ordered by ``date`` and a time-based
    rolling kurtosis is computed; the values are written back to the rows they
    belong to. The caller's frame is left untouched.

    Args:
        frame: data with ``permno``, ``date`` (datetime) and numeric ``ret``
            columns. Index labels are assumed to be unique, since values are
            aligned back to rows by index label.
        window: rolling window passed to ``Series.rolling``; defaults to the
            notebook-level ``KURTOSIS_WINDOW`` when omitted.

    Returns:
        A new frame, in the original row order, with a float ``kurtosis``
        column, restricted to rows where a kurtosis value could be computed.
    """
    if window is None:
        window = KURTOSIS_WINDOW

    pieces = []
    for _, group in frame.groupby('permno'):
        # A time-based rolling window needs a monotonic date index, so order
        # each stock's rows explicitly instead of relying on the file order.
        ordered = group.sort_values('date', kind='stable')
        rolled = ordered.set_index('date')['ret'].rolling(window=window).kurt()
        # Re-key the values by the original row labels so they align with `frame`.
        pieces.append(pd.Series(rolled.to_numpy(), index=ordered.index))

    kurtosis = pd.concat(pieces) if pieces else pd.Series(dtype=float)

    # `assign` aligns on the index and returns a copy; rows that belong to no
    # group get NaN and are removed together with the other NaN rows below.
    result = frame.assign(kurtosis=kurtosis.astype(float))
    return result.dropna(subset=['kurtosis'])

def assign_quantiles(data: pd.DataFrame, field):
    """Rank the rows of ``data`` into ten equal-sized bins of ``field``.

    The input frame is not modified: ``sort_values`` produces a new frame
    ordered by ascending ``field``, and the integer bin label from
    ``pd.qcut(..., q=10, labels=False)`` is stored in its ``quantile`` column.

    NOTE(review): this repeats the ``assign_quantiles`` definition from the
    previous cell; being the later one, it is the definition in effect after
    a top-to-bottom run.

    Args:
        data: frame containing the column named by ``field``.
        field: name of the column to rank on.

    Returns:
        A sorted copy of ``data`` with an added (or replaced) ``quantile`` column.
    """
    ordered = data.sort_values(by=field)
    bin_labels = pd.qcut(ordered[field], q=10, labels=False)
    return ordered.assign(quantile=bin_labels)

In [None]:
def mean_return(portfolio: pd.DataFrame):
    """Return the mean of the ``ret`` column of ``portfolio``."""
    returns = portfolio.loc[:, 'ret']
    return returns.mean()

In [None]:
# creating portfolios for next month
def create_portfolio(data: pd.DataFrame, long: list, short: list):
    """Select the stocks that fall into the requested kurtosis quantiles.

    ``data`` is ranked with ``assign_quantiles(data, "kurtosis")`` and the rows
    of every quantile listed in ``long`` followed by ``short`` are stacked in
    that order.

    NOTE(review): the result keeps only ``permno``, ``comnam`` and ``date``, so
    it does not record whether a stock came from the long or the short list.

    Args:
        data: one period of stock data with a ``kurtosis`` column.
        long: quantile labels for the long leg (a list; joined with ``short`` via ``+``).
        short: quantile labels for the short leg.

    Returns:
        Frame with the ``permno``, ``comnam`` and ``date`` of the selected stocks.
    """
    ranked = assign_quantiles(data, "kurtosis")

    legs = []
    for q in long + short:
        legs.append(ranked.loc[ranked['quantile'] == q])

    selected = pd.concat(legs)
    return selected[['permno', 'comnam', 'date']]

In [None]:
# define Fama-French three-factor regression
def fama_french(data: pd.DataFrame):
    """Regress excess returns on the Fama-French factors read from disk.

    The factor file (``FAMA_FRENCH_FACTORS``) is joined to ``data`` by calendar
    month, the excess return ``ret - RF`` is formed, and an OLS of that excess
    return on a constant, ``MKT_RF``, ``SMB`` and ``HML`` is fitted.

    Args:
        data: frame with a datetime ``date`` column and a ``ret`` column. It is
            not modified; the month key is added to an internal copy.

    Returns:
        The fitted statsmodels OLS results object.

    Raises:
        ValueError: from ``pd.to_numeric(..., errors='raise')`` if a factor or
            the excess return contains a value that cannot be parsed as a number.
    """
    # Load Fama-French factors; `Date` is parsed as YYYYMM.
    factors = pd.read_csv(FAMA_FRENCH_FACTORS)
    factors['month_year'] = pd.to_datetime(factors['Date'], format='%Y%m').dt.to_period('M')

    # `assign` returns a copy, so the caller's frame does not gain or lose a column.
    stock = data.assign(month_year=data['date'].dt.to_period('M'))

    # Keep only the months present in the stock data and the columns needed below.
    factor_cols = ['MKT_RF', 'SMB', 'HML', 'RF']
    in_sample = factors['month_year'].isin(stock['month_year'])
    factors = factors.loc[in_sample, ['month_year'] + factor_cols].copy()

    # Adjust scale of factors (presumably published in percent - TODO confirm).
    factors[factor_cols] = factors[factor_cols] / 100

    # Merge Fama-French factors with stock data; months without factors get NaN.
    regression_in = pd.merge(stock, factors, on='month_year', how='left')

    # Compute excess return
    regression_in['excess_return'] = regression_in['ret'] - regression_in['RF']

    # Prepare regression variables
    X = regression_in[['MKT_RF', 'SMB', 'HML']].apply(pd.to_numeric, errors='raise')
    X = sm.add_constant(X, has_constant='add')
    y = pd.to_numeric(regression_in['excess_return'], errors='raise')

    # Rows with a missing return or missing factors are excluded from the fit
    # rather than propagating NaN into the estimates.
    model = sm.OLS(y, X, missing='drop').fit()
    return model

In [None]:
# get return in next month
def get_return(portfolio: pd.DataFrame, next_month: pd.DataFrame):
    """Equal-weighted mean return of ``portfolio`` over ``next_month``.

    Each holding is matched to ``next_month`` by ``permno``. Its return is
    ``ret`` when available, otherwise ``dlret``, otherwise 0 (this also covers
    holdings that have no row in ``next_month``).

    Args:
        portfolio: holdings with a ``permno`` column; it must not already have
            ``ret`` or ``dlret`` columns, since those are taken from ``next_month``.
        next_month: data with ``permno``, ``ret`` and ``dlret`` columns.

    Returns:
        The mean of the per-holding returns (NaN for an empty portfolio).
    """
    merged = pd.merge(portfolio, next_month[['permno', 'ret', 'dlret']], on='permno', how='left')

    # Coerce both return columns: a non-numeric entry must count as missing,
    # otherwise a string pulled in from `dlret` breaks the mean below.
    ret = pd.to_numeric(merged['ret'], errors='coerce')
    dlret = pd.to_numeric(merged['dlret'], errors='coerce')

    next_month_return = ret.fillna(dlret).fillna(0)
    return next_month_return.mean()

In [None]:
# define portfolio evaluation strategy
def evaluate_portfolio(portfolio: pd.DataFrame, next_month: pd.DataFrame):
    """Return the mean of the portfolio's ``next_month_return`` column.

    ``portfolio`` is the portfolio for month n+1 and must already carry a
    ``next_month_return`` column. ``next_month`` is accepted but not used by
    the current implementation.
    """
    realized = portfolio.loc[:, 'next_month_return']
    return realized.mean()

## Experiment

In [None]:
# Calculate the rolling kurtosis of returns per stock; `df` holds the rows
# returned by calculate_kurtosis, i.e. those with a kurtosis value.
df = calculate_kurtosis(src)

In [None]:
# Extract unique months
df['month_year'] = df['date'].dt.to_period('M')
# Drop everything before July 1926 (presumably the first month covered by the
# factor file - TODO confirm).
df = df[df['month_year'] >= pd.Period('1926-07', freq='M')]
# Series.unique() returns values in order of first appearance, which is not
# chronological when the rows are grouped by stock. Sort explicitly so that
# months[-1] is the latest month and months[:-1] drops exactly that one.
months = df['month_year'].drop_duplicates().sort_values().array


##### Plotted kurtosis of 15 stocks (1990-2025)

In [None]:
def plot_kurtosis_over_time_limited_filtered(data, limit=15):
    """Plot kurtosis against date for a limited number of stocks.

    Only rows dated 1990-01-01 through 2025-12-31 are used. Stocks are taken in
    ``groupby('permno')`` order and one line is drawn per stock.

    Args:
        data: frame with ``permno``, ``date`` and ``kurtosis`` columns.
        limit: maximum number of stocks to draw. ``None`` draws all of them;
            zero or a negative value draws none.
    """
    in_range = data[(data['date'] >= '1990-01-01') & (data['date'] <= '2025-12-31')]

    plt.figure(figsize=(12, 5))
    plotted = 0
    for permno, group in in_range.groupby('permno'):
        # Check before drawing so that limit <= 0 cannot fall through and plot every stock.
        if limit is not None and plotted >= limit:
            break
        # Draw each line in date order regardless of the row order in `data`.
        group = group.sort_values('date')
        plt.plot(group['date'], group['kurtosis'], label=f'Permno {permno}')
        plotted += 1

    plt.xlabel('Date')
    plt.ylabel('Kurtosis')
    plt.title('Kurtosis Over Time for Limited Permno')
    if plotted:
        # A legend without any labelled line only produces a warning.
        plt.legend(loc='upper right', fontsize='small', ncol=2)
    plt.show()

# Plot kurtosis over time using the function's default limit of 15 stocks
plot_kurtosis_over_time_limited_filtered(df)

##### Mean return of each quantile (all-time)

In [None]:
# measure mean return of quantiles through time

# Rank each month's stocks once (the ranking does not depend on the quantile
# being reported) and record that month's mean return per quantile.
monthly_quantile_means = []
for month, month_data in df.groupby('month_year'):
    quantiles = assign_quantiles(month_data, "kurtosis")
    monthly_quantile_means.append(quantiles.groupby('quantile')['ret'].mean().rename(month))

# Rows are months, columns are the quantile labels 0-9; a quantile that is
# absent in a month is NaN there and is skipped by the mean below.
quantile_means = pd.DataFrame(monthly_quantile_means).reindex(columns=range(10))

for i in range(10):
    print(f"Mean return (all time) - Q{i+1}: ", quantile_means[i].mean())

It appears that quantiles with higher kurtosis, when viewed as portfolios, perform better than quantiles with lower kurtosis.

#### Portfolio generation and evaluation

In [None]:
# Generate time series for port_small and port_large
portfolio_small_returns = []
portfolio_large_returns = []
quantile_returns = {q: [] for q in range(10)}

# Split the data by month once instead of re-filtering the full frame twice per iteration.
data_by_month = {period: rows for period, rows in df.groupby('month_year')}
no_rows = df.iloc[0:0]  # stands in for a month that has no data

# Walk the formation months chronologically. Sorting guarantees that the month
# left out is the latest one, which has no following month to be evaluated on.
# NOTE: if the calendar month after `month` has no rows, get_return() counts
# every holding with a return of 0.
for month in sorted(months)[:-1]:
    next_month = month + 1

    month_data = assign_quantiles(data_by_month.get(month, no_rows), "kurtosis")
    next_month_data = data_by_month.get(next_month, no_rows)

    portfolio_small = create_portfolio(month_data, [9], [0])
    portfolio_large = create_portfolio(month_data, [9, 8], [0, 1])

    # get quantile and portfolio returns
    for q in range(10):
        quantile_data = month_data[month_data['quantile'] == q]
        quantile_returns[q].append(
            (month, get_return(quantile_data[['permno', 'comnam', 'date']], next_month_data))
        )

    portfolio_small_returns.append((month, get_return(portfolio_small, next_month_data)))
    portfolio_large_returns.append((month, get_return(portfolio_large, next_month_data)))

# Create time series (indexed by formation month)
port_small_series = pd.Series(
    dict(portfolio_small_returns)
).sort_index()

port_large_series = pd.Series(
    dict(portfolio_large_returns)
).sort_index()

quantile_series = {
    q: pd.Series(dict(returns)).sort_index() for q, returns in quantile_returns.items()
}


##### Misc

In [None]:
# generate portfolio for next month (from now)
# Take the chronologically latest month explicitly; months[-1] is only the
# latest month when `months` happens to be sorted.
month = max(months)
month_data = df[df['month_year'] == month]

portfolio_small = create_portfolio(month_data, [9], [0])
portfolio_large = create_portfolio(month_data, [9,8], [0,1])

print("Portfolio Small:", portfolio_small.head())
print("Portfolio Large:", portfolio_large.head())