## Setup

In [3]:
# import dependencies
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
# variables
# Rolling window handed to .rolling() in calculate_kurtosis; "180D" is a pandas
# time offset, i.e. the window covers the trailing 180 calendar days.
KURTOSIS_WINDOW = "180D"
# CSV read by the preprocessing cell; it must contain the permno, ticker,
# comnam, date, prc and ret columns selected there.
DATA_FILE = 'crsp.csv'
# CSV read by fama_french(); it must contain the Date, MKT_RF, SMB, HML and RF
# columns used there.
FAMA_FRENCH_FACTORS = 'F-F_Research_Data_Factors.csv'

In [4]:
# preprocessing
# Only these columns are used downstream; reading just them keeps memory down.
SRC_COLUMNS = ['permno', 'ticker', 'comnam', 'date', 'prc', 'ret']

# Built as a single chain so each step yields a fresh frame: no assignment into
# a column-subset slice (SettingWithCopyWarning) and the cell is safe to re-run.
src = (
    pd.read_csv(DATA_FILE, usecols=SRC_COLUMNS)[SRC_COLUMNS]
    # Non-numeric return entries become NaN and are dropped on the next line.
    .assign(ret=lambda raw: pd.to_numeric(raw['ret'], errors='coerce'))
    .dropna(subset=['ret'])
    # Dates are parsed after the drop, so only surviving rows must be valid YYYYMMDD.
    .assign(date=lambda raw: pd.to_datetime(raw['date'], format='%Y%m%d'))
)

FileNotFoundError: [Errno 2] No such file or directory: 'csrp.csv'

In [None]:
# define functions
def calculate_kurtosis(frame: pd.DataFrame, window=None):
    """Compute the rolling kurtosis of ``ret`` separately for every ``permno``.

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain ``permno``, ``date`` (datetime) and ``ret`` columns and
        have a unique index. The frame is not modified.
    window : optional
        Rolling window passed to ``Series.rolling``. Defaults to the
        module-level ``KURTOSIS_WINDOW``.

    Returns
    -------
    pd.DataFrame
        A copy of ``frame`` with a float ``kurtosis`` column, restricted to
        the rows for which a kurtosis value could be computed. The original
        index and row order are kept.
    """
    if window is None:
        window = KURTOSIS_WINDOW

    pieces = []
    for _, group in frame.groupby('permno'):
        # Time-based windows need a monotonic DatetimeIndex, so order each
        # security by date first; the original index is kept to map values back.
        ordered = group.sort_values('date', kind='stable')
        rolled = ordered.set_index('date')['ret'].rolling(window=window).kurt()
        pieces.append(pd.Series(rolled.to_numpy(), index=ordered.index, dtype='float64'))

    kurtosis = pd.concat(pieces) if pieces else pd.Series(dtype='float64')

    # assign() aligns on the index and returns a new frame, leaving the input
    # untouched; rows that belong to no group end up NaN and are dropped below.
    result = frame.assign(kurtosis=kurtosis)
    return result.dropna(subset=['kurtosis'])

def assign_quantiles(data: pd.DataFrame, field):
    """Return ``data`` ordered by ``field`` with a decile label per row.

    The rows are sorted ascending by ``field`` and split into ten
    equal-frequency bins with ``pd.qcut``; the bin number is stored in a
    ``quantile`` column. The input frame itself is left unchanged.
    """
    ranked = data.sort_values(by=field)
    decile_labels = pd.qcut(ranked[field], q=10, labels=False)
    return ranked.assign(quantile=decile_labels)

def process_quantiles(data: pd.DataFrame, f):
    """Split ``data`` into kurtosis deciles and apply ``f`` to each of them.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain a ``kurtosis`` column.
    f : callable
        Called once per decile label 0..9 with the rows of that decile.

    Returns
    -------
    list
        The ten return values of ``f``, ordered by decile label, so the
        per-portfolio results are available to the caller instead of being lost.
    """
    data = assign_quantiles(data, "kurtosis")
    return [f(data[data['quantile'] == quantile]) for quantile in range(10)]


In [None]:
# creating portfolios

In [None]:
# define Fama-French regression
def fama_french(data: pd.DataFrame):
    """Regress excess returns on the three Fama-French factors.

    The factor file named by ``FAMA_FRENCH_FACTORS`` is read on every call,
    matched to ``data`` by calendar month, and the OLS model
    ``ret - RF ~ const + MKT_RF + SMB + HML`` is fitted.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain a datetime ``date`` column and a ``ret`` column. The
        frame is not modified.

    Returns
    -------
    The fitted statsmodels OLS results object.

    Raises
    ------
    ValueError
        From ``pd.to_numeric`` when a factor or excess-return value cannot be
        parsed as a number.
    """
    factor_columns = ['MKT_RF', 'SMB', 'HML', 'RF']

    # Load Fama-French factors
    factors = pd.read_csv(FAMA_FRENCH_FACTORS)
    factors['month_year'] = pd.to_datetime(factors['Date'], format='%Y%m').dt.to_period('M')

    # assign() returns a new frame, so the caller's data (often a slice of a
    # larger frame) is neither mutated nor a source of SettingWithCopyWarning.
    returns = data.assign(month_year=data['date'].dt.to_period('M'))

    # Keep only the months and columns that are needed; .copy() makes the
    # rescaling below operate on an independent frame.
    wanted_months = factors['month_year'].isin(returns['month_year'])
    factors = factors.loc[wanted_months, ['month_year'] + factor_columns].copy()

    # Adjust scale of factors
    factors[factor_columns] = factors[factor_columns] / 100

    # Merge Fama-French factors with stock data
    regression_in = pd.merge(returns, factors, on='month_year', how='left')

    # Compute excess return
    # NOTE(review): factors are matched per month, so RF is a monthly figure;
    # if `ret` holds daily returns this subtraction mixes frequencies - confirm.
    regression_in['excess_return'] = regression_in['ret'] - regression_in['RF']

    # Prepare regression variables
    X = regression_in[['MKT_RF', 'SMB', 'HML']]
    X = X.apply(pd.to_numeric, errors='raise')
    X = sm.add_constant(X, has_constant='add')

    y = pd.to_numeric(regression_in['excess_return'], errors='raise')

    # Run regression; the left merge leaves NaN factors for months missing
    # from the factor file, so those observations are dropped instead of
    # making the fit fail.
    model = sm.OLS(y, X, missing='drop').fit()
    return model

In [None]:
# portfolio evaluation
def rate_portfolio(portfolio: pd.DataFrame):
    """Evaluate one portfolio by fitting the Fama-French regression on it.

    Parameters
    ----------
    portfolio : pd.DataFrame
        Rows of a single portfolio, passed unchanged to ``fama_french``.

    Returns
    -------
    The fitted regression results returned by ``fama_french`` (previously the
    result was computed and then discarded).
    """
    # Original note (translated): "this portfolio for month n+1".
    # NOTE(review): presumably the portfolio is meant to be evaluated on the
    # following month's returns; that shift is not implemented here - confirm.
    return fama_french(portfolio)

## Analysis

In [None]:
# calculate kurtosis
# Rolling kurtosis of `ret` per permno; rows without a kurtosis value are
# dropped by calculate_kurtosis, so df can be shorter than src.
df = calculate_kurtosis(src)

In [None]:
# Extract unique months
# Tag each observation with its calendar month and keep July 1926 onwards.
# NOTE(review): 1926-07 is presumably the first month covered by the
# Fama-French factor file - confirm against the file.
df = (
    df.assign(month_year=df['date'].dt.to_period('M'))
      .loc[lambda frame: frame['month_year'] >= pd.Period('1926-07', freq='M')]
)
months = df['month_year'].unique()

# Iterate over months in chronological order. groupby splits the frame in a
# single pass instead of re-scanning every row once per month.
for month, month_data in df.groupby('month_year'):
    # TODO: portfolio evaluation is still disabled; re-enable when ready.
    # process_quantiles(month_data, rate_portfolio)
    pass

In [None]:

def plot_kurtosis_over_time_limited_filtered(data, limit=20, start='2010-01-01', end='2025-12-31'):
    """Plot the kurtosis time series of up to ``limit`` securities.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain ``permno``, ``date`` (datetime) and ``kurtosis`` columns.
    limit : int or None
        Maximum number of permnos to draw, taken in ascending permno order.
        ``None`` draws all of them; ``0`` draws none.
    start, end :
        Inclusive bounds applied to ``date`` before plotting.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(12, 6))
    in_range = (data['date'] >= start) & (data['date'] <= end)
    filtered_data = data[in_range]

    plotted = 0
    for permno, group in filtered_data.groupby('permno'):
        # Checking before drawing makes limit=0 draw nothing; the previous
        # countdown skipped past zero and plotted every permno.
        if limit is not None and plotted >= limit:
            break
        # Order by date so each line runs left to right without zig-zags.
        group = group.sort_values('date')
        plt.plot(group['date'], group['kurtosis'], label=f'Permno {permno}')
        plotted += 1

    plt.xlabel('Date')
    plt.ylabel('Kurtosis')
    # The title reports what was actually drawn rather than a fixed count.
    first_year = pd.Timestamp(start).year
    last_year = pd.Timestamp(end).year
    plt.title(f'Kurtosis Over Time for {plotted} Permno ({first_year}-{last_year})')
    if plotted:
        # Skipped when nothing was drawn: legend() warns on an empty axes.
        plt.legend(loc='upper right', fontsize='small', ncol=2)
    plt.show()

# Call the function to plot
# No limit is passed, so the default applies and at most 20 permnos are drawn.
plot_kurtosis_over_time_limited_filtered(df)