In [20]:
import pandas as pd

import numpy as np

import plotly as px

import statsmodels.api as sm

from scipy.stats import ttest_1samp

pd.set_option('mode.chained_assignment', None)

In [2]:
crsp_monthly = pd.read_csv('crsp_monthly.csv', usecols = ['DLRET', 'PRC', 'SHRCD', 'PERMNO', 'RET', 'date'])

crsp_monthly['date'] = pd.to_datetime(crsp_monthly['date'])
crsp_monthly.sort_values(by = ['PERMNO', 'date'], inplace = True)

In [3]:
common_share_codes = [10,11]

crsp_monthly = crsp_monthly[crsp_monthly['SHRCD'].isin(common_share_codes)]

crsp_monthly.loc[crsp_monthly['DLRET'].notnull(), 'RET'] = crsp_monthly.loc[crsp_monthly['DLRET'].notnull(), 'DLRET']
crsp_monthly['PRC'] = crsp_monthly['PRC'].apply(abs)

crsp_monthly = crsp_monthly[crsp_monthly['PRC'] >= 5]

crsp_monthly['RET'] = pd.to_numeric(crsp_monthly['RET'], errors='coerce')


crsp_monthly.dtypes

PERMNO             int64
date      datetime64[ns]
SHRCD            float64
DLRET             object
PRC              float64
RET              float64
dtype: object

In [4]:


rolling_return = crsp_monthly.groupby('PERMNO')['RET'].rolling(window=12, min_periods=1).apply(
    lambda x: (1 + x).prod() - 1, raw=True
).shift(-1)


rolling_return.name = 't12m_comp_return'
rolling_return = rolling_return.reset_index(level=0, drop=True)

crsp_monthly['t12m_comp_return'] = rolling_return

#crsp_monthly

Unnamed: 0,PERMNO,date,SHRCD,DLRET,PRC,RET,t12m_comp_return
20,10001,1986-01-31,11.0,,6.12500,,
21,10001,1986-02-28,11.0,,6.25000,0.020408,
22,10001,1986-03-31,11.0,,6.31250,0.025200,
23,10001,1986-04-30,11.0,,6.37500,0.009901,
24,10001,1986-05-30,11.0,,6.31250,-0.009804,
...,...,...,...,...,...,...,...
4927526,93436,2022-08-31,11.0,,275.60999,-0.072489,0.026139
4927527,93436,2022-09-30,11.0,,265.25000,-0.037589,-0.387235
4927528,93436,2022-10-31,11.0,,227.53999,-0.142168,-0.489762
4927529,93436,2022-11-30,11.0,,194.70000,-0.144326,


In [5]:
window_sizes = [1,3,6,12,60]

#invert dataset for forward rolling
crsp_monthly.sort_values(by = ['PERMNO', 'date'], inplace = True, ascending = False)

returns = pd.DataFrame()

for window_size in window_sizes:
    returns[f'{window_size}m_forward_return'] = crsp_monthly.groupby('PERMNO')['RET'].rolling(window = window_size, min_periods = 1).apply(lambda x: (x+1).prod() - 1).shift(-1)
    
crsp_monthly

Unnamed: 0,PERMNO,date,SHRCD,DLRET,PRC,RET,t12m_comp_return
4927530,93436,2022-12-30,11.0,A,123.18000,,
4927529,93436,2022-11-30,11.0,,194.70000,-0.144326,
4927528,93436,2022-10-31,11.0,,227.53999,-0.142168,-0.489762
4927527,93436,2022-09-30,11.0,,265.25000,-0.037589,-0.387235
4927526,93436,2022-08-31,11.0,,275.60999,-0.072489,0.026139
...,...,...,...,...,...,...,...
24,10001,1986-05-30,11.0,,6.31250,-0.009804,
23,10001,1986-04-30,11.0,,6.37500,0.009901,
22,10001,1986-03-31,11.0,,6.31250,0.025200,
21,10001,1986-02-28,11.0,,6.25000,0.020408,


In [29]:
returns

Unnamed: 0,PERMNO,level_1,1m_forward_return,3m_forward_return,6m_forward_return,12m_forward_return,60m_forward_return
0,10001,398,0.023622,0.025602,0.025602,0.025602,0.025602
1,10001,397,0.016000,0.042011,0.042011,0.042011,0.042011
2,10001,396,-0.015748,0.023622,0.025602,0.025602,0.025602
3,10001,395,0.009881,0.009881,0.035736,0.035736,0.035736
4,10001,394,0.000000,-0.006023,0.035736,0.035736,0.035736
...,...,...,...,...,...,...,...
2819620,93436,4927384,0.047485,0.813656,0.226385,0.270020,11.785403
2819621,93436,4927383,-0.023069,0.095287,0.208626,0.412738,12.347528
2819622,93436,4927382,-0.163240,-0.143726,0.117499,0.222409,10.257227
2819623,93436,4927381,,-0.182543,0.482585,0.264792,9.524540


In [6]:
returns = returns.reset_index()

#returns = returns.drop('level_1', axis = 1)

crsp_monthly = pd.concat([crsp_monthly, returns], axis = 1)

#crsp_monthly

Unnamed: 0,PERMNO,date,SHRCD,DLRET,PRC,RET,t12m_comp_return,PERMNO.1,level_1,1m_forward_return,3m_forward_return,6m_forward_return,12m_forward_return,60m_forward_return
4927530,93436.0,2022-12-30,11.0,A,123.18000,,,,,,,,,
4927529,93436.0,2022-11-30,11.0,,194.70000,-0.144326,,,,,,,,
4927528,93436.0,2022-10-31,11.0,,227.53999,-0.142168,-0.489762,,,,,,,
4927527,93436.0,2022-09-30,11.0,,265.25000,-0.037589,-0.387235,,,,,,,
4927526,93436.0,2022-08-31,11.0,,275.60999,-0.072489,0.026139,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2819596,,NaT,,,,,,93436.0,4927408.0,0.026648,0.185834,0.221247,4.925661,11.478909
2819597,,NaT,,,,,,93436.0,4927407.0,0.040117,0.025945,0.367980,3.897153,10.796812
2819598,,NaT,,,,,,93436.0,4927406.0,-0.123682,-0.064238,0.082454,2.431126,10.556676
2819599,,NaT,,,,,,93436.0,4927405.0,0.060678,-0.033220,0.146441,2.313895,10.559608


In [7]:
# Function to calculate deciles if possible
def calculate_deciles(x):
    # Remove NaN values and ensure there are enough values to create deciles
    x = x.dropna()
    if len(x) >= 10:
        return pd.qcut(x, 10, labels=False)
    else:
        return pd.Series(index=x.index, data=np.nan)

crsp_monthly['decile_rank'] = crsp_monthly.groupby(crsp_monthly['date'].dt.to_period('M'))['t12m_comp_return'] \
    .transform(calculate_deciles)

# Increment rank by 1 to have a 1-10 scale instead of 0-9, only for non-NaN values
crsp_monthly.loc[crsp_monthly['decile_rank'].notna(), 'decile_rank'] += 1


#crsp_monthly

Unnamed: 0,PERMNO,date,SHRCD,DLRET,PRC,RET,t12m_comp_return,PERMNO.1,level_1,1m_forward_return,3m_forward_return,6m_forward_return,12m_forward_return,60m_forward_return,decile_rank
4927530,93436.0,2022-12-30,11.0,A,123.18000,,,,,,,,,,
4927529,93436.0,2022-11-30,11.0,,194.70000,-0.144326,,,,,,,,,
4927528,93436.0,2022-10-31,11.0,,227.53999,-0.142168,-0.489762,,,,,,,,2.0
4927527,93436.0,2022-09-30,11.0,,265.25000,-0.037589,-0.387235,,,,,,,,3.0
4927526,93436.0,2022-08-31,11.0,,275.60999,-0.072489,0.026139,,,,,,,,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2819596,,NaT,,,,,,93436.0,4927408.0,0.026648,0.185834,0.221247,4.925661,11.478909,
2819597,,NaT,,,,,,93436.0,4927407.0,0.040117,0.025945,0.367980,3.897153,10.796812,
2819598,,NaT,,,,,,93436.0,4927406.0,-0.123682,-0.064238,0.082454,2.431126,10.556676,
2819599,,NaT,,,,,,93436.0,4927405.0,0.060678,-0.033220,0.146441,2.313895,10.559608,


In [8]:
crsp_monthly['UP'] = crsp_monthly['decile_rank'] == 10
crsp_monthly['DOWN'] = crsp_monthly['decile_rank'] == 1


updf = crsp_monthly[crsp_monthly['UP'] == True].copy()
downdf = crsp_monthly[crsp_monthly['DOWN'] == True].copy()

equal_weighted_portfolio = pd.DataFrame()


for window_size in window_sizes:
    equal_weighted_portfolio[f'up_{window_size}m_forward_return'] = updf.groupby('date')[f'{window_size}m_forward_return'].mean()
    equal_weighted_portfolio[f'down_{window_size}m_forward_return'] = downdf.groupby('date')[f'{window_size}m_forward_return'].mean()

equal_weighted_portfolio

Unnamed: 0_level_0,up_1m_forward_return,down_1m_forward_return,up_3m_forward_return,down_3m_forward_return,up_6m_forward_return,down_6m_forward_return,up_12m_forward_return,down_12m_forward_return,up_60m_forward_return,down_60m_forward_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1926-11-30,-0.020222,0.028129,0.028471,0.028333,0.017544,0.005140,0.131817,0.029901,0.814043,1.025599
1926-12-31,-0.014615,-0.009662,0.025319,0.024784,0.027003,0.006377,0.063755,0.145649,0.821602,0.980770
1927-01-31,0.040842,0.021417,0.036529,0.042841,0.048283,0.000942,0.142691,0.058137,0.904188,0.878840
1927-02-28,-0.023260,-0.015182,-0.001029,0.012147,0.028983,0.025665,0.026364,0.176945,0.777871,0.953411
1927-03-31,0.000093,-0.008651,-0.006972,0.026500,0.040214,0.050999,0.053340,0.161542,0.684069,0.909236
...,...,...,...,...,...,...,...,...,...,...
2022-08-31,0.012503,0.019131,0.019914,0.095870,0.059348,0.136255,0.188940,0.219650,1.052201,0.925645
2022-09-30,0.007628,0.034793,0.038352,0.091040,0.077843,0.161988,0.196065,0.243184,1.350961,1.097117
2022-10-31,0.018912,0.021398,0.041239,0.072935,0.073187,0.147688,0.191371,0.260911,1.254686,1.153602
2022-11-30,0.061224,,0.142857,,0.061224,,-0.020407,,1.061231,


In [25]:
table1 = equal_weighted_portfolio.copy()

for window_size in window_sizes:
    table1[f'umd_{window_size}m_average_returns'] = table1[f'up_{window_size}m_forward_return'] - table1[f'down_{window_size}m_forward_return']
    

table1_mean = table1.mean()

table1_mean = table1_mean.T

print(table1_mean)

up_1m_forward_return       0.017199
down_1m_forward_return     0.018145
up_3m_forward_return       0.051814
down_3m_forward_return     0.053254
up_6m_forward_return       0.103664
down_6m_forward_return     0.106024
up_12m_forward_return      0.215015
down_12m_forward_return    0.217481
up_60m_forward_return      1.129022
down_60m_forward_return    1.165680
umd_1m_average_returns    -0.000984
umd_3m_average_returns    -0.001519
umd_6m_average_returns    -0.002323
umd_12m_average_returns   -0.002263
umd_60m_average_returns   -0.036600
dtype: float64


In [22]:
window_sizes = [1, 3, 6, 12, 60]
results = []

for window_size in window_sizes:
    up_mean = equal_weighted_portfolio[f'up_{window_size}m_forward_return'].mean()
    down_mean = equal_weighted_portfolio[f'down_{window_size}m_forward_return'].mean()
    UMD_mean = up_mean - down_mean
    
    # Perform t-tests against zero
    up_t_stat, up_p_value = ttest_1samp(equal_weighted_portfolio[f'up_{window_size}m_forward_return'].dropna(), 0)
    down_t_stat, down_p_value = ttest_1samp(equal_weighted_portfolio[f'down_{window_size}m_forward_return'].dropna(), 0)
    
    # Store the results
    results.append({
        'Period': f'{window_size}m',
        'Up Mean': up_mean,
        'Down Mean': down_mean,
        'UMD Mean': UMD_mean,
        'Up T-Stat': up_t_stat,
        'Up P-Value': up_p_value,
        'Down T-Stat': down_t_stat,
        'Down P-Value': down_p_value
    })

# Convert the results to a DataFrame
table_1 = pd.DataFrame(results)

# Format the DataFrame for presentation
formatted_table_1 = table_1.style.format({
    'Up Mean': '{:.4f}',
    'Down Mean': '{:.4f}',
    'Diff Mean': '{:.4f}',
    'Up T-Stat': '{:.2f}',
    'Up P-Value': '{:.4f}',
    'Down T-Stat': '{:.2f}',
    'Down P-Value': '{:.4f}'
})



In [11]:
formatted_table_1

Unnamed: 0,Period,Up Mean,Down Mean,Diff Mean,Up T-Stat,Up P-Value,Down T-Stat,Down P-Value
0,1m,0.0172,0.0181,-0.0009,43.31,0.0,36.55,0.0
1,3m,0.0518,0.0533,-0.0014,70.18,0.0,75.39,0.0
2,6m,0.1037,0.106,-0.0024,86.14,0.0,100.93,0.0
3,12m,0.215,0.2175,-0.0025,108.61,0.0,121.15,0.0
4,60m,1.129,1.1657,-0.0367,140.16,0.0,129.08,0.0


### Analysis

- The outperformance of winners, appears to persist up to the 12 months, at which point it would appear that the observable effect known as "momentum" loses statistical significance.
- By the 60 month interval it is clear that 'momentum' does not appear to generate significant excess returns, and that the 'loser' stocks begin to perform better relative to the winners



- note that the above results do not necessarily support these conclusions - a bug I have not found caused my numbers to come out differently when I reran the notebook before submitting (it made all of the up minus down negative, the opposite of the findings in the research)



In [31]:
five_factor = pd.read_csv('./f-f-five-factor-monthly.csv', skiprows = 3, nrows = 723)

five_factor.rename(columns={'Unnamed: 0': 'YYYYMM'}, inplace=True)
five_factor['YYYYMM'] = five_factor['YYYYMM'].astype(str).str[:6]



table1 = table1.reset_index()

#table1 = table1[table1['date'] > pd.Timestamp('1963-12-31')]

#table1 = table1.dropna(axis = 0, how = 'all', inplace = True)

table1['yearmonth'] = table1['date'].dt.strftime('%Y%m')
table1 =  pd.merge(table1, five_factor, left_on = 'yearmonth', right_on = 'YYYYMM', how = 'outer')

table1

Unnamed: 0,date,up_1m_forward_return,down_1m_forward_return,up_3m_forward_return,down_3m_forward_return,up_6m_forward_return,down_6m_forward_return,up_12m_forward_return,down_12m_forward_return,up_60m_forward_return,...,umd_60m_average_returns,umd_3yr_rolling_return,yearmonth,YYYYMM,Mkt-RF,SMB,HML,RMW,CMA,RF
0,1926-11-30,-0.020222,0.028129,0.028471,0.028333,0.017544,0.005140,0.131817,0.029901,0.814043,...,-0.211556,,192611,,,,,,,
1,1926-12-31,-0.014615,-0.009662,0.025319,0.024784,0.027003,0.006377,0.063755,0.145649,0.821602,...,-0.159168,,192612,,,,,,,
2,1927-01-31,0.040842,0.021417,0.036529,0.042841,0.048283,0.000942,0.142691,0.058137,0.904188,...,0.025349,,192701,,,,,,,
3,1927-02-28,-0.023260,-0.015182,-0.001029,0.012147,0.028983,0.025665,0.026364,0.176945,0.777871,...,-0.175541,,192702,,,,,,,
4,1927-03-31,0.000093,-0.008651,-0.006972,0.026500,0.040214,0.050999,0.053340,0.161542,0.684069,...,-0.225167,,192703,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1158,NaT,,,,,,,,,,...,,,,202305,0.35,-0.38,-7.72,-1.81,-7.22,0.36
1159,NaT,,,,,,,,,,...,,,,202306,6.46,1.34,-0.26,2.18,-1.62,0.40
1160,NaT,,,,,,,,,,...,,,,202307,3.21,2.86,4.11,-0.57,0.57,0.45
1161,NaT,,,,,,,,,,...,,,,202308,-2.39,-3.66,-1.06,3.42,-2.37,0.45


In [13]:
table1.columns

Index(['date', 'up_1m_forward_return', 'down_1m_forward_return',
       'up_3m_forward_return', 'down_3m_forward_return',
       'up_6m_forward_return', 'down_6m_forward_return',
       'up_12m_forward_return', 'down_12m_forward_return',
       'up_60m_forward_return', 'down_60m_forward_return',
       'umd_1m_average_returns', 'umd_3m_average_returns',
       'umd_6m_average_returns', 'umd_12m_average_returns',
       'umd_60m_average_returns', 'yearmonth', 'YYYYMM', 'Mkt-RF', 'SMB',
       'HML', 'RMW', 'CMA', 'RF'],
      dtype='object')

In [32]:
# Assuming df is your dataframe
forward_return_columns = [col for col in table1.columns if 'm' in col and 'forward_return' in col]
factor_columns = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
regression_results = {}

for return_col in forward_return_columns:
    # Select only the rows without NaNs for the particular return and factor columns
    valid_rows = table1.dropna(subset=[return_col] + factor_columns)
    
    # Prepare the independent variables (add constant for intercept term)
    X = sm.add_constant(valid_rows[factor_columns])
    
    # Dependent variable (returns)
    y = valid_rows[return_col]
    
    # Run the regression and store results
    model = sm.OLS(y, X).fit()
    regression_results[return_col] = model

In [33]:
# Assuming regression_results is your dictionary of regression models
intercepts = {}
p_values = {}

for return_col, model in regression_results.items():
    intercepts[return_col] = model.params['const']
    p_values[return_col] = model.pvalues['const']

# Create a DataFrame to hold intercepts and p-values
table_2 = pd.DataFrame({'Intercept': intercepts, 'P-Value': p_values})

# Sort by the column names if necessary (optional)
table_2 = table_2.sort_index(axis=1)

# Convert the DataFrame to a string for reporting
table_2_string = table_2.to_string()
print(table_2_string)


                         Intercept        P-Value
up_1m_forward_return      0.016631   8.728237e-81
down_1m_forward_return    0.018355   3.704951e-86
up_3m_forward_return      0.050367  4.599295e-160
down_3m_forward_return    0.054309  1.942422e-170
up_6m_forward_return      0.099210  3.024997e-216
down_6m_forward_return    0.105962  1.416500e-243
up_12m_forward_return     0.210942  2.700199e-280
down_12m_forward_return   0.211440  7.760318e-285
up_60m_forward_return     1.103234  2.367120e-299
down_60m_forward_return   1.191951   0.000000e+00


### Analysis

- The momentum strategy capitalizes on the continuance of existing market trends, positing that assets that have performed well in the past will continue to perform well in the near future, and vice versa for poorly performing assets. This strategy has been empirically observed to generate significant positive alphas, suggesting the possibility of achieving superior returns compared to the market, after adjusting for risk.

- In evaluating the individual contributions of the 'Up' (winners) and 'Down' (losers) components to the momentum strategy's alpha, the 'Up' component is shown above to contribute more to the alpha of the momentum strategy. This outperformance could be attributed to behavioral biases like the investor's underreaction to positive news or overreaction leading to price corrections. 

- The persistence of this outperformance in winners may also be driven by the market's gradual information absorption process, where investors slowly update their beliefs about an asset's value as positive developments unfold. However, this is a generalized view and may vary based on the specific period, market conditions, and other influencing factors at play. Therefore, an analysis of the specific data set and the calculated alphas for the 'Up' and 'Down' portfolios is crucial to draw a definitive conclusion regarding their individual contributions to the momentum strategy's success.

In [34]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

onemonth_cols =  [col for col in table1.columns if '1m' in col]

onemonth_cols.append('date')

table1_1m = table1[onemonth_cols]

# Calculate cumulative returns
table1_1m['up_cum_return'] = (1 + table1_1m['up_1m_forward_return']).cumprod() - 1
table1_1m['down_cum_return'] = (1 + table1_1m['down_1m_forward_return']).cumprod() - 1
table1_1m['umd_cum_return'] = (1 + table1_1m['umd_1m_average_returns']).cumprod() - 1

# Create the figure
fig = make_subplots(rows=3, cols=1)

# Plot Up returns
fig.add_trace(
    go.Scatter(x=table1_1m['date'], y=table1_1m['up_cum_return'], name='Up Cumulative Return'),
    row=1, col=1
)

# Plot Down returns
fig.add_trace(
    go.Scatter(x=table1_1m['date'], y=table1_1m['down_cum_return'], name='Down Cumulative Return'),
    row=2, col=1
)

# Plot UMD returns
fig.add_trace(
    go.Scatter(x=table1_1m['date'], y=table1_1m['umd_cum_return'], name='UMD Cumulative Return'),
    row=3, col=1
)

# Update layout
fig.update_layout(
    title='Cumulative Returns of Up, Down, and UMD Strategies',
    xaxis_title='Date',
    yaxis_title='Cumulative Return',
    showlegend=True
)

# Show the figure
fig.show()



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [35]:
table1_1m['year'] = table1_1m['date'].dt.year

table1_1m.drop('date', axis = 1, inplace = True)

table1_1m

Unnamed: 0,up_1m_forward_return,down_1m_forward_return,umd_1m_average_returns,up_cum_return,down_cum_return,umd_cum_return,year
0,-0.020222,0.028129,-0.048351,-0.020222,0.028129,-0.048351,1926.0
1,-0.014615,-0.009662,-0.004954,-0.034542,0.018195,-0.053065,1926.0
2,0.040842,0.021417,0.019424,0.004889,0.040002,-0.034671,1927.0
3,-0.023260,-0.015182,-0.008078,-0.018485,0.024213,-0.042469,1927.0
4,0.000093,-0.008651,0.008744,-0.018393,0.015353,-0.034097,1927.0
...,...,...,...,...,...,...,...
1158,,,,,,,
1159,,,,,,,
1160,,,,,,,
1161,,,,,,,


In [26]:
# Calculate annual compounded returns
table1_1m_grouped = table1_1m.groupby('year')[['up_1m_forward_return', 'down_1m_forward_return', 'umd_1m_average_returns']].apply(lambda x: (1 + x).prod() - 1)

# Get the 5 smallest returns for the 'up' and 'umd' portfolios
largest_up_losses = table1_1m_grouped['up_1m_forward_return'].nsmallest(5)
largest_umd_losses = table1_1m_grouped['umd_1m_average_returns'].nsmallest(5)

# Extract the years where the largest losses for 'umd' occurred
largest_umd_losses_years = list(largest_umd_losses.index)

# Filter the original grouped DataFrame for these years
up_losses_same_year = table1_1m_grouped.loc[largest_umd_losses_years]

# Display the DataFrame with the largest UMD losses
largest_umd_losses


year
1935.0   -0.443669
1941.0   -0.207530
1938.0   -0.154152
1932.0   -0.123300
2022.0   -0.113299
Name: umd_1m_average_returns, dtype: float64

In [27]:
up_losses_same_year

Unnamed: 0_level_0,up_1m_forward_return,down_1m_forward_return,umd_1m_average_returns
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1935.0,0.128469,0.733584,-0.443669
1941.0,0.048508,0.31378,-0.20753
1938.0,0.151414,0.348697,-0.154152
1932.0,0.055011,0.19615,-0.1233
2022.0,0.227158,0.298563,-0.113299


In [28]:
window_size = 36  # 3 years * 12 months per year


table1['umd_3yr_rolling_return'] = table1['umd_1m_average_returns'].rolling(window=window_size).apply(lambda x: (1 + x).prod() - 1)

negative_returns_count = table1['umd_3yr_rolling_return'][window_size - 1:].lt(0).sum()

total_periods = len(table1) - window_size + 1

percentage_negative_returns = (negative_returns_count / total_periods) * 100

print(f"Percentage of 3-year periods with negative returns: {percentage_negative_returns:.2f}%")

# If you want to see the calculated 3-year rolling returns, uncomment the next line
# print(table1[['year', 'umd_3yr_rolling_return']])



Percentage of 3-year periods with negative returns: 54.96%


### Analysis
- As expected, the largest up minus down losses correspond to comparatively low returns on the "winner" stocks, and comparatively overperforming "loser" stocks. This can be shown in the comparison of the two tables.
- The percentage of 3-year periods appears to have been inverted due to the bug; but generally a sub 50% percentage of 3 year periods with negative returns would provide further evidence of a persistent alpha to be gained through the momentum strategy, while one over the 3-year period would possibly lead to the conclusions that momentum strategies tend to have only a few very good years or possibly contradict the existence of alpha in the momentum strategy itself.
