In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
import datetime as dt
from pandas.tseries.offsets import MonthEnd # This enables us to jump from one month to the other OR subtract a month from a date

In [2]:
df= pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0] # indexing for 0 otherwise a series data would be 
                                                                            # returned

In [3]:
df.head()

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub-Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M,reports,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,66740,1902
1,AOS,A. O. Smith,reports,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
3,ABBV,AbbVie,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ABMD,Abiomed,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981


In [None]:
tickers=df.Symbol.to_list()

In [5]:
# Using the tickers list, we now download the price data from yfinance

df2=yf.download(tickers, start="2009-12-31", end="2020-12-31")

[*********************100%***********************]  505 of 505 completed

4 Failed downloads:
- BF.B: No data found for this date range, symbol may be delisted
- CEG: Data doesn't exist for startDate = 1262214000, endDate = 1609369200
- BRK.B: No data found, symbol may be delisted
- OGN: Data doesn't exist for startDate = 1262214000, endDate = 1609369200


In [6]:
# We only need the Adj close price, so lets subset that column

price= df2["Adj Close"]
price.head()

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABMD,ABT,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-12-30,20.304356,4.591152,38.750172,6.499639,,21.83622,8.9,17.711931,33.033203,37.18,...,39.012676,14.028234,42.948311,32.793987,,18.858957,53.170139,28.389999,10.923774,
2009-12-31,20.284763,4.56287,38.24939,6.471692,,21.637022,8.73,17.533316,32.882648,36.779999,...,38.700314,13.877813,42.58609,32.434322,,18.730421,52.591835,28.35,10.949376,
2010-01-04,20.434929,4.496877,38.154896,6.572423,,22.101793,8.74,17.685949,33.33429,37.09,...,42.508533,13.786251,43.185623,32.581871,,18.794676,53.401482,28.67,11.376083,
2010-01-05,20.212959,5.005958,37.928123,6.583786,,21.944115,8.53,17.54306,33.54031,37.700001,...,45.093861,13.622754,43.354244,32.194534,,18.730421,55.091969,28.620001,11.777194,
2010-01-06,20.141132,4.798555,38.258842,6.479064,,21.73662,8.4,17.640486,33.89687,37.619999,...,44.502357,13.648911,43.72897,32.406654,,18.596512,55.074173,28.4,12.801297,


### Cumulative Monthly Returns

In [72]:
mtl_ret=price.pct_change().resample("M").agg(lambda x: (x+1).prod()-1)
mtl_ret.head()

# The code above is doing the following things:

        # First, it is calculating the daily returns from the daily adjusted price data 
        # Second, it is sort of creating breaks in between the months. Think of it like this: there are many boxes being created
            # and each of the boxes are given the label of datetime, in this case, the labels name are the months name
        # Third, the lambda function is denoting x as whatever result we get from "price.pct_change()"
            # Basically, x = daily returns. The daily returns are then added with 1 and multiplied to each other within 
            # the boxes of months.

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABMD,ABT,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-12-31,-0.000965,-0.00616,-0.012923,-0.0043,0.0,-0.009122,-0.019101,-0.010084,-0.004558,-0.010759,...,-0.008007,-0.010723,-0.008434,-0.010967,0.0,-0.006816,-0.010876,-0.001409,0.002344,0.0
2010-01-31,-0.097843,0.097107,-0.025445,-0.088597,0.0,0.045646,-0.092783,-0.004403,-0.012289,-0.121805,...,0.062682,-0.020735,-0.05514,-0.046631,0.0,-0.013729,-0.0472,-0.079365,0.478566,0.0
2010-02-28,0.122368,0.380414,0.034221,0.065396,0.0,0.031652,0.276515,0.025312,-0.024884,0.072755,...,0.027311,0.001444,0.015428,-0.013123,0.0,-0.014323,0.017934,0.094636,-0.022125,0.0
2010-03-31,0.093134,0.002728,0.028855,0.14847,0.0,0.031384,0.020772,-0.029477,0.049537,0.020779,...,0.192859,0.030484,0.030462,0.055622,0.0,0.136714,0.032618,0.036052,0.177994,0.0
2010-04-30,0.054376,-0.038095,0.075859,0.111021,0.0,0.066736,-0.065891,-0.011645,0.049417,-0.050042,...,0.163656,0.025943,0.011794,0.050745,0.0,0.114615,0.028885,-0.018581,0.315476,0.0


### Rolling 11 Month Cumulative Returns

In [8]:
past_11= (mtl_ret + 1).rolling(11).apply(np.prod)-1

# Calculating the past 11 month cumulated return for each month. This means we gotta use the rolling returns feauture. 
# Similarly, after that the apply() function is used so that we can cumulate the past 11 months returns on 
# on every single value of the series

# Here, the first 10 entries should have NaN values because for the it is only at the 11th month that our calculation really 
# starts

In [9]:
past_11.head(12)

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABMD,ABT,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-12-31,,,,,,,,,,,...,,,,,,,,,,
2010-01-31,,,,,,,,,,,...,,,,,,,,,,
2010-02-28,,,,,,,,,,,...,,,,,,,,,,
2010-03-31,,,,,,,,,,,...,,,,,,,,,,
2010-04-30,,,,,,,,,,,...,,,,,,,,,,
2010-05-31,,,,,,,,,,,...,,,,,,,,,,
2010-06-30,,,,,,,,,,,...,,,,,,,,,,
2010-07-31,,,,,,,,,,,...,,,,,,,,,,
2010-08-31,,,,,,,,,,,...,,,,,,,,,,
2010-09-30,,,,,,,,,,,...,,,,,,,,,,


### Defining the portfolio formation date

In [39]:
formation_date=dt.datetime(2010,12,31)
formation_date

# This means on this date, we are buying the winners and shorting the losers on the basis of the results that we would
# get from end_measurement variable. IOW, the decile portfolios are formed on 12-31 on the basis of the cumulative returns 
# until 11-31 and this process applies to every following month after that until the final date in the sample.


#By definition, the formation_date variable results in a datetime format

datetime.datetime(2010, 12, 31, 0, 0)

In [41]:
end_measurement= formation_date-MonthEnd(1)
end_measurement

# This answers the question "When do we want to end the recording of cumulative returns". In our case, we are recording the 
# returns of past 12 months skipping the most recent month. Thats why if the formation date is 2010 Dec 31, the measurement 
# should end at Nov 31. 

Timestamp('2010-11-30 00:00:00')

### Defining the past 12 month cumulative return at 2010-12-30

In [42]:
ret_12=past_11.loc[end_measurement]
ret_12.head()

# This results in the cumulative returns obtained by each stock over the past 12 months skipping the most recent month.

# So, imagine right now, we are sitting at 2010-12-31 and we wanna calculate the past 12 months skipping the most recent month.
# This basically means we want cumulative returns from t1 to the end of t11. 

A       0.127133
AAL     1.305785
AAP     0.636045
AAPL    0.476534
ABBV    0.000000
Name: 2010-11-30 00:00:00, dtype: float64

In [54]:
# Convert this series to a df

ret_12=pd.DataFrame(ret_12)
ret_12.head()

Unnamed: 0_level_0,2010-11-30 00:00:00,Decile
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0.127133,5
AAL,1.305785,9
AAP,0.636045,9
AAPL,0.476534,8
ABBV,0.0,2


### Sorting all these cumulated returns into different deciles sitting at 2010-12-30

In [55]:
ret_12["Decile"]=pd.qcut(ret_12.iloc[:,0], 10, labels=False, duplicates="drop")

In [67]:
# Changing the column names

ret_12.index.names=["Ticker"] #index
ret_12.columns=["12-1 Cumulative Returns","Decile"] # columns

#### The winners i.e. stocks that have the highest cumulative returns over the past 12-1 are located in decile 9 and losers are located in decile 0

In [96]:
# Lets define winners and losers for the date 2010-12-30

winners= ret_12[ret_12.Decile==9]
losers= ret_12[ret_12.Decile==0]

In [97]:
winners.head() # As you can see, this results in a df that has all the stocks that belong to decile 9

Unnamed: 0_level_0,12-1 Cumulative Returns,Decile
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
AAL,1.305785,9
AAP,0.636045,9
AKAM,1.05959,9
ALB,0.502406,9
ALK,0.591436,9


In [81]:
losers.head()

Unnamed: 0_level_0,12-1 Cumulative Returns,Decile
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
ADBE,-0.244154,0
AES,-0.187828,0
AMD,-0.246901,0
BAC,-0.27141,0
BAX,-0.143899,0


#### Next question to answer is, assuming that our holding period is 1 month, what are the subsequent returns for this strategy?

#### IOW, how the winners and losers would have performed from the formation date on forward assuming the holding period for this strategy is 1 month?

In [85]:
winner_ret= mtl_ret.loc[formation_date + MonthEnd(1), mtl_ret.columns.isin(winners.index)]

# "mtl_ret.loc[formation_date + MonthEnd(1)]" : This code is returning the monthly cumulative returns for a strategy that was
# formed at time t and held until t+1month

# "mtl_ret.columns.isin(winners["Ticker"])": This is the 2nd argument of the .loc[] method. It is saying "give me all the ticker
# symbols that, at formation_date + MonthEnd(1) date, can be found in winners df. 

In [84]:
# Do the same for loser return

loser_ret= mtl_ret.loc[formation_date + MonthEnd(1),mtl_ret.columns.isin(losers.index)]

In [86]:
momentum_profit=winner_ret.mean()-loser_ret.mean()
momentum_profit

# If we were to hold a l/S momentum portfolio for 1 month formed in 2010-12-31 by measuring and recording 12-1 momentum, the 
# avg momentum premium for that 1 month period was -4.85%

-0.04085240280896882

Unnamed: 0_level_0,12-1 Cumulative Returns,Decile
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
AAL,1.305785,9
AAP,0.636045,9
AKAM,1.05959,9
ALB,0.502406,9
ALK,0.591436,9


### Now, lets functionalize everything that we have done so far

In [130]:
def momentum (formation_date):
    end_measurement= formation_date - MonthEnd(1) #Month we are forming the portfolio in less 1 month
    ret_12= past_11.loc[end_measurement]    # past 12 months cumulative returns skipping the most recent month for the date 
                                                    # "end_measurement"
    ret_12=pd.DataFrame(ret_12) # convert series to a df
    ret_12["Decile"]=pd.qcut(ret_12.iloc[:,0], 10, labels=False, duplicates="drop") # Creatimng 10 deciles: 0 to 9   
    winners=ret_12[ret_12.Decile==9] #All the stocks that are in decile 9 are winners 
    losers=ret_12[ret_12.Decile==0]# All the stocks in decile 0 are losers
    winners_ret=mtl_ret.loc[formation_date + MonthEnd(1),mtl_ret.columns.isin(winners.index)] 
    losers_ret=mtl_ret.loc[formation_date + MonthEnd(1), mtl_ret.columns.isin(losers.index)]
    momentum_profit=winners_ret.mean()-losers_ret.mean()
    return momentum_profit

In [131]:
momentum(formation_date)

-0.04085240280896882

#### Now, we can use for loops to iterate over numerous formation dates

In [132]:
# This loop is just storing a lot of momentum prfits over a long time horizon 

profits=[] # creating a couple of empty lists
dates=[]

for i in range(120):
    profits.append(momentum(formation_date + MonthEnd(i))) # appending the profits with the function call of momentum and 
                                                        # specify the formation_date + monthend (iterator)
    dates.append(formation_date + MonthEnd(i))

In [133]:
profits

[-0.04085240280896882,
 nan,
 nan,
 -0.017370850260898248,
 0.0012689022911455024,
 0.020650347199796037,
 -0.008579056738416361,
 -0.02755033543094479,
 -0.07158312187463287,
 -0.06833141001041779,
 0.049076535081935546,
 -0.004038105044549305,
 -0.11328122675130387,
 -0.017515923759909624,
 0.02954102287232334,
 0.06769079421157191,
 0.02530045788890601,
 -0.017950807191625215,
 0.012981005880207792,
 -0.010061729852922668,
 -0.011780048029619641,
 -0.0007135609729020673,
 0.0255979161053591,
 -0.004626379323370865,
 -0.012818081356014102,
 -0.005477440534467446,
 0.013076918344502496,
 -0.01806339801454302,
 0.014283465867618007,
 -0.019988797982276575,
 0.053027341587483824,
 0.00529022152051976,
 0.06380550885212086,
 0.031240605012522932,
 0.03272118620481741,
 0.006230407880633927,
 0.02478445218890124,
 0.031321895969691496,
 -0.045236585334694955,
 -0.04620990665128958,
 0.029694406927034758,
 0.021454509230387216,
 -0.01984540936688479,
 0.01389735322120586,
 -0.0142521444656