In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm

df = pd.read_stata('Returns/NFL/NFLFavReturnsnovig.dta.')
df['AwayWins'] = 0
df['AwaySpread'] = 0
df['AwayReturns'] = 0
df['HomeWins'] = 0
df['HomeSpread'] = 0
df['HomeReturns'] = 0

teams = pd.concat([df['awayteam'],df['hometeam']]).unique()
teamDict = {}
for team in teams:
    teamDict[team] = {'wins':[], 'spread':[], 'returns':[], 'games':0}

df['year'] = df['date'] //10000

#print(df['awayspreadopenodds'])

In [2]:
year = 2009
momentum = ['wins', 'spread', 'returns']
N = 3
GAMES = 256

In [3]:
for index, row in df.iterrows():
    awayteam = row['awayteam']
    hometeam = row['hometeam']

    curyear = row['year']
    if curyear != year:
        year = curyear
        for team in teamDict: 
            for x in momentum:
                for i in range(len(teamDict[team][x])):
                    teamDict[team][x].pop(0)
            teamDict[team]['games'] = 0

    wins = 0
    spread = 0
    returns = 0
    for i in range(len(teamDict[awayteam]['wins'])):
        wins += teamDict[awayteam]['wins'][i]
        spread += teamDict[awayteam]['spread'][i]
        returns += teamDict[awayteam]['returns'][i]
    df.loc[[index],['AwayWins']] = (wins / N)
    df.loc[[index],['AwaySpread']] = spread
    df.loc[[index],['AwayReturns']] = (returns / N)
    df.loc[[index],['AwayGames']] = teamDict[awayteam]['games']

    wins = 0
    spread = 0
    returns = 0
    for i in range(len(teamDict[hometeam]['wins'])):
        wins += teamDict[hometeam]['wins'][i]
        spread += teamDict[hometeam]['spread'][i]
        returns += teamDict[hometeam]['returns'][i]
    df.loc[[index],['HomeWins']] = (wins / N)
    df.loc[[index],['HomeSpread']] = spread
    df.loc[[index],['HomeReturns']] = (returns / N)
    df.loc[[index],['HomeGames']] = teamDict[hometeam]['games']

    if len(teamDict[awayteam]['wins']) > N-1:
        teamDict[awayteam]['wins'].pop(0)
        teamDict[awayteam]['spread'].pop(0)
        teamDict[awayteam]['returns'].pop(0)
    if len(teamDict[hometeam]['wins']) > N-1:
        teamDict[hometeam]['wins'].pop(0)
        teamDict[hometeam]['spread'].pop(0)
        teamDict[hometeam]['returns'].pop(0)

    if row['awayscore'] > row['homescore']:
        teamDict[awayteam]['wins'].append(1)
        teamDict[hometeam]['wins'].append(0)
        
    else:
        teamDict[hometeam]['wins'].append(1)
        teamDict[awayteam]['wins'].append(0)


    teamDict[awayteam]['spread'].append(row['awayscore'] - row['homescore'])
    teamDict[hometeam]['spread'].append(row['homescore'] - row['awayscore'])

    if row['homescore'] - row['awayscore'] > -1 * row['homespreadopen']:
        teamDict[hometeam]['returns'].append(1)
        teamDict[awayteam]['returns'].append(0)
    elif row['homescore'] - row['awayscore'] == -1 * row['homespreadopen']: 
        teamDict[hometeam]['returns'].append(.5)
        teamDict[awayteam]['returns'].append(.5)
    else:
        teamDict[awayteam]['returns'].append(1)
        teamDict[hometeam]['returns'].append(0)

    teamDict[awayteam]['games'] += 1
    teamDict[hometeam]['games'] += 1

In [4]:
df['AwayWins'] = (df['AwayWins'] - df['AwayWins'].mean()) / df['AwayWins'].std()
df['AwaySpread'] = (df['AwaySpread'] - df['AwaySpread'].mean()) / df['AwaySpread'].std()
df['AwayReturns'] = (df['AwayReturns'] - df['AwayReturns'].mean()) / df['AwayReturns'].std()
df['HomeWins'] = (df['HomeWins'] - df['HomeWins'].mean()) / df['HomeWins'].std()
df['HomeSpread'] = (df['HomeSpread'] - df['HomeSpread'].mean()) / df['HomeSpread'].std()
df['HomeReturns'] = (df['HomeReturns'] - df['HomeReturns'].mean()) / df['HomeReturns'].std()

In [5]:
df['AwayChar'] = df['AwayWins'] + df['AwaySpread'] + df['AwayReturns']
df['HomeChar'] = df['HomeWins'] + df['HomeSpread'] + df['HomeReturns']

In [6]:

chardf = df.copy(deep=True)
chardf[(chardf.HomeGames < N) | (chardf.AwayGames < N)].index
chardf.drop(chardf[(chardf.HomeGames < N) | (chardf.AwayGames < N)].index, inplace= True)
#print(chardf)

In [7]:
chardf['Momentum'] = np.where(chardf['homespreadopen'] < 0, (chardf['HomeChar'] - chardf['AwayChar']), (chardf['AwayChar'] - chardf['HomeChar']))
chardf['games'] = chardf.groupby('date')['date'].transform('size')
chardf['ranks'] = chardf.groupby('date')['Momentum'].rank()

chardf['thresh'] = chardf.ranks / chardf.games
chardf['buy'] = chardf['thresh'].ge(.8)
chardf['sell'] = chardf['thresh'].le(.2)
#chardf.to_stata('MLBSpreadChar.dta')

In [8]:
chardf['overMom'] = chardf['HomeChar'] + chardf['AwayChar']
chardf['overRanks'] = chardf.groupby('date')['overMom'].rank()

chardf['overthresh'] = chardf.overRanks / chardf.games
chardf['overBuy'] = chardf['overthresh'].ge(.8)
chardf['overSell'] = chardf['overthresh'].le(.2)

In [9]:

chardf['weight'] = chardf['buy'] / chardf['games'] - chardf['sell'] / chardf['games']
chardf['smallocreturn'] = chardf['weight'] * chardf['spreadocreturn']
chardf['smallclosereturn'] = chardf['weight'] * chardf['spreadclosereturn']

chardf['smallmlocreturn'] = chardf['weight'] * chardf['mlocreturn']
chardf['smallmlclosereturn'] = chardf['weight'] * chardf['mlclosereturn']

chardf['overWeight'] = chardf['overBuy'] / chardf['games'] - chardf['overSell'] / chardf['games']
chardf['smalloverocreturn'] = chardf['overWeight'] * chardf['overocreturn']
chardf['smalloverclosereturn'] = chardf['overWeight'] * chardf['overclosereturn']

#chardf.to_stata('NFLSpreadChar.dta')

In [24]:
weights  = chardf[chardf.weight != 0]

NFLmomspreadret = pd.DataFrame(weights.groupby('date')['smallocreturn'].sum())
NFLmomspreadret.rename(columns = {'smallocreturn':'spreadocreturn'}, inplace=True)
NFLmomspreadret['spreadclosereturns'] = weights.groupby('date')['smallclosereturn'].sum()
NFLmomspreadret['spreadopenreturn'] = NFLmomspreadret['spreadocreturn'] + NFLmomspreadret['spreadclosereturns']
NFLmomspreadret['spreadtrading'] = NFLmomspreadret['spreadocreturn'] - NFLmomspreadret['spreadclosereturns']

means = NFLmomspreadret.mean()*GAMES * 2/5
std = NFLmomspreadret.std() * np.sqrt(GAMES) * np.sqrt(2/5)
print(means)
print(means / std)

returns = ['spreadocreturn', 'spreadclosereturns', 'spreadopenreturn', 'spreadtrading']
tstats = {}
for x in returns:
    tstats[x] = sm.ols(formula="{}~1".format(x), data = NFLmomspreadret).fit().tvalues['Intercept']
print(tstats)

NFLmommlret = pd.DataFrame(weights.groupby('date')['smallmlocreturn'].sum())
NFLmommlret.rename(columns = {'smallmlocreturn':'mlocreturn'}, inplace=True)
NFLmommlret['mlclosereturns'] = weights.groupby('date')['smallmlclosereturn'].sum()
NFLmommlret['mlopenreturn'] = NFLmommlret['mlocreturn'] + NFLmommlret['mlclosereturns']
NFLmommlret['mltrading'] = NFLmommlret['mlocreturn'] - NFLmommlret['mlclosereturns']
means = NFLmommlret.mean()*GAMES * 2/5
std = NFLmommlret.std() * np.sqrt(GAMES) * np.sqrt(2/5)
print(means)
print(means / std)

returns = ['mlocreturn', 'mlclosereturns', 'mlopenreturn', 'mltrading']
tstats = {}
for x in returns:
    tstats[x] = sm.ols(formula="{}~1".format(x), data = NFLmommlret).fit().tvalues['Intercept']
print(tstats)


spreadocreturn       -0.635111
spreadclosereturns   -2.864507
spreadopenreturn     -3.499618
spreadtrading         2.229397
dtype: float64
spreadocreturn       -0.222208
spreadclosereturns   -0.363030
spreadopenreturn     -0.445648
spreadtrading         0.250522
dtype: float64
{'spreadocreturn': -0.5658396639139647, 'spreadclosereturns': -0.9244358515673161, 'spreadopenreturn': -1.1348178904223074, 'spreadtrading': 0.6379399749073865}
mlocreturn        4.404215
mlclosereturns   -3.049725
mlopenreturn      1.354490
mltrading         7.453940
dtype: float64
mlocreturn        1.664570
mlclosereturns   -0.525906
mlopenreturn      0.231054
mltrading         1.088518
dtype: float64
{'mlocreturn': 4.238733982033246, 'mlclosereturns': -1.3391905653562703, 'mlopenreturn': 0.5883671035484977, 'mltrading': 2.7718495438676656}


In [25]:
overweights  = chardf[chardf.overWeight != 0]
NFLmomoverret = pd.DataFrame(overweights.groupby('date')['smalloverocreturn'].sum())
NFLmomoverret.rename(columns = {'smalloverocreturn':'overocreturn'}, inplace=True)
NFLmomoverret['overclosereturns'] = overweights.groupby('date')['smalloverclosereturn'].sum() 
NFLmomoverret['overopenreturn'] = NFLmomoverret['overocreturn'] + NFLmomoverret['overclosereturns']
NFLmomoverret['overtrading'] = NFLmomoverret['overocreturn'] - NFLmomoverret['overclosereturns']

means = NFLmomoverret.mean()*GAMES * 2/5
std = NFLmomoverret.std() * np.sqrt(GAMES) * np.sqrt(2/5)
print(means)
print(means / std)

returns = ['overocreturn', 'overclosereturns', 'overopenreturn', 'overtrading']
tstats = {}
for x in returns:
    tstats[x] = sm.ols(formula="{}~1".format(x), data = NFLmomoverret).fit().tvalues['Intercept']
print(tstats)

overocreturn       -1.719013
overclosereturns   -5.173183
overopenreturn     -6.892196
overtrading         3.454169
dtype: float64
overocreturn       -0.669838
overclosereturns   -0.651860
overopenreturn     -0.879030
overtrading         0.391971
dtype: float64
{'overocreturn': -1.705704651453798, 'overclosereturns': -1.6599247004331277, 'overopenreturn': -2.238400066139496, 'overtrading': 0.998131657453621}
