In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm

# Load the per-game MLB table and set up the columns / per-team state that the
# schedule loop further down fills in.
#
# NOTE(review): the path ends in '.dta.' (trailing dot). That looks like a typo
# but is left untouched here because the real file name cannot be confirmed
# from this notebook -- verify against the data directory.
df = pd.read_stata('Returns/MLB/MLBFavReturnsnovig.dta.')

# Placeholder feature columns. They are created as floats (0.0, not 0) because
# the loop stores fractional values such as wins / N in them; writing a float
# into an integer column relies on silent upcasting, which newer pandas
# versions warn about and eventually reject.
for column in ('AwayWins', 'AwaySpread', 'AwayReturns',
               'HomeWins', 'HomeSpread', 'HomeReturns'):
    df[column] = 0.0

# One state record per team that appears as either the away or the home side:
# three parallel lists of recent per-game values plus a games-played counter.
teams = pd.concat([df['awayteam'], df['hometeam']]).unique()
teamDict = {
    team: {'wins': [], 'spread': [], 'returns': [], 'games': 0}
    for team in teams
}

# assumes 'date' is a YYYYMMDD integer, so integer division by 10000 leaves
# the year -- TODO confirm against the source file
df['year'] = df['date'] // 10000

# Games-played counters written by the schedule loop; declared here so the
# column set is explicit instead of appearing implicitly on first assignment.
df['AwayGames'] = 0
df['HomeGames'] = 0

In [2]:
# Length of the rolling per-team window (number of most recent games kept).
N = 9

# Scaling constant used when the daily portfolio returns are annualised.
# Presumably the number of games in a full MLB regular season
# (30 teams x 162 games / 2 = 2430) -- confirm.
GAMES = 2430

# Keys of the per-team rolling lists stored in teamDict.
momentum = ['wins', 'spread', 'returns']

# Season the schedule loop is currently in; the loop compares each row's year
# against this value and overwrites it whenever the season changes.
year = 2009

In [3]:
# Walk the schedule in file order. For every game, first record each side's
# rolling-window features as they stand *before* the game, then fold the
# game's result into both teams' windows (at most N games per team).
#
# Columns written:
#   AwayWins / HomeWins        sum of the window's 0/1 win flags, divided by N
#   AwaySpread / HomeSpread    sum of the window's score differentials
#   AwayReturns / HomeReturns  sum of the window's 0/1 'returns' flags, divided by N
#   AwayGames / HomeGames      games the team has played so far this season
#
# Values are collected in plain lists and assigned as whole columns at the
# end. That replaces eight list-indexed .loc writes per row (slow, and it
# pushed fractional values into integer placeholder columns one cell at a
# time) with a single assignment per column.
# assumes df has a unique index, as the default read_stata index is, so the
# positional assignment matches the original label-based writes -- confirm

AWAY_COLUMNS = ('AwayWins', 'AwaySpread', 'AwayReturns', 'AwayGames')
HOME_COLUMNS = ('HomeWins', 'HomeSpread', 'HomeReturns', 'HomeGames')


def _reset_team_state():
    """Empty every team's rolling lists and zero its games-played counter."""
    for state in teamDict.values():
        for key in momentum:
            state[key].clear()
        state['games'] = 0


def _snapshot(state):
    """Return (wins / N, spread sum, returns / N, games) for one team's window."""
    return (
        sum(state['wins']) / N,
        sum(state['spread']),
        sum(state['returns']) / N,
        state['games'],
    )


# Start from a clean slate so that re-running this cell on its own gives the
# same features as a fresh Restart & Run All (the lists live in teamDict,
# which is created in an earlier cell and would otherwise carry over).
_reset_team_state()

features = {name: [] for name in AWAY_COLUMNS + HOME_COLUMNS}

game_rows = zip(
    df['awayteam'], df['hometeam'], df['year'],
    df['awayscore'], df['homescore'],
    df['homemlopen'], df['awaymlopen'],
)
for awayteam, hometeam, curyear, awayscore, homescore, homeml, awayml in game_rows:
    # New season: windows and game counters do not carry over.
    if curyear != year:
        year = curyear
        _reset_team_state()

    away_state = teamDict[awayteam]
    home_state = teamDict[hometeam]

    # Pre-game features for this row.
    for name, value in zip(AWAY_COLUMNS, _snapshot(away_state)):
        features[name].append(value)
    for name, value in zip(HOME_COLUMNS, _snapshot(home_state)):
        features[name].append(value)

    # Make room so that each window holds at most N games after the append.
    for state in (away_state, home_state):
        if len(state['wins']) >= N:
            state['wins'].pop(0)
            state['spread'].pop(0)
            state['returns'].pop(0)

    # Win flag: anything that is not a strict away win is credited to the
    # home team (this includes equal scores).
    away_won = awayscore > homescore
    away_state['wins'].append(1 if away_won else 0)
    home_state['wins'].append(0 if away_won else 1)

    # Score differential from each side's point of view.
    away_state['spread'].append(awayscore - homescore)
    home_state['spread'].append(homescore - awayscore)

    # 'returns' flag: exactly one side gets 1, decided from the score margin
    # against 1.5 and the opening moneylines against 100.
    # NOTE(review): the first test uses `<= 100` while the others use `< 100`,
    # and the second branch fires for any home margin below 1.5 -- the rule is
    # reproduced exactly as it was; confirm it matches the intended definition.
    home_margin = homescore - awayscore
    if home_margin > 1.5 and homeml <= 100:
        home_flag = 1
    elif home_margin < 1.5 and homeml < 100:
        home_flag = 0
    elif awayscore - homescore > 1.5 and awayml < 100:
        home_flag = 0
    else:
        home_flag = 1
    home_state['returns'].append(home_flag)
    away_state['returns'].append(1 - home_flag)

    away_state['games'] += 1
    home_state['games'] += 1

for name, values in features.items():
    df[name] = values

In [4]:
# Convert each rolling feature to a z-score: subtract the column mean and
# divide by the column standard deviation, both taken over all rows of df.
for column in ('AwayWins', 'AwaySpread', 'AwayReturns',
               'HomeWins', 'HomeSpread', 'HomeReturns'):
    raw = df[column]
    df[column] = (raw - raw.mean()) / raw.std()

In [5]:
# Composite score per side: the plain sum of that side's three feature columns.
for total, (wins_col, spread_col, returns_col) in (
    ('AwayChar', ('AwayWins', 'AwaySpread', 'AwayReturns')),
    ('HomeChar', ('HomeWins', 'HomeSpread', 'HomeReturns')),
):
    df[total] = df[wins_col] + df[spread_col] + df[returns_col]

In [6]:
# Keep only games where both teams have already played at least N games this
# season, i.e. drop rows where either games-played counter is below N.
# The mask is computed once and the result is an independent copy, so df
# itself is left untouched and the cell can be re-run safely.
# assumes df has a unique index, so mask filtering equals dropping by label
too_few_games = (df.HomeGames < N) | (df.AwayGames < N)
chardf = df.loc[~too_few_games].copy(deep=True)

In [7]:
# Signed gap between the two composite scores. When the home opening moneyline
# is negative the gap is home minus away, otherwise away minus home
# (presumably "favourite minus underdog" -- confirm the odds convention).
home_ml_negative = chardf['homemlopen'] < 0
home_minus_away = chardf['HomeChar'] - chardf['AwayChar']
away_minus_home = chardf['AwayChar'] - chardf['HomeChar']
chardf['Momentum'] = np.where(home_ml_negative, home_minus_away, away_minus_home)

# Number of rows sharing each date, and each row's within-date rank of the gap.
chardf['games'] = chardf.groupby('date')['date'].transform('size')
chardf['ranks'] = chardf.groupby('date')['Momentum'].rank(method='average', ascending=True)

# Rank as a fraction of that date's row count; flag the top and bottom ends.
chardf['thresh'] = chardf['ranks'] / chardf['games']
chardf['buy'] = chardf['thresh'] >= .8
chardf['sell'] = chardf['thresh'] <= .2

In [8]:
# Combined score of both sides, ranked within each date.
chardf['overMom'] = chardf['HomeChar'] + chardf['AwayChar']
chardf['overRanks'] = chardf.groupby('date')['overMom'].rank(method='average', ascending=True)

# Rank as a fraction of that date's row count ('games'); flag both ends.
chardf['overthresh'] = chardf['overRanks'] / chardf['games']
chardf['overBuy'] = chardf['overthresh'] >= .8
chardf['overSell'] = chardf['overthresh'] <= .2

In [9]:
# Per-row portfolio weight: +1/games for a buy, -1/games for a sell, 0 otherwise,
# where 'games' is the number of rows on that date.
games_on_date = chardf['games']
chardf['weight'] = chardf['buy'] / games_on_date - chardf['sell'] / games_on_date

# Weighted versions of the two moneyline return columns.
for target, source in (
    ('smallmlocreturn', 'mlocreturn'),
    ('smallmlclosereturn', 'mlclosereturn'),
):
    chardf[target] = chardf['weight'] * chardf[source]

# Same construction for the over signal and the two over return columns.
chardf['overWeight'] = chardf['overBuy'] / games_on_date - chardf['overSell'] / games_on_date
for target, source in (
    ('smalloverocreturn', 'overocreturn'),
    ('smalloverclosereturn', 'overclosereturn'),
):
    chardf[target] = chardf['overWeight'] * chardf[source]

# Persist the full feature / weight table for use outside the notebook.
chardf.to_stata('MLBSpreadChar.dta')

In [10]:
# Daily moneyline portfolio returns: only rows that carry a non-zero weight
# contribute, and the weighted returns are summed per date.
weights = chardf.loc[chardf['weight'] != 0]
weights_by_date = weights.groupby('date')

MLBmommlret = pd.DataFrame({
    'mlocreturn': weights_by_date['smallmlocreturn'].sum(),
    'mlclosereturns': weights_by_date['smallmlclosereturn'].sum(),
})
# Sum and difference of the two daily return series.
MLBmommlret['mlopenreturn'] = MLBmommlret['mlocreturn'] + MLBmommlret['mlclosereturns']
MLBmommlret['mltrading'] = MLBmommlret['mlocreturn'] - MLBmommlret['mlclosereturns']

# Scale the per-date mean and standard deviation by GAMES * 2/5
# (presumably an annualisation: 2/5 would be the 20% bought plus the 20% sold
# -- confirm), then print the scaled mean and the mean-to-std ratio.
daily_mean = MLBmommlret.mean()
daily_std = MLBmommlret.std()
means = daily_mean * GAMES * 2/5
std = daily_std * np.sqrt(GAMES) * np.sqrt(2/5)
print(means)
print(means / std)

# t-statistic of each series' mean, taken from an intercept-only OLS fit.
returns = ['mlocreturn', 'mlclosereturns', 'mlopenreturn', 'mltrading']
tstats = {
    name: sm.ols(formula="{}~1".format(name), data=MLBmommlret).fit().tvalues['Intercept']
    for name in returns
}
print(tstats)

mlocreturn         6.748021
mlclosereturns    -9.153071
mlopenreturn      -2.405050
mltrading         15.901093
dtype: float64
mlocreturn        1.719952
mlclosereturns   -1.010475
mlopenreturn     -0.263972
mltrading         1.503339
dtype: float64
{'mlocreturn': 2.8259539794599293, 'mlclosereturns': -1.6602536245373682, 'mlopenreturn': -0.4337173160805376, 'mltrading': 2.470049268441507}


In [11]:
# Daily over-signal portfolio returns: only rows with a non-zero overWeight
# contribute, and the weighted returns are summed per date.
overweights = chardf.loc[chardf['overWeight'] != 0]
overweights_by_date = overweights.groupby('date')

MLBmomoverret = pd.DataFrame({
    'overocreturn': overweights_by_date['smalloverocreturn'].sum(),
    'overclosereturns': overweights_by_date['smalloverclosereturn'].sum(),
})
# Sum and difference of the two daily return series.
MLBmomoverret['overopenreturn'] = MLBmomoverret['overocreturn'] + MLBmomoverret['overclosereturns']
MLBmomoverret['overtrading'] = MLBmomoverret['overocreturn'] - MLBmomoverret['overclosereturns']

# Same GAMES * 2/5 scaling as the moneyline summary; prints the scaled mean
# and the mean-to-std ratio.
daily_mean = MLBmomoverret.mean()
daily_std = MLBmomoverret.std()
means = daily_mean * GAMES * 2/5
std = daily_std * np.sqrt(GAMES) * np.sqrt(2/5)
print(means)
print(means / std)

# t-statistic of each series' mean, taken from an intercept-only OLS fit.
returns = ['overocreturn', 'overclosereturns', 'overopenreturn', 'overtrading']
tstats = {
    name: sm.ols(formula="{}~1".format(name), data=MLBmomoverret).fit().tvalues['Intercept']
    for name in returns
}
print(tstats)

overocreturn         0.869487
overclosereturns   -10.071318
overopenreturn      -9.201831
overtrading         10.940804
dtype: float64
overocreturn        0.324553
overclosereturns   -1.000018
overopenreturn     -0.914177
overtrading         1.016303
dtype: float64
{'overocreturn': 0.533254140799461, 'overclosereturns': -1.6430717106938175, 'overopenreturn': -1.502031208112361, 'overtrading': 1.6698285833179372}
