In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm

# Load the game-level returns file (one row per game is assumed -- confirm).
df = pd.read_stata('Returns/MLB/MLBHomeReturnsnovig.dta')

# Placeholder feature columns, filled in row by row by a later cell.
# They are created as floats (0.0, not 0) because the values written later
# are floats; writing a float into an integer column relies on implicit
# upcasting, which pandas has deprecated.
feature_columns = [
    'Awaypointsfor', 'Awaypointsagainst',
    'Homepointsfor', 'Homepointsagainst',
    'AwayPayroll', 'HomePayroll',
    'Homelagreturns', 'Awaylagreturns',
]
for column in feature_columns:
    df[column] = 0.0

# Team labels whose games are excluded.
# NOTE(review): presumably All-Star / exhibition sides -- confirm.
dropteams = ['American', 'National', 'AL', 'NL', 'STF', 'ALT']

# Keep only games where neither side is an excluded label. The explicit
# .copy() makes df an independent frame so that later column assignments do
# not act on a slice of the original (SettingWithCopyWarning).
keep = ~df['awayteam'].isin(dropteams) & ~df['hometeam'].isin(dropteams)
df = df[keep].copy()
teams = pd.concat([df['awayteam'], df['hometeam']]).unique()

# Per-team running state used by the feature loop:
#   pfor / pagainst : lists of scores for / against in recent games
#   seasonret       : return accumulated during the current season
#   lagreturns      : the previous season's accumulated return
teamDict = {
    team: {'pfor': [], 'pagainst': [], 'lagreturns': 0, 'seasonret': 0}
    for team in teams
}

# Season of each game; assumes `date` is a YYYYMMDD number -- TODO confirm.
df['year'] = df['date'] // 10000

In [2]:
# Payroll table; the first spreadsheet column becomes the row index.
# The feature loop reads it as payroll.at[team, year], so the rows must be
# labelled with the team names used in df and the columns with the seasons.
payroll = pd.read_excel('MLB Payroll.xlsx', index_col=0)

In [3]:
# Season the feature loop starts in; the loop overwrites it whenever a row
# from a different season is reached. Rows from 2009 are also dropped before
# portfolios are formed.
year = 2009
# NOTE(review): not referenced anywhere in the visible notebook.
value = ['pfor', 'pagainst']
# Scaling factor used when annualising the daily portfolio returns.
# NOTE(review): presumably games per MLB regular season (30 * 162 / 2) -- confirm.
GAMES = 2430
# Length of the rolling window of recent games kept per team, and the divisor
# used when averaging scores over that window.
N = 40
# Exponent applied to points for / against in the win-expectation formula.
PYTHAG = 1.83

In [4]:
# Build the per-game features in one pass over the games, in frame order.
# Each game's features come from the state accumulated over EARLIER rows only;
# the game's own result is added to the state afterwards.
#
# NOTE: this cell mutates `teamDict` and `year`, so it is not idempotent.
# Re-run the cells that create them before running it again.
# NOTE(review): the `break` and the season roll-over below assume the rows are
# in chronological order -- confirm against the input file.
STOP_YEAR = 2023           # processing stops at the first row of this season
PAYROLL_SCALE = 10 ** -7   # rescaling applied to the payroll difference

# The values written below are floats. Cast the target columns once up front
# so that scalar writes never depend on implicit int -> float upcasting.
for _col in ('Awaypointsfor', 'Awaypointsagainst', 'Homepointsfor',
             'Homepointsagainst', 'AwayPayroll', 'HomePayroll',
             'Homelagreturns'):
    df[_col] = df[_col].astype(float)


def _window_means(stats):
    """Return (points for, points against) summed over the stored games / N.

    The divisor is always N, even while fewer than N games are stored.
    """
    return sum(stats['pfor']) / N, sum(stats['pagainst']) / N


for index, row in df.iterrows():
    awayteam = row['awayteam']
    hometeam = row['hometeam']

    curyear = row['year']
    if curyear == STOP_YEAR:
        break
    if curyear != year:
        # Season changed: last season's accumulated return becomes the lagged
        # value and the running total restarts from zero.
        year = curyear
        for stats in teamDict.values():
            stats['lagreturns'] = stats['seasonret']
            stats['seasonret'] = 0

    away = teamDict[awayteam]
    home = teamDict[hometeam]

    # Rolling scoring averages as of before this game.
    away_for, away_against = _window_means(away)
    df.at[index, 'Awaypointsfor'] = away_for
    df.at[index, 'Awaypointsagainst'] = away_against

    home_for, home_against = _window_means(home)
    df.at[index, 'Homepointsfor'] = home_for
    df.at[index, 'Homepointsagainst'] = home_against

    # Make room so each window holds at most N games after the append below.
    if len(away['pfor']) > N - 1:
        away['pfor'].pop(0)
        away['pagainst'].pop(0)
    if len(home['pfor']) > N - 1:
        home['pfor'].pop(0)
        home['pagainst'].pop(0)

    # Record this game's score from each side's point of view.
    away['pfor'].append(row['awayscore'])
    away['pagainst'].append(row['homescore'])
    home['pfor'].append(row['homescore'])
    home['pagainst'].append(row['awayscore'])

    # Only the home side's season return is accumulated.
    home['seasonret'] += 1. * df.at[index, 'mlhomeopenreturn']

    # Payroll gap between the two sides for the current season (both signs).
    awaypayroll = payroll.at[awayteam, year]
    homepayroll = payroll.at[hometeam, year]
    df.at[index, 'AwayPayroll'] = (awaypayroll - homepayroll) * PAYROLL_SCALE
    df.at[index, 'HomePayroll'] = (homepayroll - awaypayroll) * PAYROLL_SCALE

    # Negated previous-season return of the home team.
    df.at[index, 'Homelagreturns'] = -1 * home['lagreturns']

In [5]:
def _scale_by_moneyline(signal, moneyline):
    """Combine a signal with the opening home moneyline, element-wise.

    signal > 0:   moneyline < 0 -> |signal / moneyline|
                  otherwise     -> signal / (1 / moneyline)
    signal <= 0:  moneyline < 0 -> -signal / moneyline
                  otherwise     -> |signal / moneyline|

    Returns a NumPy array aligned with the inputs.
    """
    negative_line = moneyline < 0
    ratio_size = abs(signal / moneyline)
    if_positive = np.where(negative_line, ratio_size, signal / (1 / moneyline))
    otherwise = np.where(negative_line, -1 * signal / moneyline, ratio_size)
    return np.where(signal > 0, if_positive, otherwise)


# Win expectation for each side: for^PYTHAG / (for^PYTHAG + against^PYTHAG),
# computed from the rolling scoring averages.
away_for_pow = df['Awaypointsfor'] ** PYTHAG
away_against_pow = df['Awaypointsagainst'] ** PYTHAG
df['AwayWinEx'] = away_for_pow / (away_for_pow + away_against_pow)

home_for_pow = df['Homepointsfor'] ** PYTHAG
home_against_pow = df['Homepointsagainst'] ** PYTHAG
df['HomeWinEx'] = home_for_pow / (home_for_pow + home_against_pow)

# Home-minus-away win expectation, multiplied by the `overopen` column.
df['Homepointdiff'] = (df['HomeWinEx'] - df['AwayWinEx']) * df['overopen']

# Moneyline-scaled versions of the fundamentals signal and the payroll gap.
df['Homepointfund'] = _scale_by_moneyline(df['Homepointdiff'], df['homemlopen'])
df['Homepointpay'] = _scale_by_moneyline(df['HomePayroll'], df['homemlopen'])

#df.to_stata('MLBValueChar.dta')

In [6]:
# Z-score each signal over the whole frame: subtract the column mean and
# divide by the column (sample) standard deviation.
for zcol in ('Homepointfund', 'Homepointpay', 'Homelagreturns'):
    centred = df[zcol] - df[zcol].mean()
    df[zcol] = centred / df[zcol].std()

In [7]:
# Composite home characteristic: the plain sum of the three standardised
# signals. A missing value in any one of them makes the sum missing.
df['HomeChar'] = (
    df['Homepointfund']
    .add(df['Homepointpay'])
    .add(df['Homelagreturns'])
)

In [8]:
# Work on an independent copy of the feature frame with the 2009 rows removed.
# NOTE(review): presumably because no lagged season return exists for the
# first season in the sample -- confirm.
chardf = df.copy(deep=True)
rows_2009 = chardf.index[chardf['year'] == 2009]
chardf = chardf.drop(index=rows_2009)

In [9]:
# Sort signal, taken from the full frame (aligned on the index).
chardf['pointValue'] = df['HomeChar']

# Within each date: number of games, and the rank of each game's signal.
per_day = chardf.groupby('date')
games_per_day = per_day['date'].transform('size')
signal_rank = per_day['pointValue'].rank()
chardf['games'] = games_per_day
chardf['ranks'] = signal_rank

# Rank as a fraction of the day's games; at or above 0.8 is flagged buy,
# at or below 0.2 is flagged sell.
chardf['thresh'] = chardf['ranks'] / chardf['games']
chardf['buy'] = chardf['thresh'] >= .8
chardf['sell'] = chardf['thresh'] <= .2
#chardf.to_stata('MLBSpreadChar.dta')

In [10]:
# Same ranking as for the buy / sell flags, stored under the over* names;
# the signal is again HomeChar.
chardf['overMom'] = chardf['HomeChar']
over_rank = chardf.groupby('date')['overMom'].rank()
chardf['overRanks'] = over_rank

# Rank as a fraction of the day's games, with the same 0.8 / 0.2 cut-offs.
chardf['overthresh'] = chardf['overRanks'] / chardf['games']
chardf['overBuy'] = chardf['overthresh'] >= .8
chardf['overSell'] = chardf['overthresh'] <= .2

In [11]:
# Position weight per game: +1/games for buys, -1/games for sells, 0 otherwise.
long_leg = chardf['buy'] / chardf['games']
short_leg = chardf['sell'] / chardf['games']
chardf['weight'] = long_leg - short_leg

# Weighted contribution of each game to the two return legs.
for out_col, ret_col in (('smallocreturn', 'mlhomeOCreturn'),
                         ('smallclosereturn', 'mlhomeclosereturn')):
    chardf[out_col] = chardf['weight'] * chardf[ret_col]

#chardf.to_stata('MLBmlChar.dta')

In [12]:
# Games that actually carry a position.
weights = chardf.loc[chardf['weight'] != 0]

# Daily portfolio returns: sum the weighted game returns within each date.
daily = weights.groupby('date')
MLBmlvalret = daily['smallocreturn'].sum().to_frame(name='mlocreturn')
MLBmlvalret['mlclosereturns'] = daily['smallclosereturn'].sum()
MLBmlvalret['mlopenreturn'] = MLBmlvalret['mlocreturn'] + MLBmlvalret['mlclosereturns']
MLBmlvalret['mltrading'] = MLBmlvalret['mlocreturn'] - MLBmlvalret['mlclosereturns']

# Scale the daily mean by GAMES * 2/5 and the daily std by the square root of
# the same factor, then print the scaled means and their ratio to the std.
means = MLBmlvalret.mean() * GAMES * 2 / 5
std = MLBmlvalret.std() * np.sqrt(GAMES) * np.sqrt(2 / 5)
print(means)
print(means / std)

# t-statistic of the mean of each return series, taken from an
# intercept-only OLS regression.
returns = ['mlocreturn', 'mlclosereturns', 'mlopenreturn', 'mltrading']
tstats = {
    col: sm.ols(formula="{}~1".format(col), data=MLBmlvalret).fit().tvalues['Intercept']
    for col in returns
}
print(tstats)

mlocreturn        1.577463
mlclosereturns    1.477107
mlopenreturn      3.054570
mltrading         0.100356
dtype: float64
mlocreturn        1.874429
mlclosereturns    0.150373
mlopenreturn      0.309114
mltrading         0.010203
dtype: float64
{'mlocreturn': 3.0514681318224377, 'mlclosereturns': 0.2447993657990645, 'mlopenreturn': 0.5032211894957612, 'mltrading': 0.016609627485912545}
