In [1]:
#Load required packages
import pandas as pd
import numpy as np 
import researchpy as rp
import scipy.stats as stats
from scipy.stats import levene

### AFL

Games are from the 2009 season til the 2020 season and the data is sourced from http://www.aussportsbetting.com/data/historical-afl-results-and-odds-data/

In [2]:
#Read in data and neccessary columns
data_afl = pd.read_excel("afl.xlsx")
data_afl.columns = data_afl.iloc[0]
data_afl = data_afl.drop(0)
data_afl = data_afl[['Home Team','Away Team','Home Score','Away Score','Home Odds','Away Odds']]

In [3]:
#Create new variables for our analysis
data_afl = data_afl[(data_afl['Home Score'] - data_afl['Away Score']) != 0] #filter out draws/no results
data_afl['Winner'] = np.where(data_afl['Home Score'] > data_afl['Away Score'],data_afl['Home Team'],data_afl['Away Team'])
data_afl['Underdog'] = np.where(data_afl['Home Odds'] < data_afl['Away Odds'],data_afl['Away Team'],data_afl['Home Team'])
data_afl['Bookmakers Margin'] = (1/data_afl['Home Odds'])+(1/data_afl['Away Odds']) - 1
data_afl['Home Implied Odds'] = 1/data_afl['Home Odds']
data_afl['Away Implied Odds'] = 1/data_afl['Away Odds']
data_afl['Home Fair Odds'] = data_afl['Home Implied Odds']/(data_afl['Bookmakers Margin']+1)
data_afl['Away Fair Odds'] = data_afl['Away Implied Odds']/(data_afl['Bookmakers Margin']+1)
data_afl['Home Expected Return'] = data_afl['Home Odds']*data_afl['Home Fair Odds'] - 1
data_afl['Away Expected Return'] = data_afl['Away Odds']*data_afl['Away Fair Odds'] - 1
data_afl['Home Actual Return'] = np.where(data_afl['Winner'] == data_afl['Home Team'], data_afl['Home Odds'] - 1, -1)
data_afl['Away Actual Return'] = np.where(data_afl['Winner'] == data_afl['Away Team'], data_afl['Away Odds'] - 1, -1)
data_afl['Underdog Actual Return'] = np.where(data_afl['Underdog'] == data_afl['Away Team'], data_afl['Away Actual Return'], data_afl['Home Actual Return'])
data_afl['Favourite Actual Return'] = np.where(data_afl['Underdog'] == data_afl['Home Team'], data_afl['Away Actual Return'], data_afl['Home Actual Return'])

In [4]:
#inspect dataframe
data_afl.head()

Unnamed: 0,Home Team,Away Team,Home Score,Away Score,Home Odds,Away Odds,Winner,Underdog,Bookmakers Margin,Home Implied Odds,Away Implied Odds,Home Fair Odds,Away Fair Odds,Home Expected Return,Away Expected Return,Home Actual Return,Away Actual Return,Underdog Actual Return,Favourite Actual Return
1,Richmond,Geelong,81,50,1.83,1.97,Richmond,Geelong,0.0540623,0.546448,0.507614,0.518421,0.481579,-0.0512895,-0.0512895,0.83,-1.0,-1.0,0.83
2,Brisbane,Geelong,42,82,1.79,2.01,Geelong,Geelong,0.0561717,0.558659,0.497512,0.528947,0.471053,-0.0531842,-0.0531842,-1.0,1.01,1.01,-1.0
3,Port Adelaide,Richmond,40,46,1.73,2.09,Richmond,Richmond,0.0565036,0.578035,0.478469,0.54712,0.45288,-0.0534817,-0.0534817,-1.0,1.09,1.09,-1.0
4,Geelong,Collingwood,100,32,1.6,2.33,Geelong,Collingwood,0.0541845,0.625,0.429185,0.592875,0.407125,-0.0513995,-0.0513995,0.6,-1.0,-1.0,0.6
5,Richmond,St Kilda,80,49,1.26,3.88,Richmond,St Kilda,0.0513828,0.793651,0.257732,0.754864,0.245136,-0.0488716,-0.0488716,0.26,-1.0,-1.0,0.26


### NRL

Games are from the 2009 season til the 2020 season and the data is sourced from http://www.aussportsbetting.com/data/historical-nrl-results-and-odds-data/

In [5]:
#Read in data and neccessary columns
data_nrl = pd.read_excel("nrl.xlsx")
data_nrl.columns = data_nrl.iloc[0]
data_nrl = data_nrl.drop(0)
print(data_nrl.columns)
data_nrl = data_nrl[['Home Team','Away Team','Home Score','Away Score','Home Odds','Away Odds']]
data_nrl = data_nrl.dropna()

Index(['Date', 'Kick-off (local)', 'Home Team', 'Away Team', 'Home Score',
       'Away Score', 'Play Off Game?', 'Over Time?', 'Home Odds', 'Draw Odds',
       'Away Odds', 'Bookmakers Surveyed', 'Home Odds Open', 'Home Odds Min',
       'Home Odds Max', 'Home Odds Close', 'Away Odds Open', 'Away Odds Min',
       'Away Odds Max', 'Away Odds Close', 'Home Line Open', 'Home Line Min',
       'Home Line Max', 'Home Line Close', 'Away Line Open', 'Away Line Min',
       'Away Line Max', 'Away Line Close', 'Home Line Odds Open',
       'Home Line Odds Min', 'Home Line Odds Max', 'Home Line Odds Close',
       'Away Line Odds Open', 'Away Line Odds Min', 'Away Line Odds Max',
       'Away Line Odds Close', 'Total Score Open', 'Total Score Min',
       'Total Score Max', 'Total Score Close', 'Total Score Over Open',
       'Total Score Over Min', 'Total Score Over Max',
       'Total Score Over Close', 'Total Score Under Open',
       'Total Score Under Min', 'Total Score Under Max',
      

In [6]:
#Create new variables for our analysis
data_nrl = data_nrl[(data_nrl['Home Score'] - data_nrl['Away Score']) != 0] #filter out draws/no results
data_nrl['Winner'] = np.where(data_nrl['Home Score'] > data_nrl['Away Score'],data_nrl['Home Team'],data_nrl['Away Team'])
data_nrl['Underdog'] = np.where(data_nrl['Home Odds'] < data_nrl['Away Odds'],data_nrl['Away Team'],data_nrl['Home Team'])
data_nrl['Bookmakers Margin'] = (1/data_nrl['Home Odds'])+(1/data_nrl['Away Odds']) - 1
data_nrl['Home Implied Odds'] = 1/data_nrl['Home Odds']
data_nrl['Away Implied Odds'] = 1/data_nrl['Away Odds']
data_nrl['Home Fair Odds'] = data_nrl['Home Implied Odds']/(data_nrl['Bookmakers Margin']+1)
data_nrl['Away Fair Odds'] = data_nrl['Away Implied Odds']/(data_nrl['Bookmakers Margin']+1)
data_nrl['Home Expected Return'] = data_nrl['Home Odds']*data_nrl['Home Fair Odds'] - 1
data_nrl['Away Expected Return'] = data_nrl['Away Odds']*data_nrl['Away Fair Odds'] - 1
data_nrl['Home Actual Return'] = np.where(data_nrl['Winner'] == data_nrl['Home Team'], data_nrl['Home Odds'] - 1, -1)
data_nrl['Away Actual Return'] = np.where(data_nrl['Winner'] == data_nrl['Away Team'], data_nrl['Away Odds'] - 1, -1)
data_nrl['Underdog Actual Return'] = np.where(data_nrl['Underdog'] == data_nrl['Away Team'], data_nrl['Away Actual Return'], data_nrl['Home Actual Return'])
data_nrl['Favourite Actual Return'] = np.where(data_nrl['Underdog'] == data_nrl['Home Team'], data_nrl['Away Actual Return'], data_nrl['Home Actual Return'])

In [7]:
#inspect dataframe
data_nrl.head()

Unnamed: 0,Home Team,Away Team,Home Score,Away Score,Home Odds,Away Odds,Winner,Underdog,Bookmakers Margin,Home Implied Odds,Away Implied Odds,Home Fair Odds,Away Fair Odds,Home Expected Return,Away Expected Return,Home Actual Return,Away Actual Return,Underdog Actual Return,Favourite Actual Return
1,Penrith Panthers,Melbourne Storm,20,26,2.24,1.7,Melbourne Storm,Penrith Panthers,0.0346639,0.446429,0.588235,0.431472,0.568528,-0.0335025,-0.0335025,-1.0,0.7,-1.0,0.7
2,Penrith Panthers,South Sydney Rabbitohs,20,16,1.38,3.18,Penrith Panthers,South Sydney Rabbitohs,0.0391031,0.724638,0.314465,0.697368,0.302632,-0.0376316,-0.0376316,0.38,-1.0,-1.0,0.38
3,Melbourne Storm,Canberra Raiders,30,10,1.45,2.86,Melbourne Storm,Canberra Raiders,0.0393055,0.689655,0.34965,0.663573,0.336427,-0.037819,-0.037819,0.45,-1.0,-1.0,0.45
4,Parramatta Eels,South Sydney Rabbitohs,24,38,3.57,1.3,South Sydney Rabbitohs,Parramatta Eels,0.0493428,0.280112,0.769231,0.26694,0.73306,-0.0470226,-0.0470226,-1.0,0.3,-1.0,0.3
5,Sydney Roosters,Canberra Raiders,18,22,1.43,2.91,Canberra Raiders,Canberra Raiders,0.0429433,0.699301,0.343643,0.670507,0.329493,-0.0411751,-0.0411751,-1.0,1.91,1.91,-1.0


### NFL

Games are from the 2006 season til the 2020 season and the data is sourced from http://www.aussportsbetting.com/data/historical-nfl-results-and-odds-data/

In [8]:
#Read in data and neccessary columns
data_nfl = pd.read_excel("nfl.xlsx")
print(data_nfl.columns)
data_nfl = data_nfl[['Home Team','Away Team','Home Score','Away Score','Home Odds Close','Away Odds Close']]
data_nfl = data_nfl.dropna()

Index(['Date', 'Home Team', 'Away Team', 'Home Score', 'Away Score',
       'Overtime?', 'Playoff Game?', 'Neutral Venue?', 'Home Odds Open',
       'Home Odds Min', 'Home Odds Max', 'Home Odds Close', 'Away Odds Open',
       'Away Odds Min', 'Away Odds Max', 'Away Odds Close', 'Home Line Open',
       'Home Line Min', 'Home Line Max', 'Home Line Close', 'Away Line Open',
       'Away Line Min', 'Away Line Max', 'Away Line Close',
       'Home Line Odds Open', 'Home Line Odds Min', 'Home Line Odds Max',
       'Home Line Odds Close', 'Away Line Odds Open', 'Away Line Odds Min',
       'Away Line Odds Max', 'Away Line Odds Close', 'Total Score Open',
       'Total Score Min', 'Total Score Max', 'Total Score Close',
       'Total Score Over Open', 'Total Score Over Min', 'Total Score Over Max',
       'Total Score Over Close', 'Total Score Under Open',
       'Total Score Under Min', 'Total Score Under Max',
       'Total Score Under Close', 'Notes'],
      dtype='object')


In [9]:
#Create new variables for our analysis
data_nfl = data_nfl[(data_nfl['Home Score'] - data_nfl['Away Score']) != 0]
data_nfl['Winner'] = np.where(data_nfl['Home Score'] > data_nfl['Away Score'],data_nfl['Home Team'],data_nfl['Away Team'])
data_nfl['Underdog'] = np.where(data_nfl['Home Odds Close'] < data_nfl['Away Odds Close'],data_nfl['Away Team'],data_nfl['Home Team'])
data_nfl['Bookmakers Margin'] = (1/data_nfl['Home Odds Close'])+(1/data_nfl['Away Odds Close']) - 1
data_nfl['Home Implied Odds'] = 1/data_nfl['Home Odds Close']
data_nfl['Away Implied Odds'] = 1/data_nfl['Away Odds Close']
data_nfl['Home Fair Odds'] = data_nfl['Home Implied Odds']/(data_nfl['Bookmakers Margin']+1)
data_nfl['Away Fair Odds'] = data_nfl['Away Implied Odds']/(data_nfl['Bookmakers Margin']+1)
data_nfl['Home Expected Return'] = data_nfl['Home Odds Close']*data_nfl['Home Fair Odds'] - 1
data_nfl['Away Expected Return'] = data_nfl['Away Odds Close']*data_nfl['Away Fair Odds'] - 1
data_nfl['Home Actual Return'] = np.where(data_nfl['Winner'] == data_nfl['Home Team'], data_nfl['Home Odds Close'] - 1, -1)
data_nfl['Away Actual Return'] = np.where(data_nfl['Winner'] == data_nfl['Away Team'], data_nfl['Away Odds Close'] - 1, -1)
data_nfl['Underdog Actual Return'] = np.where(data_nfl['Underdog'] == data_nfl['Away Team'], data_nfl['Away Actual Return'], data_nfl['Home Actual Return'])
data_nfl['Favourite Actual Return'] = np.where(data_nfl['Underdog'] == data_nfl['Home Team'], data_nfl['Away Actual Return'], data_nfl['Home Actual Return'])

In [10]:
#inspect dataframe
data_nfl.head()

Unnamed: 0,Home Team,Away Team,Home Score,Away Score,Home Odds Close,Away Odds Close,Winner,Underdog,Bookmakers Margin,Home Implied Odds,Away Implied Odds,Home Fair Odds,Away Fair Odds,Home Expected Return,Away Expected Return,Home Actual Return,Away Actual Return,Underdog Actual Return,Favourite Actual Return
0,Tampa Bay Buccaneers,Los Angeles Rams,24,27,1.47,2.75,Los Angeles Rams,Los Angeles Rams,0.043908,0.680272,0.363636,0.651659,0.348341,-0.042062,-0.042062,-1.0,1.75,1.75,-1.0
1,Las Vegas Raiders,Kansas City Chiefs,31,35,3.8,1.27,Kansas City Chiefs,Las Vegas Raiders,0.050559,0.263158,0.787402,0.250493,0.749507,-0.048126,-0.048126,-1.0,0.27,-1.0,0.27
2,Indianapolis Colts,Green Bay Packers,34,31,1.8,2.05,Indianapolis Colts,Green Bay Packers,0.04336,0.555556,0.487805,0.532468,0.467532,-0.041558,-0.041558,0.8,-1.0,-1.0,0.8
3,Minnesota Vikings,Dallas Cowboys,28,31,1.3,3.65,Dallas Cowboys,Dallas Cowboys,0.043203,0.769231,0.273973,0.737374,0.262626,-0.041414,-0.041414,-1.0,2.65,2.65,-1.0
4,Denver Broncos,Miami Dolphins,20,13,2.6,1.55,Denver Broncos,Denver Broncos,0.029777,0.384615,0.645161,0.373494,0.626506,-0.028916,-0.028916,1.6,-1.0,1.6,-1.0


### NBA

Includes all games from the 2008-09 season til the 2019-20 season and is sourced from https://www.sportsbookreviewsonline.com/scoresoddsarchives/nba/nbaoddsarchives.htm

In [11]:
#Read in data and neccessary columns
data_nba = pd.DataFrame()
nba = ["nba odds 2019-20.xlsx","nba odds 2018-19.xlsx","nba odds 2017-18.xlsx","nba odds 2016-17.xlsx",
       "nba odds 2015-16.xlsx","nba odds 2014-15.xlsx","nba odds 2013-14.xlsx","nba odds 2012-13.xlsx",
       "nba odds 2011-12.xlsx","nba odds 2010-11.xlsx","nba odds 2009-10.xlsx","nba odds 2008-09.xlsx"]

for season in nba:
    data = pd.read_excel(season)
    data = data[['Date','Team','ML','Final']]
    data['Odds'] = np.where(data['ML'] > 0, 1 + data['ML']/100, 1 - (100/data['ML']))
    data = data.drop(['Date','ML'], axis = 1)
    data_nba = pd.concat([data_nba,data])

In [12]:
#Reshape data to contain one game per row instead of just one team
home_team = data_nba.iloc[list(range(1,len(data_nba),2))]
away_team = data_nba.iloc[list(range(0,len(data_nba),2))]
home_team.columns = ['Home Team','Home Score','Home Odds']
away_team.columns = ['Away Team','Away Score','Away Odds']
home_team = home_team.reset_index(drop=True)
away_team = away_team.reset_index(drop=True)
data_nba = pd.concat([home_team,away_team], axis = 1)

In [13]:
#Create new variables for our analysis
data_nba['Winner'] = np.where(data_nba['Home Score'] > data_nba['Away Score'],data_nba['Home Team'],data_nba['Away Team'])
data_nba['Underdog'] = np.where(data_nba['Home Odds'] < data_nba['Away Odds'],data_nba['Away Team'],data_nba['Home Team'])
data_nba['Bookmakers Margin'] = (1/data_nba['Home Odds'])+(1/data_nba['Away Odds']) - 1
data_nba['Home Implied Odds'] = 1/data_nba['Home Odds']
data_nba['Away Implied Odds'] = 1/data_nba['Away Odds']
data_nba['Home Fair Odds'] = data_nba['Home Implied Odds']/(data_nba['Bookmakers Margin']+1)
data_nba['Away Fair Odds'] = data_nba['Away Implied Odds']/(data_nba['Bookmakers Margin']+1)
data_nba['Home Expected Return'] = data_nba['Home Odds']*data_nba['Home Fair Odds'] - 1
data_nba['Away Expected Return'] = data_nba['Away Odds']*data_nba['Away Fair Odds'] - 1
data_nba['Home Actual Return'] = np.where(data_nba['Winner'] == data_nba['Home Team'], data_nba['Home Odds'] - 1, -1)
data_nba['Away Actual Return'] = np.where(data_nba['Winner'] == data_nba['Away Team'], data_nba['Away Odds'] - 1, -1)
data_nba['Underdog Actual Return'] = np.where(data_nba['Underdog'] == data_nba['Away Team'], data_nba['Away Actual Return'], data_nba['Home Actual Return'])
data_nba['Favourite Actual Return'] = np.where(data_nba['Underdog'] == data_nba['Home Team'], data_nba['Away Actual Return'], data_nba['Home Actual Return'])

In [14]:
#inspect dataframe
data_nba.head()

Unnamed: 0,Home Team,Home Score,Home Odds,Away Team,Away Score,Away Odds,Winner,Underdog,Bookmakers Margin,Home Implied Odds,Away Implied Odds,Home Fair Odds,Away Fair Odds,Home Expected Return,Away Expected Return,Home Actual Return,Away Actual Return,Underdog Actual Return,Favourite Actual Return
0,Toronto,130,1.357143,NewOrleans,122,3.3,Toronto,NewOrleans,0.039872,0.736842,0.30303,0.708589,0.291411,-0.038344,-0.038344,0.357143,-1.0,-1.0,0.357143
1,LAClippers,112,2.5,LALakers,102,1.555556,LAClippers,LAClippers,0.042857,0.4,0.642857,0.383562,0.616438,-0.041096,-0.041096,1.5,-1.0,1.5,-1.0
2,Indiana,110,1.333333,Detroit,119,3.4,Detroit,Detroit,0.044118,0.75,0.294118,0.71831,0.28169,-0.042254,-0.042254,-1.0,2.4,2.4,-1.0
3,Orlando,94,1.181818,Cleveland,85,5.0,Orlando,Cleveland,0.046154,0.846154,0.2,0.808824,0.191176,-0.044118,-0.044118,0.181818,-1.0,-1.0,0.181818
4,Charlotte,126,2.45,Chicago,125,1.588235,Charlotte,Charlotte,0.037793,0.408163,0.62963,0.393299,0.606701,-0.036417,-0.036417,1.45,-1.0,1.45,-1.0


### Australia (AFL + NRL)

In [15]:
#Combine AFL and NRL games
australia = pd.concat([data_nrl, data_afl])

In [16]:
#inspect DataFrame
australia.head()

Unnamed: 0,Home Team,Away Team,Home Score,Away Score,Home Odds,Away Odds,Winner,Underdog,Bookmakers Margin,Home Implied Odds,Away Implied Odds,Home Fair Odds,Away Fair Odds,Home Expected Return,Away Expected Return,Home Actual Return,Away Actual Return,Underdog Actual Return,Favourite Actual Return
1,Penrith Panthers,Melbourne Storm,20,26,2.24,1.7,Melbourne Storm,Penrith Panthers,0.0346639,0.446429,0.588235,0.431472,0.568528,-0.0335025,-0.0335025,-1.0,0.7,-1.0,0.7
2,Penrith Panthers,South Sydney Rabbitohs,20,16,1.38,3.18,Penrith Panthers,South Sydney Rabbitohs,0.0391031,0.724638,0.314465,0.697368,0.302632,-0.0376316,-0.0376316,0.38,-1.0,-1.0,0.38
3,Melbourne Storm,Canberra Raiders,30,10,1.45,2.86,Melbourne Storm,Canberra Raiders,0.0393055,0.689655,0.34965,0.663573,0.336427,-0.037819,-0.037819,0.45,-1.0,-1.0,0.45
4,Parramatta Eels,South Sydney Rabbitohs,24,38,3.57,1.3,South Sydney Rabbitohs,Parramatta Eels,0.0493428,0.280112,0.769231,0.26694,0.73306,-0.0470226,-0.0470226,-1.0,0.3,-1.0,0.3
5,Sydney Roosters,Canberra Raiders,18,22,1.43,2.91,Canberra Raiders,Canberra Raiders,0.0429433,0.699301,0.343643,0.670507,0.329493,-0.0411751,-0.0411751,-1.0,1.91,1.91,-1.0


In [17]:
#Total Underdog Actual Return
sum(australia['Underdog Actual Return'])

-456.8099999999998

In [18]:
#Total Favourite Actual Return
sum(australia['Favourite Actual Return'])

-161.9699999999998

In [19]:
#Expected Return - Note this is the same both underdog and favourite teams
sum(australia['Home Expected Return'])

-225.7188155658405

So here we are conducting a t-test to determine whether there is a difference in average return when betting on the favourite vs the underdog in Australia. Our hypotheseses are:

- Null Hypothesis: There is no difference in return between the Favourite teams and the Underdog teams

- Alternative Hypothesis: The Underdog teams produce less return than the Favourite teams

The assumptions are:

1) The data is continous - Which is True

2) Data follows the normal probability distribution - We can make this assumption has the sample size of n = 4637 is large enough such that the data follows the central limit theorem

3) The two samples are indepedendent - We have to be careful here as the underdog and favourite return are dependent on each other i.e. when the favourite wins the underdog loses and vice versa. To ensure we do not violate this assumption, we are going to compare the mean of the expected return and the mean of the underdog actual return 

4) The sample is random and observations are independent of each other - Which is true as the odds of each game are not dependent on the odds of another game

5) Variance of the two groups are equal - We cannot make this assumption as demonstrated by the Levene's test of equality of variance below 

In [20]:
#Determine whether we can assume equal variance
levene(australia['Underdog Actual Return'], australia['Home Expected Return'])

LeveneResult(statistic=1701.7961404106993, pvalue=0.0)

In [21]:
summary, results = rp.ttest(group1= australia['Underdog Actual Return'], group1_name= "Underdog",
         group2= australia['Home Expected Return'], group2_name= "Expected Return", equal_variances = False)

In [22]:
summary

Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,Underdog,4637.0,-0.098514,1.477552,0.021698,-0.141053,-0.055975
1,Expected Return,4637.0,-0.048678,0.008577,0.000126,-0.048925,-0.048431
2,combined,9274.0,-0.073596,1.045046,0.010852,-0.094868,-0.052324


In [23]:
results

Unnamed: 0,Welch's t-test,results
0,Difference (Underdog - Expected Return) =,-0.0498
1,Degrees of freedom =,4636.3125
2,t =,-2.2968
3,Two side test p value =,0.0217
4,Difference < 0 p value =,0.0108
5,Difference > 0 p value =,0.9892
6,Cohen's d =,-0.0477
7,Hedge's g =,-0.0477
8,Glass's delta =,-0.0337
9,r =,0.0337


With a p value of 0.0108 we reject the null hypothesis and accept the alternative hypothesis that in Australia, the Underdog teams produce less return than the Favourite teams 

### USA (NBA + NFL)

In [24]:
#combine nba and nfl games
usa = pd.concat([data_nba, data_nfl])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


In [25]:
#inspect dataframe
usa.head()

Unnamed: 0,Away Actual Return,Away Expected Return,Away Fair Odds,Away Implied Odds,Away Odds,Away Odds Close,Away Score,Away Team,Bookmakers Margin,Favourite Actual Return,...,Home Expected Return,Home Fair Odds,Home Implied Odds,Home Odds,Home Odds Close,Home Score,Home Team,Underdog,Underdog Actual Return,Winner
0,-1.0,-0.038344,0.291411,0.30303,3.3,,122,NewOrleans,0.039872,0.357143,...,-0.038344,0.708589,0.736842,1.357143,,130,Toronto,NewOrleans,-1.0,Toronto
1,-1.0,-0.041096,0.616438,0.642857,1.555556,,102,LALakers,0.042857,-1.0,...,-0.041096,0.383562,0.4,2.5,,112,LAClippers,LAClippers,1.5,LAClippers
2,2.4,-0.042254,0.28169,0.294118,3.4,,119,Detroit,0.044118,-1.0,...,-0.042254,0.71831,0.75,1.333333,,110,Indiana,Detroit,2.4,Detroit
3,-1.0,-0.044118,0.191176,0.2,5.0,,85,Cleveland,0.046154,0.181818,...,-0.044118,0.808824,0.846154,1.181818,,94,Orlando,Cleveland,-1.0,Orlando
4,-1.0,-0.036417,0.606701,0.62963,1.588235,,125,Chicago,0.037793,-1.0,...,-0.036417,0.393299,0.408163,2.45,,126,Charlotte,Charlotte,1.45,Charlotte


In [26]:
#Total Underdog Actual Return
sum(usa['Underdog Actual Return'])

-655.9772390851748

In [27]:
#Total Favourite Actual Return
sum(usa['Favourite Actual Return'])

-687.8776833314809

In [28]:
#Expected Return - Note this is the same both underdog and favourite teams
sum(usa['Home Expected Return'])

-616.7830556894684

Very similar to what we did with the Australia betting market, we are conducting a t-test to determine whether there is a difference in average return when betting on the favourite vs the underdog in the USA. Our hypotheseses are:

- Null Hypothesis: There is no difference in return between the Favourite teams and the Underdog teams

- Alternative Hypothesis: The Underdog teams produce different returns than the Favourite teams

The assumptions are:

1) The data is continous - Which is True

2) Data follows the normal probability distribution - We can make this assumption has the sample size of n = 17103 is large enough such that the data follows the central limit theorem

3) The two samples are indepedendent - We have to be careful here as the underdog and favourite return are dependent on each other i.e. when the favourite wins the underdog loses and vice versa. To ensure we do not violate this assumption, we are going to compare the mean of the expected return and the mean of the underdog actual return 

4) The sample is random and observations are independent of each other - Which is true as the odds of each game are not dependent on the odds of another game

5) Variance of the two groups are equal - We cannot make this assumption as demonstrated by the Levene's test of equality of variance below 

In [29]:
#Determine whether we can assume equal variance
levene(usa['Underdog Actual Return'], usa['Home Expected Return'])

LeveneResult(statistic=5790.841506871259, pvalue=0.0)

In [30]:
summary, results = rp.ttest(group1= usa['Underdog Actual Return'], group1_name= "Underdog",
         group2= usa['Home Expected Return'], group2_name= "Expected Return", equal_variances = False)

In [31]:
summary

Unnamed: 0,Variable,N,Mean,SD,SE,95% Conf.,Interval
0,Underdog,17104.0,-0.038352,1.644012,0.012571,-0.062992,-0.013713
1,Expected Return,17104.0,-0.036061,0.011625,8.9e-05,-0.036235,-0.035887
2,combined,34208.0,-0.037207,1.162505,0.006285,-0.049526,-0.024887


In [32]:
results

Unnamed: 0,Welch's t-test,results
0,Difference (Underdog - Expected Return) =,-0.0023
1,Degrees of freedom =,17104.7103
2,t =,-0.1823
3,Two side test p value =,0.8554
4,Difference < 0 p value =,0.4277
5,Difference > 0 p value =,0.5723
6,Cohen's d =,-0.002
7,Hedge's g =,-0.002
8,Glass's delta =,-0.0014
9,r =,0.0014


With a p value of 0.8514, we accept the null hypothesis that there is no difference in return when betting on the underdog as opposed to the favourite team in the USA.