# Analysis
Weigh the odds against the predictions and make your bet!

The selected bets are stored in `./Book.csv`.

Remember to fill in the actual results of your bet in the `Book` after the match.

In [None]:
import pandas as pd

## Read the odds and predictions files

In [None]:
# Load both input tables for this month: the odds and the predictions.
dfOdds = pd.read_csv('./data/This_months_odds.csv')
dfPreds = pd.read_csv('./data/This_months_predictions.csv')

# Preview the odds: dimensions first, then the leading rows.
print(dfOdds.shape)
display(dfOdds.head(5))

# Same preview for the predictions; the trailing expression is the cell output.
print(dfPreds.shape)
dfPreds.head(5)

## Unpivot to go from matches to bets
So each row in the table corresponds to 1 possible bet (home win, away win or tie).

In [None]:
# Reshape the odds from wide to long. The match identifiers stay as columns;
# every other column becomes its own row, with the former column name stored
# in `outcome` and the cell value stored in `odd`.
dfOdds = dfOdds.melt(
    id_vars=['league', 'home_team', 'away_team'],
    var_name='outcome',
    value_name='odd',
)

# Drop the "odd_" marker so only the bare outcome label remains.
dfOdds['outcome'] = dfOdds['outcome'].str.replace('odd_', '')

# Preview the reshaped table.
print(dfOdds.shape)
dfOdds.head()

In [None]:
# Reshape the predictions from wide to long. The match identifiers (including
# the date) stay as columns; every other column becomes its own row, with the
# former column name stored in `outcome` and the cell value stored in `prob`.
dfPreds = dfPreds.melt(
    id_vars=['date', 'league', 'home_team', 'away_team'],
    var_name='outcome',
    value_name='prob',
)

# Drop the "prob_" marker so the outcome labels can line up with the odds table.
dfPreds['outcome'] = dfPreds['outcome'].str.replace('prob_', '')

# Preview the reshaped table.
print(dfPreds.shape)
dfPreds.head()

## Do some data mapping to align both tables

In [None]:
# League names as they appear in the predictions (keys), translated to the
# names used in the odds table (values).
leaguesMap = {
    'french ligue 1': 'ligue_1',
    'german bundesliga': 'bundesliga',
    'dutch eredivisie': 'eredivisie',
    'italy serie a': 'serie_a',
    'spanish primera division': 'la_liga',
    'barclays premier league': 'premier_league',
}

# Leagues without an entry in the map are left as they are.
dfPreds['league'] = dfPreds['league'].replace(leaguesMap)

In [None]:
# Sanity check for one league: do both tables spell the team names the same way?

# Every team that shows up (home or away) in the odds for ligue_1.
filt = dfOdds['league'] == 'ligue_1'
teamsFromOdds = set(dfOdds.loc[filt, 'home_team']) | set(dfOdds.loc[filt, 'away_team'])

# Every team that shows up (home or away) in the predictions for ligue_1.
filt = dfPreds['league'] == 'ligue_1'
teamsFromPreds = set(dfPreds.loc[filt, 'home_team']) | set(dfPreds.loc[filt, 'away_team'])

# Names present in exactly one of the two sets, i.e. the spellings that differ.
teamsFromOdds ^ teamsFromPreds

In [None]:
# Team names as they appear in the predictions (keys), translated to the
# spelling used in the odds table (values). Grouped by country.
teamsMap = {
    # Netherlands
    'emmen': 'fc emmen',
    'nac': 'nac breda',
    'heerenveen': 'sc heerenveen',
    'excelsior': 'excelsior rotterdam',
    'psv': 'psv eindhoven',
    'az': 'az alkmaar',
    'heracles': 'heracles almelo',
    # Spain
    'athletic bilbao': 'athletic club bilbao',
    'sevilla fc': 'sevilla',
    'real valladolid': 'valladolid',
    'sd huesca': 'huesca',
    'barcelona': 'fc barcelona',
    # Germany
    'mainz': 'mainz 05',
    'tsg hoffenheim': 'hoffenheim',
    'fc augsburg': 'augsburg',
    'bayern munich': 'bayern munchen',
    # England
    'afc bournemouth': 'bournemouth',
    'west ham united': 'west ham',
    'tottenham hotspur': 'tottenham',
    'brighton and hove albion': 'brighton & hove albion',
    'newcastle': 'newcastle united',
    'wolverhampton': 'wolverhampton wanderers',
    # Italy
    'as roma': 'roma',
    'internazionale': 'inter',
    # France
    'paris saint-germain': 'paris sg',
    'st etienne': 'saint-etienne',
    'dijon fco': 'dijon',
    'nimes': 'nimes olympique',
    'stade rennes': 'rennes',
    'as monaco': 'monaco',
}

# Apply the same translation to both team columns; unmapped names stay unchanged.
for teamCol in ('home_team', 'away_team'):
    dfPreds[teamCol] = dfPreds[teamCol].replace(teamsMap)

In [None]:
# Attach a prediction to every odds row, matching on league, home_team,
# away_team and outcome. The left join keeps all odds rows, and
# `indicator=True` adds a `_merge` column telling whether a match was found.
dfJoin = pd.merge(
    dfOdds,
    dfPreds,
    how='left',
    on=['league', 'home_team', 'away_team', 'outcome'],
    indicator=True,
)

# Odds rows that found no prediction are tagged 'left_only'.
# This should display an empty data frame; anything listed here is a mismatch.
display(dfJoin.loc[dfJoin['_merge'] == 'left_only'])

# The indicator column is no longer needed once the check is done.
dfJoin = dfJoin.drop(columns=['_merge'])

## Calculate expectation values and select bets
Expectation value = odd * probability

Select bets with `expect` > 1.15

In [None]:
# Expectation value of each possible bet: the odd multiplied by the predicted probability.
dfJoin['expect'] = dfJoin['odd'] * dfJoin['prob']

# Report the size of the joined table.
print(dfJoin.shape)

In [None]:
# Fixed column layout for the selected bets.
columnOrder = ['date', 'league', 'home_team', 'away_team', 'outcome', 'odd', 'prob', 'expect']

# Keep only the bets whose expectation value is higher than the 1.15 threshold,
# order them by league and date, and arrange the columns in the fixed layout.
dfBets = (
    dfJoin.loc[dfJoin['expect'] > 1.15]
    .sort_values(by=['league', 'date'])[columnOrder]
    .copy()
)

# Show how many bets were selected, followed by the bets themselves.
print(dfBets.shape)
dfBets

## Congratulations. You will be very rich, sir.
Now save the precious analysis results.

In [None]:
import os
from datetime import datetime

# Stamp every selected bet with today's date as day-month-year (not zero-padded).
now = datetime.now()
dfBets['date_of_bet'] = '%s-%s-%s' % (now.day, now.month, now.year)

# Append the selected bets to the book so earlier entries are preserved.
# The header row is written only when the book does not exist yet or is empty;
# an existing book keeps its single header and just receives the new rows.
# To start a fresh book, delete ./Book.csv before running this cell.
bookPath = './Book.csv'
bookIsNew = not os.path.exists(bookPath) or os.path.getsize(bookPath) == 0
dfBets.to_csv(bookPath, index=False, float_format='%.3f', mode='a', header=bookIsNew)