# Moptibet
Moptibet gathers betting odds data from multiple sources, consolidates it, and highlights betting opportunities, both through arbitrage arising from inconsistent odds in the market and through predictive analytics used to forecast winners.

### Imports

In [1]:
import pandas as pd
import numpy as np
import sqlalchemy as db
import sys

In [2]:
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

### Web Scraping

#### Site Specific HTML Parsers

In [3]:
def draftkings_parse(input_dict, driver, key):
    """Build one row of match odds per group of three DraftKings labels.

    Args:
        input_dict: Mapping with the keys 'outcomes_class' and 'odds_class',
            the class names of the outcome labels and of the odds spans.
        driver: Selenium WebDriver on which the page is already loaded.
        key: Source label stored in every returned row.

    Returns:
        A list of tuples (1st label, 3rd label, 1st odds, 2nd odds, 3rd odds,
        key, timestamp). Labels and odds are both consumed in groups of
        three; every row shares a single timestamp.
    """
    outcomes = [x.text for x in driver.find_elements_by_class_name(input_dict['outcomes_class'])]
    # Odds are read from the page source with BeautifulSoup rather than through
    # Selenium; presumably because the odds class is a multi-token string --
    # TODO confirm.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # attrs must be a dict: the previous set literal {'class', value} only
    # worked by accident and also matched spans whose class is "class".
    odds = [x.text for x in soup.find_all('span', {'class': input_dict['odds_class']})]
    scraped_at = datetime.now()
    matches = zip(outcomes[::3], outcomes[2::3], odds[::3], odds[1::3], odds[2::3])
    return [match + (key, scraped_at) for match in matches]

def fanduel_parse(input_dict, driver, key):
    """Build one row of match odds per pair of FanDuel team names.

    Args:
        input_dict: Mapping with the keys 'outcomes_class' and 'odds_class',
            the class names of the team-name and price elements.
        driver: Selenium WebDriver on which the page is already loaded.
        key: Source label stored in every returned row.

    Returns:
        A list of tuples (1st name, 2nd name, 1st odds, 2nd odds, 3rd odds,
        key, timestamp). Names are consumed in pairs and prices in triples;
        every row shares a single timestamp.
    """
    outcomes = [x.text for x in driver.find_elements_by_class_name(input_dict['outcomes_class'])]
    odds = [x.text for x in driver.find_elements_by_class_name(input_dict['odds_class'])]
    scraped_at = datetime.now()
    # The key/timestamp are appended per row instead of being zipped in as
    # padding lists: the old padding was sized with len(outcomes[::3]) although
    # names are paired, so zip() silently dropped about a third of the matches.
    matches = zip(outcomes[::2], outcomes[1::2], odds[::3], odds[1::3], odds[2::3])
    return [match + (key, scraped_at) for match in matches]

def bovada_parse(input_dict, driver, key):
    """Build one row of match odds per pair of Bovada team names.

    Args:
        input_dict: Mapping with the keys 'outcomes_class' and 'odds_class',
            the class names of the team-name and price elements.
        driver: Selenium WebDriver on which the page is already loaded.
        key: Source label stored in every returned row.

    Returns:
        A list of tuples (1st name, 2nd name, odds, odds, odds, key,
        timestamp). Names are consumed in pairs; prices are consumed in groups
        of seven, of which positions 2, 4 and 3 (in that order) are kept.
        Every row shares a single timestamp.
    """
    outcomes = [x.text for x in driver.find_elements_by_class_name(input_dict['outcomes_class'])]
    odds = [x.text for x in driver.find_elements_by_class_name(input_dict['odds_class'])]
    scraped_at = datetime.now()
    # The key/timestamp are appended per row instead of being zipped in as
    # padding lists: the old padding was sized with len(outcomes[::3]) although
    # names are paired, so zip() silently dropped about a third of the matches.
    matches = zip(outcomes[::2], outcomes[1::2], odds[2::7], odds[4::7], odds[3::7])
    return [match + (key, scraped_at) for match in matches]

#### Site Specific Scraper Mapping

In [4]:
# Scrape configuration per sportsbook: the page to load, the parser to run on
# it, and the class names that parser looks up on the page.
prem_dir = {
    'draftkings': {
        'url': 'https://sportsbook.draftkings.com/leagues/soccer/53591936',
        'parser': draftkings_parse,
        'args': {
            'outcomes_class': 'sportsbook-outcome-cell__label',
            'odds_class': 'sportsbook-odds american default-color',
        },
    },
    'fanduel': {
        'url': 'https://sportsbook.fanduel.com/sports/navigation/730.1/7567.1',
        'parser': fanduel_parse,
        'args': {
            'outcomes_class': 'name',
            'odds_class': 'selectionprice',
        },
    },
    'bovada': {
        'url': 'https://www.bovada.lv/sports/soccer/england-premier-league',
        'parser': bovada_parse,
        'args': {
            'outcomes_class': 'name',
            'odds_class': 'bet-price',
        },
    },
}

# Column names applied to the row tuples the parsers return.
output_cols = [
    'home_team',
    'away_team',
    'home_odds',
    'tie_odds',
    'away_odds',
    'source',
    'datetime',
]

#### Configure scraper settings and select the appropriate driver

In [5]:
# Display the host platform string; the driver path is chosen from this value.
sys.platform

'darwin'

In [6]:
# Chrome runs without a visible window, at a fixed 1920x1200 viewport.
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")

# Pick the chromedriver binary that matches the host OS. On any other
# platform DRIVER_PATH is left undefined.
if sys.platform in ('linux', 'linux2'):
    DRIVER_PATH = 'driver/linux/chromedriver'
elif sys.platform == 'win32':
    DRIVER_PATH = 'driver/windows/chromedriver'
elif sys.platform == 'darwin':
    DRIVER_PATH = 'driver/mac/chromedriver'



#### Iterate over sites, persisting outcomes, odds

In [45]:
class Web_Scraper:
    """Scrape every site in a mapping and collect the rows in one DataFrame.

    Args:
        map_dir: Mapping of source key -> {'url', 'parser', 'args'}. The
            parser is called as parser(args, driver, key) and must return
            rows matching ``output_cols``.
        output_cols: Column names for the resulting DataFrame.
        scr_options: Optional mapping with the keys 'headless', 'width' and
            'height'. When None, a headless 1920x1200 window is used.
    """

    def __init__(self, map_dir, output_cols, scr_options=None):
        self.map_dir = map_dir
        self.output_cols = output_cols
        self.scr_options = scr_options

    @staticmethod
    def _window_size_arg(width, height):
        """Return the Chrome flag that sets a width x height window."""
        return '--window-size={},{}'.format(width, height)

    def set_options(self):
        """Build and return the Chrome options for one browser session."""
        options = Options()
        if self.scr_options is not None:
            options.headless = self.scr_options['headless']
            # str.join() accepts a single iterable; the flag is formatted
            # instead, which also accepts integer dimensions.
            options.add_argument(
                self._window_size_arg(self.scr_options['width'], self.scr_options['height'])
            )
        else:
            options.headless = True
            options.add_argument('--window-size=1920,1200')
        # Returned so that go() actually hands the options to the driver.
        return options

    def os_driver_path(self):
        """Return the chromedriver path for the host OS.

        Raises:
            RuntimeError: If no driver path is defined for ``sys.platform``.
        """
        if sys.platform == 'darwin':
            return 'driver/mac/chromedriver'
        if sys.platform.startswith('linux'):
            return 'driver/linux/chromedriver'
        if sys.platform == 'win32':
            return 'driver/windows/chromedriver'
        raise RuntimeError('No chromedriver path defined for platform {!r}'.format(sys.platform))

    def go(self):
        """Scrape every site in ``map_dir`` and return the combined rows.

        A fresh Chrome session is started per site and always shut down, even
        when loading or parsing fails.

        Returns:
            A DataFrame with ``output_cols`` as columns, holding the rows of
            all sites stacked in mapping order (empty if ``map_dir`` is empty).
        """
        frames = []
        # Read the instance's own mapping rather than a module-level global,
        # so that scrapers built with different mappings do not interfere.
        for key, site in self.map_dir.items():
            driver = webdriver.Chrome(options=self.set_options(), executable_path=self.os_driver_path())
            try:
                driver.get(site['url'])
                driver.implicitly_wait(3)
                rows = site['parser'](site['args'], driver, key)
            finally:
                driver.quit()
            frames.append(pd.DataFrame(rows, columns=self.output_cols))

        # pd.concat raises on an empty list; return an empty frame instead.
        if not frames:
            return pd.DataFrame(columns=self.output_cols)
        return pd.concat(frames, axis=0)

    def update_db(self):
        """Placeholder for the next step; not implemented yet."""
        pass

In [46]:
# Scraper instance configured with the prem_dir site mapping and the shared output columns.
Prem_Web_Scraper = Web_Scraper(prem_dir, output_cols)

In [47]:
# Run the scrape over every configured site; launches one Chrome session per site.
test = Prem_Web_Scraper.go()

In [48]:
# NOTE(review): DataFrame.to_sql requires a table name and a connection/engine,
# neither of which is passed here -- confirm the intended target table and
# connection before running this cell.
test.to_sql()

Unnamed: 0,home_team,away_team,home_odds,tie_odds,away_odds,source,datetime
0,Leicester City,Manchester United,205,250,130,draftkings,2020-07-23 01:14:50.661729
1,Manchester City,Norwich City,-1250,1100,2800,draftkings,2020-07-23 01:14:50.661729
2,West Ham,Aston Villa,220,245,123,draftkings,2020-07-23 01:14:50.661729
3,Burnley,Brighton,135,235,205,draftkings,2020-07-23 01:14:50.661729
4,Newcastle,Liverpool,750,400,-275,draftkings,2020-07-23 01:14:50.661729
5,Everton,Bournemouth,128,275,190,draftkings,2020-07-23 01:14:50.661729
6,Chelsea,Wolves,-118,280,310,draftkings,2020-07-23 01:14:50.661729
7,Southampton,Sheffield United,123,240,225,draftkings,2020-07-23 01:14:50.661729
8,Crystal Palace,Tottenham,480,310,-175,draftkings,2020-07-23 01:14:50.661729
9,Arsenal,Watford,-106,285,265,draftkings,2020-07-23 01:14:50.661729


#### Grab Probabilities and Fixtures from 538 

In [8]:
# Download the latest club match file and keep only the rows whose league is
# 'Barclays Premier League'.
matches_538 = pd.read_csv(
    'https://projects.fivethirtyeight.com/soccer-api/club/spi_matches_latest.csv'
)
prem_matches = matches_538.loc[matches_538['league'] == 'Barclays Premier League']

# Column names of interest; this list is not referenced in the visible cells.
columns = [
    'date', 'league', 'team1', 'team2',
    'spi1', 'spi2',
    'prob1', 'prob2', 'probtie',
    'proj_score1', 'proj_score2',
    'importance1', 'importance2',
    'score1', 'score2',
    'xg1', 'xg2',
    'nsxg1', 'nsxg2',
    'adj_score1', 'adj_score2',
]

# Display the fixtures dated 2020-07-16.
prem_matches.loc[prem_matches['date'] == '2020-07-16']

Unnamed: 0,season,date,league_id,league,team1,team2,spi1,spi2,prob1,prob2,...,importance1,importance2,score1,score2,xg1,xg2,nsxg1,nsxg2,adj_score1,adj_score2
8756,2019,2020-07-16,2411,Barclays Premier League,Everton,Aston Villa,74.56,63.46,0.4572,0.2793,...,0.0,59.5,1.0,1.0,1.08,1.26,1.54,1.02,1.05,1.05
8760,2019,2020-07-16,2411,Barclays Premier League,Leicester City,Sheffield United,80.58,73.52,0.5859,0.1605,...,100.0,2.0,2.0,0.0,3.05,0.84,1.48,0.29,1.86,0.0
8775,2019,2020-07-16,2411,Barclays Premier League,Southampton,Brighton and Hove Albion,73.64,69.29,0.4521,0.2844,...,0.0,1.8,1.0,1.0,2.39,0.89,1.28,1.02,1.05,1.05
8776,2019,2020-07-16,2411,Barclays Premier League,Crystal Palace,Manchester United,68.3,88.1,0.1098,0.6879,...,0.0,100.0,0.0,2.0,0.6,1.94,0.96,2.14,0.0,1.91


#### Connect to Remote PostgreSQL server

In [9]:
# Credentials come from the environment so that no secret is stored in the
# notebook. MODBET_DB_PASSWORD is required (KeyError if unset); the other
# settings fall back to the values this notebook used before.
# NOTE(review): the password that was previously committed here should be
# rotated.
import os
from urllib.parse import quote_plus

# 'postgresql' is the dialect name; the legacy 'postgres' alias is rejected by
# SQLAlchemy 1.4 and later. The password is percent-encoded so that special
# characters cannot corrupt the URI.
DB_URI = 'postgresql+psycopg2://{user}:{password}@{host}/{name}'.format(
    user=os.environ.get('MODBET_DB_USER', 'austin'),
    password=quote_plus(os.environ['MODBET_DB_PASSWORD']),
    host=os.environ.get('MODBET_DB_HOST', 'ubuntu_t_serv'),
    name=os.environ.get('MODBET_DB_NAME', 'modbet'),
)

conn = db.create_engine(DB_URI).connect()

In [10]:
# Connectivity check: read every row of test.test over the open connection
# and display the first rows.
results = pd.read_sql('SELECT * FROM test.test;', conn)
results.head()

Unnamed: 0,coltest
0,Hello World


In [11]:
# Release the database connection opened earlier in the notebook.
conn.close()