In [1]:
import os, json
from datetime import datetime
from datetime import timedelta
import time
import pandas as pd
import requests
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import numpy as np
from keras.models import load_model

# Every input file is addressed relative to the parent of the working directory.
OPTIONS_FILE = os.path.join(os.pardir, "options.json")
FILE_NAME = os.path.join(os.pardir, "espn", "2020_header.csv")

# One saved model file per model number: ../model<N>.h5
MODEL_PATH4, MODEL_PATH5, MODEL_PATH6, MODEL_PATH7, MODEL_PATH9 = (
    os.path.join(os.pardir, "model{}.h5".format(number))
    for number in (4, 5, 6, 7, 9)
)

# Page that the Selenium driver is pointed at.
URL = "https://www.oddsportal.com/basketball/usa/nba/"

# Full team name (as it appears in the scraped "Home - Away" text) mapped to
# the lower-case abbreviation; the scraper upper-cases the abbreviation before
# it is compared with the Home/Away columns of the header CSV.
teams = {
    # Atlantic
    "Boston Celtics": "bos",
    "Brooklyn Nets": "bkn",
    "Philadelphia 76ers": "phi",
    "New York Knicks": "ny",
    "Toronto Raptors": "tor",
    # Pacific
    "Golden State Warriors": "gs",
    "Los Angeles Clippers": "lac",
    "Los Angeles Lakers": "lal",
    "Phoenix Suns": "phx",
    "Sacramento Kings": "sac",
    # Central
    "Chicago Bulls": "chi",
    "Milwaukee Bucks": "mil",
    "Cleveland Cavaliers": "cle",
    "Detroit Pistons": "det",
    "Indiana Pacers": "ind",
    # Southeast
    "Atlanta Hawks": "atl",
    "Miami Heat": "mia",
    "Orlando Magic": "orl",
    "Charlotte Hornets": "cha",
    "Washington Wizards": "wsh",  # original note: "was" (presumably an alternative code)
    # Northwest
    "Denver Nuggets": "den",
    "Utah Jazz": "utah",  # original note: "uta" (presumably an alternative code)
    "Minnesota Timberwolves": "min",
    "Portland Trail Blazers": "por",
    "Oklahoma City Thunder": "okc",
    # Southwest
    "Dallas Mavericks": "dal",
    "San Antonio Spurs": "sa",
    "Houston Rockets": "hou",
    "Memphis Grizzlies": "mem",
    "New Orleans Pelicans": "no",
}

def get_dates(df):
    """Assign a calendar date to every row of the scraped odds table.

    Rows whose "Teams" value equals their "Home Coeff" value are treated as
    date-header rows.  Each header starts a section that runs up to the next
    header; row 0 always starts a section as well.  The "Date_" text of the
    first row of a section decides the date given to every row in it.

    Recognised header texts:
      * containing "Tomorrow"  -> now + 1 day
      * containing "Today"     -> now
      * containing "Yesterday" -> now - 1 day
      * an explicit "DD Mon YYYY" date (assumed format for older sections,
        e.g. "14 Jan 2020" -- TODO confirm against the live page)

    Args:
        df: DataFrame with "Date_", "Teams" and "Home Coeff" columns.

    Returns:
        (dates, header_rows): ``dates`` is a list with one ``datetime`` per
        row of ``df``; ``header_rows`` is the set of positional indices at
        which a section starts (always contains 0).

    Raises:
        ValueError: if a non-empty section starts with a header that matches
            none of the recognised forms.  Previously this either crashed
            with UnboundLocalError or silently reused the preceding
            section's date.
    """
    # One reference instant so every section is dated consistently.
    now = datetime.now()
    relative_days = (("Tomorrow", 1), ("Today", 0), ("Yesterday", -1))

    def resolve(label):
        text = str(label)  # str() also copes with NaN / non-string cells
        for keyword, offset in relative_days:
            if keyword in text:
                return now + timedelta(days=offset)
        try:
            return datetime.strptime(text.strip(), "%d %b %Y")
        except ValueError:
            raise ValueError("Unrecognised date header: {!r}".format(text))

    header_rows = np.where(df["Teams"] == df["Home Coeff"])[0]
    # Section boundaries: start of table, every header row, end of table.
    bounds = np.append(np.append(0, header_rows), len(df)).tolist()

    labels = df["Date_"]
    dates = []
    for start, stop in zip(bounds[:-1], bounds[1:]):
        if stop <= start:
            # Empty section (row 0 is itself a header): nothing to date.
            continue
        # np.where yields positions, so index positionally rather than by label.
        dates.extend([resolve(labels.iloc[start])] * (stop - start))

    return dates, set(bounds[:-1])

def get_odds_df(driver):
    """Read the odds table from the page loaded in ``driver``.

    The table is located by a fixed absolute XPath, parsed with
    ``pd.read_html`` and reduced to one row per game.

    Args:
        driver: Selenium WebDriver whose current page contains the table.

    Returns:
        DataFrame with columns "Date" (midnight ``datetime`` of the game's
        section), "Home", "Away" (upper-cased codes from ``teams``),
        "Home Coeff" and "Away Coeff" (as parsed from the page).

    Raises:
        KeyError: if a scraped team name is not a key of ``teams``.
    """
    from io import StringIO  # local import: passing literal HTML to read_html is deprecated

    table_xpath = '/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/table'
    # Generic find_element works on Selenium 3 and 4; find_element_by_xpath was removed in 4.
    tbl = driver.find_element("xpath", table_xpath).get_attribute('outerHTML')
    df = pd.read_html(StringIO(tbl))[0]

    # The parsed header is two-level; its second level is re-inserted as a
    # data row (label -1) so that get_dates can treat it like any other row.
    _, cols = zip(*df.columns)
    df.loc[-1] = cols
    df.columns = ["Date_", "Teams", "Teams 2", "Score", "Home Coeff", "Away Coeff", "-"]
    # sort_index moves label -1 to the top; one reset is enough (the original
    # did it twice, which only added unused "index"/"level_0" columns).
    df = df.sort_index().reset_index(drop=True)

    dates, _ = get_dates(df)
    # Truncate to midnight so dates compare equal to those in the header CSV.
    df["Date"] = [datetime.strptime(str(date)[:10], '%Y-%m-%d') for date in dates]

    df = df.dropna(subset=["Teams"])
    # Keep only rows whose Teams text carries the "toolTipTV" marker
    # (presumably an inline script emitted next to game rows -- TODO confirm).
    # na=False guards against non-string cells; .copy() avoids assigning into a view.
    df = df[df["Teams"].str.contains("toolTipTV", na=False)].copy()

    # Drop everything up to and including the first "});" so only
    # "Home team - Away team" remains.
    df["Teams"] = df["Teams"].apply(lambda x: x[x.find("});") + 3:])
    df["Home"] = df["Teams"].apply(lambda x: teams[x.split("-")[0].strip()].upper())
    df["Away"] = df["Teams"].apply(lambda x: teams[x.split("-")[1].strip()].upper())

    return df[["Date", "Home", "Away", "Home Coeff", "Away Coeff"]]
    

def read_options():
    """Report whether both historical data sets were refreshed today.

    Reads the JSON file at ``OPTIONS_FILE`` and compares its "Date" entry
    with today's date (YYYY-MM-DD).

    Returns:
        True only when "Date" is today and both "oddsportal_history" and
        "espn_history" are exactly ``True``; False otherwise.  The original
        fell through and returned ``None`` when the date did not match;
        an explicit ``False`` keeps the return type a bool.
    """
    with open(OPTIONS_FILE) as f:
        options = json.load(f)
    today = str(datetime.now())[:10]

    if today != options["Date"]:
        # Options were written on another day; the flags are not consulted.
        return False

    if options["oddsportal_history"] is False:
        print("Oddsportal historical data not updated today")
        return False

    if options["espn_history"] is False:
        print("Espn historical data not updated today")
        return False

    return (options["oddsportal_history"] is True) and (options["espn_history"] is True)

def select_EU_odds(driver):
    """Open the odds-format menu and click its first entry.

    Clicks the element with id "user-header-oddsformat-expander", then the
    first ``<li>`` inside the element with id "user-header-oddsformat".
    (Per the function name that first entry is the EU format -- not
    verifiable from this file.)

    Uses the generic ``find_element(by, value)`` call, which exists in both
    Selenium 3 and 4; the ``find_element_by_*`` helpers were removed in 4.

    Args:
        driver: Selenium WebDriver with the odds page loaded.
    """
    driver.find_element("id", "user-header-oddsformat-expander").click()
    format_list = driver.find_element("id", "user-header-oddsformat")
    format_list.find_element("tag name", "li").click()


def read_header(year):
    """Load ``../espn/<year>_header.csv`` sorted by game date.

    Args:
        year: season identifier used in the file name; a str or an int
            (the original accepted only str).

    Returns:
        DataFrame sorted ascending by "Date", with "Date" parsed from the
        first 10 characters (YYYY-MM-DD) of each value and surrounding
        whitespace stripped from "Home" and "Away".
    """
    file_name = os.path.join("..", "espn", "{}_header.csv".format(year))
    header_df = pd.read_csv(file_name).reset_index(drop=True)
    header_df["Date"] = header_df["Date"].apply(
        lambda x: datetime.strptime(x[:10], '%Y-%m-%d'))
    for side in ("Home", "Away"):
        # .str.strip() leaves missing values untouched instead of raising.
        header_df[side] = header_df[side].str.strip()
    return header_df.sort_values(by="Date")

def get_last_game_diff(x):
    """Scale a point difference by 35, saturating at -1 and 1.

    Differences beyond +/-35 return the integer 1 or -1; anything else
    returns ``x / 35``.
    """
    limit = 35
    if x > limit:
        return 1
    if x < -limit:
        return -1
    return x / limit
	
def _team_form(header, team, game_date, venue):
    """Collect the form features of one team for an upcoming game.

    ``venue`` is "Home" or "Away": the side ``team`` plays on in the
    upcoming game.  ``header`` is expected to be sorted by "Date" so that
    ``tail(1)`` is the most recent game.
    """
    other = "Away" if venue == "Home" else "Home"

    games = header[(header["Home"] == team) | (header["Away"] == team)]
    if games.empty:
        raise IndexError("no previous games for team {!r} in header".format(team))

    # Overall record, taken from the side the team played on in its last game.
    last = games.tail(1).iloc[0]
    side, opponent = (venue, other) if last[venue] == team else (other, venue)
    win = last[side + " Wins"] / (last[side + " Wins"] + last[side + " Losses"])
    last_is_away = 1 if side == "Away" else 0
    last_diff = get_last_game_diff(last[side + " Pts"] - last[opponent + " Pts"])

    # Venue-specific record ("Home home ..." / "Away away ..." columns) from
    # the team's last game played on the same side as the upcoming one.
    venue_games = games[games[venue] == team]
    if venue_games.empty:
        raise IndexError(
            "no previous {} games for team {!r} in header".format(venue.lower(), team))
    venue_last = venue_games.tail(1).iloc[0]
    prefix = venue + " " + venue.lower()
    venue_win = venue_last[prefix + " Wins"] / (
        venue_last[prefix + " Wins"] + venue_last[prefix + " Losses"])

    # Games in the 7 days up to and including the game date (between() is
    # inclusive at both ends), divided by 5.
    in_last_week = games["Date"].between(game_date + timedelta(days=-7), game_date)
    games_last_week = len(games[in_last_week]) / 5

    return {
        "win": win,
        "venue_win": venue_win,
        "games_last_week": games_last_week,
        "last_is_away": last_is_away,
        "last_diff": last_diff,
    }


def get_wins(row, header, updated_model = False):
    """Build the feature vector for one upcoming game.

    Args:
        row: mapping with "Home", "Away" (team codes) and "Date".
        header: game-history DataFrame with "Home", "Away", "Date",
            "<Side> Wins/Losses/Pts" and "Home home"/"Away away"
            Wins/Losses columns.
        updated_model: when True, return the 10-value feature set that also
            carries last-game venue and point-difference features.

    Returns:
        Series indexed 0..5 (home win %, home-at-home win %, away win %,
        away-on-road win %, home games last week, away games last week),
        or indexed 0..9 when ``updated_model`` is True (the first four as
        above, then home games last week, home last game is away, home last
        game diff, and the same three for the away team).

    Raises:
        IndexError: if either team has no matching games in ``header``.

    The original also printed the away team's last-week games on every
    call; that debugging output has been removed.
    """
    home = _team_form(header, row["Home"], row["Date"], "Home")
    away = _team_form(header, row["Away"], row["Date"], "Away")

    features = [home["win"], home["venue_win"], away["win"], away["venue_win"]]
    if updated_model:
        features += [
            home["games_last_week"], home["last_is_away"], home["last_diff"],
            away["games_last_week"], away["last_is_away"], away["last_diff"],
        ]
    else:
        features += [home["games_last_week"], away["games_last_week"]]

    # Same construction as before: a one-row frame keyed 0..n-1, returned as a Series.
    return pd.DataFrame(data={i: [value] for i, value in enumerate(features)}).iloc[0]
	
def scrape_today_games():
    """Scrape the odds table from ``URL`` with a fresh Chrome session.

    Starts Chrome through webdriver_manager, switches the odds format with
    ``select_EU_odds``, reloads the page and parses it with ``get_odds_df``.
    The parsed frame is printed and returned.

    The browser is always shut down, even when scraping raises, and
    ``quit()`` is used instead of ``close()`` so the chromedriver process
    ends with the session.
    """
    driver = webdriver.Chrome(ChromeDriverManager().install())
    try:
        driver.get(URL)
        select_EU_odds(driver)
        # The page is loaded twice more after the format switch, as in the
        # original (presumably so the new format is applied -- TODO confirm).
        driver.get(URL)
        driver.get(URL)
        df = get_odds_df(driver)
    finally:
        driver.quit()

    print(df)
    return df
	
	
def get_predictions(model_path, x):
    """Run the saved model at ``model_path`` on ``x``.

    Returns the model output multiplied by 100 and rounded to two decimals.
    The model is loaded from disk on every call.
    """
    raw_output = load_model(model_path).predict(x)
    scaled = raw_output * 100
    return np.round(scaled, 2)
	
	
def _normalise_odds(value):
    """Return 1 for odds above 10, otherwise ``(odds - 1) / 9``."""
    odds = float(value)
    return 1 if odds > 10 else (odds - 1) / 9.0


def main():
    """Scrape today's games, build model features and print predictions.

    Games are kept only when the home or the away odds exceed 1.7.  Models
    4, 5, 6 and 9 are run on games whose away odds exceed 1.7, model 7 on
    games whose home odds exceed 1.7.
    """
    min_odds = 1.7
    base_features = ['Home Wins %', 'Home home Wins %', 'Away Wins %', 'Away away Wins %',
                     'Home games last week', 'Away games last week']
    extended_features = ['Home Wins %', 'Home home Wins %', 'Away Wins %', 'Away away Wins %',
                         'Home games last week', 'Home last game is away', 'Home last game diff',
                         'Away games last week', 'Away last game is away', 'Away last game diff']
    model_inputs = ['Home Wins %', 'Home home Wins %', 'Away Wins %', 'Away away Wins %',
                    'Home Coeff 2', 'Away Coeff 2', 'Home games last week', 'Away games last week']
    # NOTE(review): model 9 is fed the raw "Home Coeff"/"Away Coeff" values,
    # unlike the other models, which get the normalised "... Coeff 2"
    # columns -- kept as in the original; confirm this matches training.
    model9_inputs = ['Home Wins %', 'Home home Wins %', 'Away Wins %', 'Away away Wins %',
                     'Home Coeff', 'Away Coeff', 'Home games last week', 'Away games last week',
                     'Home last game diff', 'Away last game diff',
                     'Home last game is away', 'Away last game is away']

    # .copy() so the column assignments below never write into a slice.
    todays_df = scrape_today_games().copy()
    header = read_header("2020")

    todays_df["Away Coeff 2"] = todays_df["Away Coeff"].apply(_normalise_odds)
    todays_df["Home Coeff 2"] = todays_df["Home Coeff"].apply(_normalise_odds)
    keep = ((pd.to_numeric(todays_df["Away Coeff"]) > min_odds)
            | (pd.to_numeric(todays_df["Home Coeff"]) > min_odds))
    todays_df = todays_df[keep].copy()
    if todays_df.empty:
        # Row-wise apply on an empty frame cannot produce the feature columns.
        print("No games with odds above 1.7 found")
        return

    todays_df2 = todays_df.copy()
    todays_df2[extended_features] = todays_df2.apply(
        lambda row: get_wins(row, header, True), axis=1)
    todays_df[base_features] = todays_df.apply(lambda row: get_wins(row, header), axis=1)

    #Away=================================
    df = todays_df[pd.to_numeric(todays_df["Away Coeff"]) > min_odds].copy()
    df1 = todays_df2[pd.to_numeric(todays_df2["Away Coeff"]) > min_odds]
    if not df.empty:
        # dtype=float: the scraped odds columns may hold strings, which
        # would otherwise yield an object array.
        x = np.array(df[model_inputs], dtype=float)
        x1 = np.array(df1[model9_inputs], dtype=float)
        df["Predictions 4"] = get_predictions(MODEL_PATH4, x)
        df["Predictions 5"] = get_predictions(MODEL_PATH5, x)
        df["Predictions 6"] = get_predictions(MODEL_PATH6, x)
        df["Predictions 9"] = get_predictions(MODEL_PATH9, x1)

    del df["Home Coeff 2"]
    del df["Away Coeff 2"]
    print("\n============AWAY===============")
    print(df)

    #Home=======================================
    df2 = todays_df[pd.to_numeric(todays_df["Home Coeff"]) > min_odds].copy()
    if not df2.empty:
        x2 = np.array(df2[model_inputs], dtype=float)
        df2["Predictions 7"] = get_predictions(MODEL_PATH7, x2)

    del df2["Home Coeff 2"]
    del df2["Away Coeff 2"]
    print("\n============Home===============")
    print(df2)


if __name__ == "__main__":
    main()

		

Using TensorFlow backend.



Looking for [chromedriver 80.0.3987.16 win32] driver in cache 
File found in cache by path [C:\Users\Giedrius\.wdm\drivers\chromedriver\80.0.3987.16\win32\chromedriver.exe]


KeyboardInterrupt: 

In [11]:
# Scratch cell: same steps as scrape_today_games(), but the browser is left
# open and bound to the notebook-level name `driver` for the cells below.
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(URL)
# Switch the odds format, then load the page twice more.
select_EU_odds(driver)
driver.get(URL)
driver.get(URL)



Looking for [chromedriver 79.0.3945.36 win32] driver in cache 
File found in cache by path [C:\Users\Giedrius\.wdm\drivers\chromedriver\79.0.3945.36\win32\chromedriver.exe]


In [19]:
# Scratch cell: first half of get_odds_df(), run against the open `driver`.
# Grab the odds table's HTML by its absolute XPath and parse it.
tbl = driver.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/table').get_attribute('outerHTML')
df = pd.read_html(tbl)[0]
# Second level of the two-level header is re-inserted as a data row (label -1).
_ , cols = zip(*df.columns)
df.loc[-1] = cols
df.columns = [ "Date_", "Teams","Teams 2", "Score", "Home Coeff","Away Coeff", "-"]
# sort_index puts the -1 row first; reset_index keeps the old labels in an "index" column.
df = df.sort_index().reset_index()

In [20]:
# Display the frame built in the previous cell.
df

Unnamed: 0,index,Date_,Teams,Teams 2,Score,Home Coeff,Away Coeff,-
0,-1,"Today, 16 Jan","Today, 16 Jan.1","Today, 16 Jan.2","Today, 16 Jan.3",1.0,2.0,B's
1,0,00:00,Boston Celtics - Detroit Pistons,Boston Celtics - Detroit Pistons,103:116,1.21,4.75,11
2,1,00:00,Philadelphia 76ers - Brooklyn Nets,Philadelphia 76ers - Brooklyn Nets,117:106,1.38,3.15,11
3,2,00:30,Miami Heat - San Antonio Spurs,Miami Heat - San Antonio Spurs,106:100,1.46,2.83,11
4,3,01:00,Chicago Bulls - Washington Wizards,Chicago Bulls - Washington Wizards,115:106,1.54,2.6,11
5,4,01:00,Minnesota Timberwolves - Indiana Pacers,Minnesota Timberwolves - Indiana Pacers,99:104,2.49,1.58,11
6,5,01:00,Oklahoma City Thunder - Toronto Raptors,Oklahoma City Thunder - Toronto Raptors,121:130,1.89,1.96,11
7,6,02:00,Denver Nuggets - Charlotte Hornets,Denver Nuggets - Charlotte Hornets,100:86,1.13,6.33,11
8,7,02:30,Houston Rockets - Portland Trail Blazers,Houston Rockets - Portland Trail Blazers,107:117,1.26,4.11,11
9,8,03:00,Sacramento Kings - Dallas Mavericks,Sacramento Kings - Dallas Mavericks,123:127,2.47,1.59,11


In [14]:
# Scratch cell: the body of get_odds_df() run inline against the open
# `driver`, with an extra head(2) print for inspection.
tbl = driver.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/table').get_attribute('outerHTML')
df = pd.read_html(tbl)[0]
# Second level of the two-level header is re-inserted as a data row (label -1).
_ , cols = zip(*df.columns)
df.loc[-1] = cols
df.columns = [ "Date_", "Teams","Teams 2", "Score", "Home Coeff","Away Coeff", "-"]
print(df.head(2))

#df = df.drop(["-","--","---"], axis =1)
# Move the -1 row to the top and renumber the rows.
df = df.sort_index().reset_index()

# One date per row, truncated to midnight.
dates, dates_ind =  get_dates(df)
df["Date"] = [datetime.strptime(str(date)[:10], '%Y-%m-%d') for date in dates]
df = df.dropna(subset=["Teams"])
# Keep only rows whose Teams text contains the "toolTipTV" marker.
df = df[df["Teams"].str.contains("toolTipTV")]
#df = df.drop(dates_ind)
# Strip everything up to and including the first "});", leaving "Home - Away".
df["Teams"] = df["Teams"].apply(lambda x: x[x.find("});")+3:])
# Map full team names to upper-cased codes via `teams`.
df["Home"] = df["Teams"].apply(lambda x: teams[x.split("-")[0].strip()].upper())
df["Away"] = df["Teams"].apply(lambda x: teams[x.split("-")[1].strip()].upper())

df = df.reset_index()

   Date_                               Teams  \
0  00:00    Boston Celtics - Detroit Pistons   
1  00:00  Philadelphia 76ers - Brooklyn Nets   

                              Teams 2    Score Home Coeff Away Coeff   -  
0    Boston Celtics - Detroit Pistons  103:116       1.21       4.75  11  
1  Philadelphia 76ers - Brooklyn Nets  117:106       1.38       3.15  11  
