In [10]:
import pandas as pd; pd.set_option('display.max_columns', None), pd.set_option('display.max_rows', None)
from pandas import DataFrame

from datetime import date

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import metrics 

import numpy as np

from matplotlib import pyplot as plt

import tensorflow as tf
from tensorflow import keras
#from keras.layers import Input, Flatten, Dense

import pybaseball as pyb
from pybaseball import team_game_logs
from pybaseball import schedule_and_record
from pybaseball import playerid_reverse_lookup

import streamlit as st
import warnings
warnings.filterwarnings('ignore')

In [11]:
#pulls hitting data from baseball reference for a specified team and year
def get_batting (team, year):
    url = 'https://www.baseball-reference.com/teams/{}/{}.shtml#all_team_batting'
    next_url = url.format(team, year)
    data = pd.read_html(next_url, header = 0)
    return data[-2]

#get_batting('TBR', 2021)

#pulls pitching data from baseball reference for a specified team and year
def get_pitching (team, year):
    url = 'https://www.baseball-reference.com/teams/{}/{}.shtml#team_pitching'
    next_url = url.format(team, year)
    data = pd.read_html(next_url)
    return data[-1]

#removes day of the week from date
def clean_date(team):
    k = schedule_and_record(2021, team)
    b = k.Date
    my_list = []
    for i in b:
        if "," in i:
            param, value = i.split(",",1)
            my_list.append(value)
    k['Date'] = my_list
    return k

all_teams = ['TBR', 'BOS', 'NYY', 'TOR', 'BAL', 'CHW', 'CLE', 'DET', 'KC', 'MIN', 'HOU', 'OAK',
           'SEA', 'LAA', 'TEX','NYM','PHI', 'ATL', 'WSN', 'MIA', 'MIL', 'CIN', 'STL', 'CHC', 'PIT',
          'SF', 'LAD', 'SD', 'COL', 'ARI']
#takes in one team as an arg and spits out todays matchup for that team along with probable pitchers 
def get_today(team):
    try:
        u = 'https://www.baseball-reference.com/teams/{}/2021.shtml#team_pitching'
        next_u = u.format(team)
        d = pd.read_html(next_u)
        matchup = d[0].drop(columns = [1, 2])
        matchup.rename(columns = {0:'Date'}, inplace = True)
        param, value = matchup.Date[0].split(",",1)
        matchup.replace(matchup.Date[0], param, inplace = True)
        pitchers = d[1]
        pitchers.rename(columns = {0:'Team', 1:'Pitchers'}, inplace = True)
        home_starter = pitchers.Pitchers[1]
        away_starter = pitchers.Pitchers[0]
        listy = ['date', away_starter, home_starter]
        pitchers.rename(columns = {0:'Team', 1:'Pitchers'}, inplace = True)
        matchup['Pitchers'] = listy
    except: 
        home_starter = 'undefined'
        away_starter = 'undefined'
        pass
    return matchup
#takes in a team as an arg and spits out the home team in its current matchup
def get_home(team):
    try:
        u = 'https://www.baseball-reference.com/teams/{}/2021.shtml#team_pitching'
        next_u = u.format(team)
        d = pd.read_html(next_u)
        matchup = d[1]
        matchup.rename(columns = {0:'Team'}, inplace = True)
        homeTeam = matchup.Team[1]
        awayTeam = matchup.Team[0]
    except: 
        homeTeam = 'undefined'
        awayTeam = 'undefined'
        pass
    return homeTeam
#creates formatted df for passing into the model. takes in two teams that are facing eachother on a given day
#and formats a df to display all of their relevant stats and elo 

#pqm is a self created pitcher quality metric. takes into acccount a collection of pitchers stats and reconciles them
#into one, whole number stat
def pqm(team, pitcher_name):
    team_data = get_pitching(team, 2021)
    pitcher_data = team_data[team_data['Name'].str.contains(pitcher_name)]
    era = float(pitcher_data.ERA.iloc[0])
    whip = float(pitcher_data.WHIP.iloc[0])
    bb9 = float(pitcher_data.BB9.iloc[0])
    fip = float(pitcher_data.FIP.iloc[0])
    pqm = round((((era * fip)/2)* (whip * bb9))/2)
    return pqm
def elo(team):
    team_sr = schedule_and_record(2021, team)
    team_sr.rename(columns = {'W/L' : 'Result', 'W-L' : 'Record'}, inplace = True)
    num_wins = team_sr.Result.str.count('W').sum()
    num_losses = team_sr.Result.str.count('L').sum()
    elo = 1500 + (25 * num_wins) - (25 * num_losses)
    return elo

#pqm('TBR', 'Shane McClanahan')

def create_df(team, team2):
    data = get_today(team)
    p1 = data.Pitchers[1]
    p2 = data.Pitchers[2]
    t1 = data.Date[1]
    t2 = data.Date[2]
    data['t1_pqm'] = pqm(team, p1)
    data['t2_pqm'] = pqm(team2, p2)
    data['t1_elo'] = elo(team)
    data['t2_elo'] = elo(team2)
    data['home_team'] = t2
    data['away_team'] = t1
    data['home_pitcher'] = p2
    data['away_pitcher'] = p1
    data['t1_era'] = get_era(team, p1)
    data['t2_era'] = get_era(team2, p2)
    data['t1_bb9'] = get_bb9(team, p1)
    data['t2_bb9'] = get_bb9(team2, p2)
    data['t1_fip'] = get_fip(team, p1)
    data['t2_fip'] = get_fip(team2, p2)
    data['t1_so'] = get_so(team, p1)
    data['t2_so'] = get_so(team2, p2)
    data['t1_ba'] = get_ba(team)
    data['t2_ba'] = get_ba(team2)
    data['t1_ops'] = get_ops(team)
    data['t2_ops'] = get_ops(team2)
    data.drop(data.index[[1,2]], inplace= True)
    data.drop(columns = 'Pitchers', inplace = True)
    return data 

def get_era(team, pitcher_name):
    team_data = get_pitching(team,2021)
    pitcher_data = team_data[team_data['Name'].str.contains(pitcher_name)]
    era = float(pitcher_data.ERA.iloc[0])
    return era
#get_era('NYY', 'Lucas Luetge')
def get_bb9(team, pitcher_name):
    team_data = get_pitching(team,2021)
    pitcher_data = team_data[team_data['Name'].str.contains(pitcher_name)]
    bb9 = float(pitcher_data.BB9.iloc[0])
    return bb9
def get_fip(team, pitcher_name):
    team_data = get_pitching(team,2021)
    pitcher_data = team_data[team_data['Name'].str.contains(pitcher_name)]
    fip = float(pitcher_data.FIP.iloc[0])
    return fip
def get_whip(team, pitcher_name):
    team_data = get_pitching(team,2021)
    pitcher_data = team_data[team_data['Name'].str.contains(pitcher_name)]
    whip = float(pitcher_data.WHIP.iloc[0])
    return whip
def get_so(team, pitcher_name):
    team_data = get_pitching(team,2021)
    pitcher_data = team_data[team_data['Name'].str.contains(pitcher_name)]
    so = float(pitcher_data.SO.iloc[0])
    return so
def get_ba(team):
    team_data = team_game_logs(2021, team)
    ba = team_data.BA
    current_ba = ba.iloc[-1]
    return current_ba
def get_ops(team):
    team_data = team_game_logs(2021, team)
    ops = team_data.OPS
    current_ops = ops.iloc[-1]
    return current_ops


In [51]:
stats1 = pd.concat([create_df('DET', 'TBR'),
               create_df('COL', 'WSN'),
                 create_df('CLE', 'NYY'), create_df('MIN', 'TOR'),
                 create_df('LAD', 'CIN'),
                   create_df('PHI', 'NYM'), create_df('PIT', 'MIA'),
                   create_df('CHW', 'TEX'), create_df('ARI', 'HOU'),
                   create_df('SEA', 'KCR'), create_df('CHC', 'MIL'),
                   create_df('ATL', 'SFG')])
# 0= team1 win, 1 = team2 win , team2 is home, team1 is away

In [55]:
stats1

Unnamed: 0,Date,t1_pqm,t2_pqm,t1_elo,t2_elo,home_team,away_team,home_pitcher,away_pitcher,t1_era,t2_era,t1_bb9,t2_bb9,t1_fip,t2_fip,t1_so,t2_so,t1_ba,t2_ba,t1_ops,t2_ops,Result
0,Sep 17,13,28,1325.0,2375.0,Tampa Bay Rays,Detroit Tigers,Luis Patino,Casey Mize,3.66,4.62,2.6,3.8,4.85,4.72,108.0,65.0,0.241,0.24,0.709,0.745,1
0,Sep 17,13,60,1250.0,850.0,Washington Nationals,Colorado Rockies,Josiah Gray,German Marquez,3.93,5.85,3.0,4.1,3.71,6.87,167.0,39.0,0.247,0.258,0.729,0.754,0
0,Sep 17,13,23,1450.0,1925.0,New York Yankees,Cleveland Indians,Corey Kluber,Zach Plesac,4.45,4.02,2.1,4.2,4.67,4.12,88.0,72.0,0.236,0.236,0.709,0.725,1
0,Sep 17,8,9,1025.0,1950.0,Toronto Blue Jays,Minnesota Twins,Hyun Jin Ryu,Michael Pineda,3.87,4.11,1.7,1.9,4.18,3.82,79.0,131.0,0.24,0.267,0.735,0.798,0
0,Sep 17,4,19,2525.0,1625.0,Cincinnati Reds,Los Angeles Dodgers,Luis Castillo,Walker Buehler,2.32,4.24,2.3,3.5,3.23,3.84,189.0,171.0,0.242,0.248,0.752,0.754,1
0,Sep 17,4,18,1550.0,1425.0,New York Mets,Philadelphia Phillies,Taijuan Walker,Zack Wheeler,2.86,4.29,1.9,3.1,2.67,4.46,225.0,137.0,0.242,0.239,0.731,0.708,0
0,Sep 17,64,11,550.0,950.0,Miami Marlins,Pittsburgh Pirates,Elieser Hernandez,Wil Crowe,5.97,4.14,4.5,1.7,5.91,5.14,95.0,37.0,0.235,0.235,0.669,0.677,0
0,Sep 17,19,21,2000.0,550.0,Texas Rangers,Chicago White Sox,Taylor Hearn,Dylan Cease,4.22,3.99,3.8,3.4,3.66,5.0,202.0,83.0,0.255,0.231,0.755,0.67,0
0,Sep 17,16,22,200.0,2150.0,Houston Astros,Arizona Diamondbacks,Brandon Bielak,Madison Bumgarner,4.66,4.47,2.5,3.5,4.64,4.32,112.0,37.0,0.235,0.27,0.689,0.785,1
0,Sep 17,9,30,1750.0,1150.0,Kansas City Royals,Seattle Mariners,Brady Singer,Chris Flexen,3.73,4.85,2.0,3.8,3.86,4.14,107.0,124.0,0.223,0.247,0.681,0.7,0


In [317]:
def todays_matchups(date):#date in numeric format ie, 20210909
    url = 'https://www.cbssports.com/mlb/schedule/{}'
    u2 = url.format(date)
    data = pd.read_html(u2)
    v = data[1]
    arrl = len(v.Away)
    i = 0
    k = 0
    while i < arrl:
        obj = v.Away[i]
        if obj == 'Detroit':
            v.replace(obj, 'DET', inplace = True)
        if obj == 'Minnesota':
            v.replace(obj, 'MIN', inplace = True)
        if obj == 'N.Y. Mets':
            v.replace(obj, 'NYM', inplace = True)
        if obj == 'Kansas City':
            v.replace(obj, 'KCR', inplace = True)
        if obj == 'Toronto':
            v.replace(obj, 'TOR', inplace = True)
        if obj == 'Tampa Bay':
            v.replace(obj, 'TBR', inplace = True)
        if obj == 'Washington':
            v.replace(obj, 'WSN', inplace = True)
        if obj == 'Cincinnati':
            v.replace(obj, 'CIN', inplace = True)
        if obj == 'Philadelphia':
            v.replace(obj, 'PHI', inplace = True)
        if obj == 'L.A. Dodgers':
            v.replace(obj, 'LAD', inplace = True)
        if obj == 'L.A. Angels':
            v.replace(obj, 'LAA', inplace = True)
        if obj == 'Chi. White Sox':
            v.replace(obj, 'CHW', inplace = True)
        if obj == 'Cleveland':
            v.replace(obj, 'CLE', inplace = True)
        if obj == 'Pittsburgh':
            v.replace(obj, 'PIT', inplace = True)
        if obj == 'Miami':
            v.replace(obj, 'MIA', inplace = True)
        if obj == 'Baltimore':
            v.replace(obj, 'BAL', inplace = True)
        if obj == 'N.Y. Yankees':
            v.replace(obj, 'NYY', inplace = True)
        if obj == 'Boston':
            v.replace(obj, 'BOS', inplace = True)
        if obj == 'Atlanta':
            v.replace(obj, 'ATL', inplace = True)
        if obj == 'Chi. Cubs':
            v.replace(obj, 'CHC', inplace = True)
        if obj == 'Milwaukee':
            v.replace(obj, 'MIL', inplace = True)
        if obj == 'St. Louis':
            v.replace(obj, 'STL', inplace = True)
        if obj == 'San Diego':
            v.replace(obj, 'SDP', inplace = True)
        if obj == 'Oakland':
            v.replace(obj, 'OAK', inplace = True)
        if obj == 'Colorado':
            v.replace(obj, 'COL', inplace = True)
        i+=1
    while k < arrl:
        obj2 = v.Home[k]
        if obj2 == 'Detroit':
            v.replace(obj2, 'DET', inplace = True)
        if obj2 == 'Minnesota':
            v.replace(obj2, 'MIN', inplace = True)
        if obj2 == 'N.Y.Mets':
            v.replace(obj2, 'NYM', inplace = True)
        if obj2 == 'Kansas City':
            v.replace(obj2, 'KCR', inplace = True)
        if obj2 == 'Toronto':
            v.replace(obj2, 'TOR', inplace = True)
        if obj2 == 'Tampa Bay':
            v.replace(obj2, 'TBR', inplace = True)
        if obj2 == 'Washington':
            v.replace(obj2, 'WSN', inplace = True)
        if obj2 == 'Cincinatti':
            v.replace(obj2, 'CIN', inplace = True)
        if obj2 == 'Philadelphia':
            v.replace(obj2, 'PHI', inplace = True)
        if obj2 == 'L.A. Dodgers':
            v.replace(obj2, 'LAD', inplace = True)
        if obj2 == 'L.A. Angels':
            v.replace(obj2, 'LAA', inplace = True)
        if obj2 == 'Chi. White Sox':
            v.replace(obj2, 'CHW', inplace = True)
        if obj2 == 'Cleveland':
            v.replace(obj2, 'CLE', inplace = True)
        if obj2 == 'Pittsburgh':
            v.replace(obj2, 'PIT', inplace = True)
        if obj2 == 'Miami':
            v.replace(obj2, 'MIA', inplace = True)
        if obj2 == 'Baltimore':
            v.replace(obj2, 'BAL', inplace = True)
        if obj2 == 'N.Y. Yankees':
            v.replace(obj2, 'NYY', inplace = True)
        if obj2 == 'Boston':
            v.replace(obj2, 'BOS', inplace = True)
        if obj2 == 'Atlanta':
            v.replace(obj2, 'ATL', inplace = True)
        if obj2 == 'Chi. Cubs':
            v.replace(obj2, 'CHC', inplace = True)
        if obj2 == 'Milwaukee':
            v.replace(obj2, 'MIL', inplace = True)
        if obj2 == 'St. Louis':
            v.replace(obj2, 'STL', inplace = True)
        if obj2 == 'San Diego':
            v.replace(obj2, 'SDP', inplace = True)
        if obj2 == 'Oakland':
            v.replace(obj2, 'OAK', inplace = True)
        if obj2 == 'Colorado':
            v.replace(obj2, 'COL', inplace = True)
        k+=1
    
    return data

In [185]:
today = todays_matchups()

todays_matchups = today

In [319]:
pd.read_csv('0821log.csv')

Unnamed: 0,Date,t1_pqm,t2_pqm,t1_elo,t2_elo,home_team,away_team,home_pitcher,away_pitcher,t1_era,t2_era,t1_bb9,t2_bb9,t1_fip,t2_fip,t1_so,t2_so,t1_ba,t2_ba,t1_ops,t2_ops,Result
0,Aug 20,22,6,1150.0,1950.0,New York Yankees,Minnesota Twins,Nestor Cortes Jr.,Charlie Barnes,4.91,2.55,2.5,2.6,5.14,3.26,9.0,53.0,0.244,0.237,0.745,0.723,1
1,Aug 20,12,66,1725.0,400.0,Baltimore Orioles,Atlanta Braves,Keegan Akin,Max Fried,3.78,8.13,2.8,3.7,3.51,4.78,113.0,54.0,0.243,0.238,0.756,0.699,0
2,Aug 20,13,7,1325.0,1675.0,Toronto Blue Jays,Detroit Tigers,Robbie Ray,Tyler Alexander,4.57,2.88,1.9,2.4,4.48,3.85,53.0,167.0,0.241,0.265,0.708,0.787,0
3,Aug 20,16,0,575.0,1875.0,Boston Red Sox,Texas Rangers,Chris Sale,Dane Dunning,4.06,3.6,3.2,0.0,3.55,5.18,97.0,8.0,0.223,0.259,0.657,0.766,1
4,Aug 20,12,28,2000.0,2200.0,Tampa Bay Rays,Chicago White Sox,Michael Wacha,Lucas Giolito,3.83,5.91,2.7,2.5,4.04,5.09,161.0,76.0,0.251,0.238,0.752,0.74,0
5,Aug 20,2,22,1000.0,1725.0,Cincinnati Reds,Miami Marlins,Sonny Gray,Elieser Hernandez,2.84,4.47,0.7,3.7,4.68,3.79,13.0,113.0,0.237,0.253,0.682,0.767,1
6,Aug 20,17,15,1750.0,2025.0,Houston Astros,Seattle Mariners,Lance McCullers Jr.,Yusei Kikuchi,3.82,3.22,3.3,4.3,4.48,3.47,140.0,136.0,0.222,0.266,0.681,0.775,1
7,Aug 20,76,4,575.0,1600.0,St. Louis Cardinals,Pittsburgh Pirates,Miles Mikolas,Mitch Keller,6.86,2.25,5.0,2.3,4.77,3.18,60.0,3.0,0.233,0.237,0.662,0.702,0
8,Aug 20,0,13,500.0,1225.0,Colorado Rockies,Arizona Diamondbacks,Austin Gomber,Tyler Gilbert,0.0,4.09,3.6,2.8,2.78,3.98,10.0,100.0,0.236,0.25,0.691,0.728,1
9,Aug 20,13,12,2375.0,1900.0,Oakland Athletics,San Francisco Giants,James Kaprielian,Alex Wood,4.14,3.33,2.7,3.1,3.77,4.01,121.0,83.0,0.247,0.236,0.763,0.725,1


In [304]:
def predict(team, team2):
    dataset = create_df(team, team2)
    t1_pqm = dataset.t1_pqm
    t2_pqm = dataset.t2_pqm
    t1_elo = dataset.t1_elo
    t2_elo = dataset.t2_elo
    t1_era = dataset.t1_era
    t2_era = dataset.t2_era
    t1_bb9 = dataset.t1_bb9
    t2_bb9 = dataset.t2_bb9
    t1_fip = dataset.t1_fip
    t2_fip = dataset.t2_fip
    t1_ba = dataset.t1_ba
    t2_ba = dataset.t2_ba
    t1_ops = dataset.t1_ops
    t2_ops = dataset.t2_ops
    t1_so = dataset.t1_so
    t2_so = dataset.t2_so
    dataf = pd.DataFrame({'t1_pqm':t1_pqm,' t2_pqm':t2_pqm,'t1_elo':t1_elo, 't2_elo':t2_elo,
                      't1_era':t1_era, 't2_era':t2_era, 't1_bb9':t1_bb9,'t2_bb9': t2_bb9,
                     't1_fip':t1_fip, 't2_fip':t2_fip,'t1_so':t1_so,'t2_so':t2_so, 't1_ba':t1_ba,'t2_ba':t2_ba,'t1_ops':t1_ops,
                      't2_ops':t2_ops})
    return dataf

d = predict('CHW', 'OAK')
actual = dummies(d)
model.predict(actual)



array([[1., 0.]], dtype=float32)

In [308]:
todays_matchups(20210909)

KeyError: "['Time / TV' 'Venue' 'Home Starter' 'Away Starter' 'Buy Tickets'] not found in axis"

In [296]:
def dummies(data):
    val = pd.get_dummies(data)
    return val
dummies(bigstats)

Unnamed: 0,t1_pqm,t2_pqm,t1_elo,t2_elo,t1_era,t2_era,t1_bb9,t2_bb9,t1_fip,t2_fip,t1_so,t2_so,t1_ba,t2_ba,t1_ops,t2_ops,Result,Date_Aug 25,Date_Aug 26,Date_Sep 2,Date_Sep 7,home_team_Arizona Diamondbacks,home_team_Atlanta Braves,home_team_Baltimore Orioles,home_team_Boston Red Sox,home_team_Chicago Cubs,home_team_Cleveland Indians,home_team_Colorado Rockies,home_team_Detroit Tigers,home_team_Houston Astros,home_team_Kansas City Royals,home_team_Miami Marlins,home_team_Milwaukee Brewers,home_team_New York Mets,home_team_New York Yankees,home_team_Oakland Athletics,home_team_Philadelphia Phillies,home_team_Pittsburgh Pirates,home_team_San Diego Padres,home_team_San Francisco Giants,home_team_Seattle Mariners,home_team_St. Louis Cardinals,home_team_Tampa Bay Rays,home_team_Toronto Blue Jays,away_team_Arizona Diamondbacks,away_team_Atlanta Braves,away_team_Boston Red Sox,away_team_Chicago White Sox,away_team_Cincinnati Reds,away_team_Cleveland Indians,away_team_Colorado Rockies,away_team_Detroit Tigers,away_team_Kansas City Royals,away_team_Los Angeles Angels,away_team_Los Angeles Dodgers,away_team_Milwaukee Brewers,away_team_Minnesota Twins,away_team_New York Mets,away_team_New York Yankees,away_team_Oakland Athletics,away_team_Philadelphia Phillies,away_team_Pittsburgh Pirates,away_team_San Francisco Giants,away_team_Seattle Mariners,away_team_St. Louis Cardinals,away_team_Tampa Bay Rays,away_team_Texas Rangers,away_team_Toronto Blue Jays,away_team_Washington Nationals,home_pitcher_Aaron Civale,home_pitcher_Adrian Sampson,home_pitcher_Alexander Wells,home_pitcher_Blake Snell,home_pitcher_Brandon Woodruff,home_pitcher_Brett Anderson,home_pitcher_Carlos Carrasco,home_pitcher_Chi Chi Gonzalez,home_pitcher_Chris Sale,home_pitcher_Eduardo Rodriguez,home_pitcher_Edward Cabrera,home_pitcher_Elieser Hernandez,home_pitcher_Eric Lauer,home_pitcher_Gerrit Cole,home_pitcher_Hyun Jin Ryu,home_pitcher_Jake Odorizzi,home_pitcher_James Kaprielian,home_pitcher_Jon Lester,home_pitcher_Keegan Akin,home_pitcher_Keegan Thompson,home_pitcher_Lance McCullers Jr.,home_pitcher_Logan Webb,home_pitcher_Matt Manning,home_pitcher_Max Fried,home_pitcher_Mike Minor,home_pitcher_Mitch Keller,home_pitcher_Nick Pivetta,home_pitcher_Robbie Ray,home_pitcher_Sam Hentges,home_pitcher_Shane McClanahan,home_pitcher_Taijuan Walker,home_pitcher_Yu Darvish,home_pitcher_Yusei Kikuchi,home_pitcher_Zac Gallen,home_pitcher_Zach Davies,home_pitcher_Zach Eflin,home_pitcher_Zack Wheeler,away_pitcher_Aaron Nola,away_pitcher_Alex Wood,away_pitcher_Bailey Ober,away_pitcher_Brad Keller,away_pitcher_Carlos Carrasco,away_pitcher_Carlos Rodon,away_pitcher_Drew Rasmussen,away_pitcher_Eduardo Rodriguez,away_pitcher_Eric Lauer,away_pitcher_Frankie Montas,away_pitcher_German Marquez,away_pitcher_Jackson Kowar,away_pitcher_Jaime Barria,away_pitcher_Jameson Taillon,away_pitcher_John Gant,away_pitcher_Johnny Cueto,away_pitcher_Jordan Lyles,away_pitcher_Logan Gilbert,away_pitcher_Logan Webb,away_pitcher_Lucas Giolito,away_pitcher_Luis Castillo,away_pitcher_Max Scherzer,away_pitcher_Mike Minor,away_pitcher_Miles Mikolas,away_pitcher_Mitch Keller,away_pitcher_Packy Naughton,away_pitcher_Paolo Espino,away_pitcher_Patrick Corbin,away_pitcher_Ryan Yarbrough,away_pitcher_Sonny Gray,away_pitcher_Spencer Howard,away_pitcher_Steven Matz,away_pitcher_Tarik Skubal,away_pitcher_Touki Toussaint,away_pitcher_Triston McKenzie,away_pitcher_Wade Miley,away_pitcher_Walker Buehler,away_pitcher_Zac Gallen
0,17,11,1925.0,2375.0,5.12,3.59,2.7,2.8,3.48,3.37,150.0,117.0,0.259,0.239,0.769,0.741,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,71,27,575.0,1100.0,6.75,3.09,4.8,5.0,4.86,4.91,68.0,45.0,0.233,0.23,0.664,0.702,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,30,18,1550.0,1150.0,4.83,5.3,4.6,2.5,4.7,4.37,107.0,142.0,0.237,0.244,0.716,0.691,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
3,17,27,1700.0,1225.0,3.6,6.08,3.4,2.3,4.86,5.3,40.0,54.0,0.242,0.248,0.75,0.729,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,9,25,1825.0,1300.0,3.66,5.46,2.5,2.7,3.26,4.55,167.0,34.0,0.236,0.241,0.722,0.704,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,17,6,2250.0,2375.0,3.61,2.65,3.4,2.5,4.26,3.03,82.0,111.0,0.235,0.244,0.719,0.756,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,15,8,1075.0,1525.0,6.0,3.32,2.6,2.1,3.5,4.27,25.0,76.0,0.242,0.239,0.739,0.719,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,16,35,1500.0,925.0,6.59,5.23,1.6,2.6,4.91,7.62,28.0,4.0,0.24,0.235,0.705,0.677,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,186,84,1175.0,250.0,9.82,7.71,6.5,4.3,5.35,5.25,8.0,15.0,0.244,0.24,0.691,0.707,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,13,3,1800.0,1975.0,3.8,2.73,2.6,1.9,3.78,2.54,116.0,215.0,0.264,0.236,0.785,0.722,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [237]:
while i < 100: 
    prediction = predict('SEA', 'HOU')
    print(prediction)
    

tf.Tensor([[-0.20717502]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.10259485]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.4181844]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.42294866]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.73463136]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.91016924]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.94528973]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.01856589]], shape=(1, 1), dtype=float32)
tf.Tensor([[1.2309793]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.0579536]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.85025036]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.28845203]], shape=(1, 1), dtype=float32)
tf.Tensor([[-1.351032]], shape=(1, 1), dtype=float32)
tf.Tensor([[1.312781]], shape=(1, 1), dtype=float32)
tf.Tensor([[-1.2704483]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.7918243]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.3402809]], shape=(1, 1), dtype=float32)


KeyboardInterrupt: 

In [254]:
url = 'https://www.cbssports.com/mlb/schedule/20210909'
data = pd.read_html(url)
data[0]

Unnamed: 0,Away,Home,Time / TV,Venue,Home Starter,Away Starter,Buy Tickets
0,L.A. Dodgers,St. Louis,1:15 pm,Busch Stadium,"J. Woodford (2-3, 4.47 ERA) J. Woodford (2-...","T. Gonsolin (2-1, 2.78 ERA) T. Gonsolin (2-...",Tickets Starting at $6.00
1,Chi. White Sox,Oakland,3:37 pm,Oakland-Alameda County Coliseum,"S. Manaea (8-9, 3.91 ERA) S. Manaea (8-9, 3...","R. Lopez (3-1, 2.08 ERA) R. Lopez (3-1, 2.0...",Tickets Starting at $7.48
2,Minnesota,Cleveland,6:10 pm,Progressive Field,"C. Quantrill (4-3, 3.15 ERA) C. Quantrill (...","R. Dobnak (1-7, 7.64 ERA) R. Dobnak (1-7, 7...",Tickets Starting at $8.00
3,N.Y. Mets,Miami,6:40 pm,loanDepot park,"J. Luzardo (3-3, 7.62 ERA) J. Luzardo (3-3,...","M. Stroman (9-12, 2.93 ERA) M. Stroman (9-1...",Tickets Starting at $6.00
4,Kansas City,Baltimore,7:05 pm,Oriole Park at Camden Yards,"J. Means (5-6, 3.47 ERA) J. Means (5-6, 3.4...","C. Hernandez (5-1, 3.57 ERA) C. Hernandez (...",Tickets Starting at $10.00
5,Colorado,Philadelphia,7:05 pm,Citizens Bank Park,"R. Suarez (6-4, 1.38 ERA) R. Suarez (6-4, 1...","A. Senzatela (4-9, 4.16 ERA) A. Senzatela (...",Tickets Starting at
6,Toronto,N.Y. Yankees,7:05 pm,Yankee Stadium,"N. Cortes (2-2, 2.67 ERA) N. Cortes (2-2, 2...","J. Berrios (2-2, 3.73 ERA) J. Berrios (2-2,...",Tickets Starting at
7,Washington,Atlanta,7:20 pm,Truist Park,"H. Ynoa (4-5, 3.19 ERA) H. Ynoa (4-5, 3.19 ...","E. Fedde (6-9, 5.27 ERA) E. Fedde (6-9, 5.2...",Tickets Starting at $6.00


In [287]:


y = t.Result
X = t.drop(columns = 'Result')

X = X.values.astype('float32')
y = y.values.astype('float32')

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2)
X_train, X_validation, y_train, y_validation = train_test_split(X, y, test_size = 0.2)

model=keras.Sequential([keras.layers.Dense(20,activation=tf.nn.relu),
                        keras.layers.Dense(2,activation='softmax')])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc'])
history = model.fit(X_train,y_train,epochs=20, validation_data=(X_validation, y_validation))

#all variables within features list are single values, ex: .351, 11, .991, etc.

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [288]:
#stats1['Result'] = [0, 1, 0, 0, 0, 1 ]
t = bigstats.drop(columns = ['Date', 'home_team', 'away_team', 'home_pitcher', 'away_pitcher'])
newdf = pd.get_dummies(t)
tester = pd.DataFrame({'t1_pqm':'17', 't2_pqm':'11','t1_elo':'1925', 't2_elo':'2375',
                      't1_era':'5.12', 't2_era':'3.59', 't1_bb9':'2.7','t2_bb9': '2.8',
                     't1_fip':'3.48', 't2_fip':'3.37','t1_so':'150.0','t2_so':'117.0', 't1_ba':'0.259','t2_ba':'0.239','t1_ops':'0.769',
                      't2_ops':'0.741'}, index = [0])
testerr = pd.get_dummies(tester)
model.predict(testerr)

array([[0.8334366 , 0.16656333]], dtype=float32)

In [27]:
newstats = stats1[stats1.home_team != 'New York Mets']

In [38]:
stats1

Unnamed: 0,Date,t1_pqm,t2_pqm,t1_elo,t2_elo,home_team,away_team,home_pitcher,away_pitcher,t1_era,t2_era,t1_bb9,t2_bb9,t1_fip,t2_fip,t1_so,t2_so,t1_ba,t2_ba,t1_ops,t2_ops,Result
0,Sep 16,13,6,1350.0,2350.0,Tampa Bay Rays,Detroit Tigers,Louis Head,Tyler Alexander,4.09,2.67,2.3,2.7,4.5,3.43,71.0,27.0,0.242,0.24,0.709,0.744,1
0,Sep 16,14,0,1600.0,575.0,Pittsburgh Pirates,Cincinnati Reds,Connor Overton,Tyler Mahle,3.73,0.0,3.2,0.0,3.93,1.17,188.0,1.0,0.249,0.235,0.755,0.671,0
0,Sep 16,20,40,1775.0,1175.0,Kansas City Royals,Oakland Athletics,Daniel Lynch,Paul Blackburn,5.24,5.37,2.0,3.9,4.96,4.71,15.0,47.0,0.239,0.247,0.727,0.7,0
0,Sep 16,10,3,1425.0,2025.0,Chicago White Sox,Los Angeles Angels,Reynaldo Lopez,Alex Cobb,3.82,2.05,3.0,2.3,2.63,3.28,84.0,40.0,0.246,0.255,0.722,0.756,0
0,Sep 16,13,5,1625.0,2600.0,San Francisco Giants,San Diego Padres,Kevin Gausman,Pierce Johnson,2.98,2.65,4.2,2.5,3.48,2.87,67.0,201.0,0.24,0.248,0.719,0.771,0
0,Sep 16,13,16,1950.0,175.0,Baltimore Orioles,New York Yankees,Chris Ellis,Jordan Montgomery,3.71,2.55,3.0,4.1,3.64,5.09,137.0,11.0,0.237,0.24,0.725,0.711,1
0,Sep 16,16,5,2125.0,575.0,Texas Rangers,Houston Astros,Glenn Otto,Framber Valdez,3.26,6.92,3.9,1.4,3.96,1.63,109.0,16.0,0.269,0.231,0.782,0.671,0


In [56]:
 
stats1.to_csv('~/Desktop/mlb/0918log.csv', index = False)