In [402]:
# Import necessary libraries
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import datetime

def get_match_data(start_date,time_now_formatted,devengine):


    # Get historical match data on hard surface between start date and yesterday
    elo_hard = pd.read_sql_query(
        f"Select DISTINCT * From Elo_AllMatches_Hard where Date > '{start_date}' and Date not like '{time_now_formatted}'",
        con=devengine,
    )

    # Get historical match data on clay surface between start date and yesterday
    elo_clay = pd.read_sql_query(
        f"Select DISTINCT * From Elo_AllMatches_Clay where Date > '{start_date}' and Date not like '{time_now_formatted}'",
        con=devengine,
    )

    #Get today's matches on hard surface that haven't yet been resulted
    elo_data_hard =pd.read_sql_query(
        f"Select DISTINCT * From Elo_AllMatches_Hard where Date like '{time_now_formatted}' --and resulted like 'False'",
        con=devengine,)

    #Get today's matches on clay surface that haven't yet been resulted
    elo_data_clay =pd.read_sql_query(
        f"Select DISTINCT * From Elo_AllMatches_Clay where Date like '{time_now_formatted}' --and resulted like 'False'",
        con=devengine,)

    return elo_hard, elo_clay, elo_data_hard, elo_data_clay

# Connect to SQLite database using SQLAlchemy's create_engine
devengine = create_engine("sqlite:///C:/Git/tennis_atp/database/bets_sqllite.db")
# Get current date and time
time_now = datetime.datetime.now()

# Format current date as string in YYYY-MM-DD format
time_now_formatted = time_now.strftime("%Y-%m-%d")

# Get the start date two years ago from today
today =time_now
two_years_ago = (today - datetime.timedelta(days=365*2)).strftime("%Y-%m-%d")

elo_hard, elo_clay, elo_data_hard, elo_data_clay = get_match_data(two_years_ago,time_now_formatted,devengine)

def get_player_record(player, opponent_rank, history, range_low,range_high,auto):
    if auto:
        opponent_rank_low = opponent_rank - range_low
        opponent_rank_high = opponent_rank + range_high
    else:
        opponent_rank_low = range_low
        opponent_rank_high = range_high

    player_history = history[
        (
            (history["Fav"] == player)
            & (
                (history["Dog_Rank"] > opponent_rank_low)
                & (history["Dog_Rank"] < opponent_rank_high)
            )
        )
        | (
            (history["Dog"] == player)
            & (
                (history["Fav_Rank"] > opponent_rank_low)
                & (history["Fav_Rank"] < opponent_rank_high)
            )
        )
    ]
    if player_history.empty == False:
        result = float(
            len(player_history[player_history["Winner"] == player])
            / len(player_history)
        )
        return result, len(player_history)
    else:
        return 0, 0

def get_filtered_data(elo_data, elo):
    result_df = pd.DataFrame()
    for _, row in elo_data.sort_values(by="Time").iterrows():
        low_limit = 50
        high_limit = 50

        fav_percent, games = get_player_record(row.Fav, row.Dog_Rank, elo_hard, low_limit, high_limit, True)
        count = 0
        while games < 10 and count < 200:
            count = count + 1
            low_limit = low_limit + 10
            high_limit = high_limit + 10
            fav_percent, games = get_player_record(row.Fav, row.Dog_Rank, elo_hard, low_limit, high_limit, True)

        low_limit = 50
        high_limit = 50
        dog_percent, games2 = get_player_record(row.Dog, row.Fav_Rank, elo_hard, low_limit, high_limit, True)
        count = 0
        while games2 < 10 and count < 200:
            count = count + 1
            low_limit = low_limit + 10
            high_limit = high_limit + 10
            dog_percent, games2 = get_player_record(row.Dog, row.Fav_Rank, elo_hard, low_limit, high_limit, True)

        if games > 4 and games2 > 4:
            temp_df = pd.DataFrame(
                {
                    "Time": [row.Time],
                    "Fav_Odds": [row.Fav_Odds],
                    "Dog_Odds": [row.Dog_Odds],
                    "Fav": [row.Fav],
                    "Elo_Fav": [row.Elo_Fav],
                    "Fav_Record": ["{:.0%}".format(fav_percent)],
                    "Fav_Games": [games],
                    "Dog": [row.Dog],
                    "Dog_Odds": [row.Dog_Odds],
                    "Dog_Record": ["{:.0%}".format(dog_percent)],
                    "Dog_Games": [games2],
                    "fav_percent": [fav_percent],
                    "dog_percent": [dog_percent],
                    "Sex": [row.Sex],
                    "Resulted":[row.Resulted],
                    "fav_rank":[row.Fav_Rank],
                    "dog_rank":[row.Dog_Rank],
                    "Elo_Fav_Elo":[row.Elo_Fav_Elo],
                    "Elo_Dog_Elo":[row.Elo_Dog_Elo],                    
                }
            )
            result_df = pd.concat([result_df, temp_df])
    return result_df

results_hard=get_filtered_data(elo_data_hard,elo_hard)
results_clay=get_filtered_data(elo_data_clay,elo_clay)
def process_serving_data(result_df):
    try:
        # Try to read the 'servers_today.xlsx' file
        serving = pd.read_csv('mensserving.csv')

        # Try to read the 'servers_today_womens.xlsx' file
        serving_womens = pd.read_csv('womensserving.csv')

        # If 'serving_womens' dataframe is not empty, concatenate with 'serving' dataframe
        if serving_womens.empty == False:
            serving = pd.concat([serving, serving_womens])
        else:
            serving = serving

        # Drop the 'Time' column from the 'serving' dataframe
        #serving = serving.drop(columns='Time')
    except FileNotFoundError as e:
        # If either of the excel files is not found, print an error message and set serving to None
        print("The required excel file could not be found.")
        print("Error:", e)
        serving = None

    if serving is not None:
        try:
            # Try to merge the 'result_df' and 'serving' dataframes on the 'Fav' and 'Dog' columns
            result = pd.merge(result_df, serving, how='left', left_on=['Fav'], right_on=['Name'])
            result = pd.merge(result,serving, how='left', left_on=['Dog'], right_on=['Name'])
            result.rename(
        columns={
            "Service Games Won_x": "Fav_Serve%",
            "Service Games Won_y": "Dog_Serve%",
            "Return Games Won_x": "Fav_Return%",
            "Return Games Won_y": "Dog_Return%",
        },
        inplace=True,)
            result.drop(columns=['Name_x','Name_y'],inplace=True)
    

            # Set 'final_hard' to the 'result' dataframe
            result_serving = result
        except Exception as e:
            # If an error occurs during merging, print an error message and set both 'result' and 'final_hard' to None
            print("Error occured while merging the dataframes.")
            print("Error:", e)
            result = None
            result_serving = None
    else:
        result = None
        result_serving = None
        
    return result, result_serving


_,serving_hard=process_serving_data(results_hard)
_,serving_clay=process_serving_data(results_clay)

elo_hardxx = pd.read_sql_query(
    f"Select DISTINCT * From Elo_AllMatches_Hard where Date > '2022-01-01' and Date not like '{time_now_formatted}' --and Fav_odds>1.2 and Fav_odds<1.4 ",
    con=devengine,)

data_concat = pd.DataFrame(columns=['Date', 'Player', 'Odds', 'Win/Loss'])
for dataset_type in [('Winner', 'Win'), ('Loser', 'Loss')]:
    df = elo_hard[['Date', dataset_type[0], f"{dataset_type[0]}_Odds"]].copy()
    df['Player'] = df[dataset_type[0]]
    df['Odds'] = df[f"{dataset_type[0]}_Odds"]
    df['Win/Loss'] = dataset_type[1]
    df.drop(columns=[dataset_type[0], f"{dataset_type[0]}_Odds"], inplace=True)
    data_concat = pd.concat([data_concat, df])
data_concat = data_concat.sort_index()
data_concat['Odds'] = data_concat.Odds.astype(float)

def analyse_matchups(result_df,data_concat):
    for _,matchup in result_df.iterrows():
        player1=matchup.Fav
        player2=matchup.Dog
        player1_odds=float(matchup.Fav_Odds)
        player1_odds_hi=player1_odds+0.15
        player1_odds_lo=player1_odds-0.15
        player2_odds=float(matchup.Dog_Odds)
        player2_odds_hi=player2_odds+0.15
        player2_odds_lo=player2_odds-0.15
        player1=data_concat[data_concat['Player']==player1].copy()
        player2=data_concat[data_concat['Player']==player2].copy()
        player2=player2[(player2['Odds']>player2_odds_lo)&(player2['Odds']<player2_odds_hi)]
        if len(player2)>0:
            winperc2=len(player2[player2['Win/Loss']=='Win'])/len(player2)
        else:
            winperc2=0
        player1=player1[(player1['Odds']>player1_odds_lo)&(player1['Odds']<player1_odds_hi)]
        if len(player1)>0:
            winperc1=len(player1[player1['Win/Loss']=='Win'])/len(player1)
        else:
            winperc1=0
        if len(player1)>5 and len(player2)>3:
            print(matchup.Time,f"{matchup.Fav} ({round(player1_odds_lo,2)}-->{round(player1_odds_hi,2)})",f"{matchup.Dog} ({round(player2_odds_lo,2)}-->{round(player2_odds_hi,2)})")
            print(len(player1),winperc1,len(player2),winperc2)


analyse_matchups(serving_clay,data_concat)
final_hard=serving_hard
final_clay=serving_clay

01:10 Mattia Bellucci (1.54-->1.84) Evgeny Donskoy (1.95-->2.25)
8 0.75 5 0.4
01:30 Richard Gasquet (1.53-->1.83) Arthur Rinderknech (2.06-->2.36)
9 0.3333333333333333 4 0.75
03:50 Petra Martic (1.51-->1.81) Shelby Rogers (2.1-->2.4)
9 0.5555555555555556 10 0.4
20:30 Diego Schwartzman (1.63-->1.93) Nuno Borges (1.92-->2.22)
7 0.42857142857142855 5 0.8
20:30 Ugo Humbert (1.68-->1.98) Lorenzo Sonego (1.86-->2.16)
8 0.375 5 0.6
20:30 Frederico Ferreira Silva (1.48-->1.78) Benjamin Hassan (2.03-->2.33)
11 0.7272727272727273 4 0.5
20:30 Anastasia Potapova (1.26-->1.56) Mayar Sherif (2.82-->3.12)
14 0.5714285714285714 5 0.2
22:30 Sloane Stephens (1.46-->1.76) Varvara Gracheva (2.19-->2.49)
10 0.7 5 1.0


In [403]:
def last_five(df,pastmatches):
    for index, row in df.iterrows():
        fav=row.Fav
        dog=row.Dog
        last_five_matches_fav=pastmatches[(pastmatches['Winner']==fav)|(pastmatches['Loser']==fav)].tail(5)
        if len(last_five_matches_fav)>0:  
            fav_last_five_win_perc=len(last_five_matches_fav[last_five_matches_fav['Winner']==fav])/len(last_five_matches_fav)
        else:
            fav_last_five_win_perc=0
        last_five_matches_dog=pastmatches[(pastmatches['Winner']==dog)|(pastmatches['Loser']==dog)].tail(5)
        if len(last_five_matches_dog)>0:    
            dog_last_five_win_perc=len(last_five_matches_dog[last_five_matches_dog['Winner']==dog])/len(last_five_matches_dog) 
        else:
            dog_last_five_win_perc=0
        df.at[index, 'fav_last_five_win_perc'] = fav_last_five_win_perc
        df.at[index, 'dog_last_five_win_perc'] = dog_last_five_win_perc
    return df

if final_hard is not None:
    final_hard=last_five(final_hard,elo_hard)
    final_hard['Fav_Odds']=final_hard['Fav_Odds'].astype(float)
    final_hard['Dog_Odds']=final_hard['Dog_Odds'].astype(float)    

In [404]:
final_clay=last_five(final_clay,elo_clay)

In [405]:
for _,i in elo_data_hard.iterrows():
    check1=elo_hard[((elo_hard['Winner']==i.Winner)&(elo_hard['Loser']==i.Loser))|((elo_hard['Loser']==i.Winner)&(elo_hard['Winner']==i.Loser))]
    if check1.empty==False:
        for _, x in check1.iterrows():
            print(f"{x.Winner} beat {x.Loser}")

Marc Polmans beat Adam Walton
Christian Langmo beat Nathan Ponwith


In [406]:
for _,i in elo_data_clay.iterrows():
    check1=elo_clay[((elo_clay['Winner']==i.Winner)&(elo_clay['Loser']==i.Loser))|((elo_clay['Loser']==i.Winner)&(elo_clay['Winner']==i.Loser))]
    if check1.empty==False:
        for _, x in check1.iterrows():
            print(f"{x.Winner} beat {x.Loser}")

Matheus Pucinelli De Almeida beat Andrea Collarini
Irina Camelia Begu beat Sara Errani
Francisco Cerundolo beat Jaume Munar
Yannick Hanfmann beat Thiago Monteiro
Lorenzo Sonego beat Ugo Humbert
Juan Pablo Varillas beat Roberto Bautista Agut


In [423]:
if final_hard is not None:
    print(final_hard[(final_hard['Sex']!='Womens')&(final_hard['Resulted']=='False')][['Time','Fav','Elo_Fav','Fav_Odds','fav_percent','fav_rank','Fav_Serve%','Fav_Return%','fav_last_five_win_perc','Dog','Dog_Odds','dog_percent','dog_rank','Dog_Serve%','Dog_Return%','dog_last_five_win_perc']])

Empty DataFrame
Columns: [Time, Fav, Elo_Fav, Fav_Odds, fav_percent, fav_rank, Fav_Serve%, Fav_Return%, fav_last_five_win_perc, Dog, Dog_Odds, dog_percent, dog_rank, Dog_Serve%, Dog_Return%, dog_last_five_win_perc]
Index: []


In [409]:
final_clay['Dog_Odds']=final_clay['Dog_Odds'].astype(float)
final_clay['Fav_Odds']=final_clay['Fav_Odds'].astype(float)

In [410]:
final_clay[(final_clay['Sex']=='Womens')&(final_clay['Resulted']=='False')][['Time','Fav','Elo_Fav','Fav_Odds','fav_percent','fav_rank','Fav_Serve%','Fav_Return%','fav_last_five_win_perc','Dog','Dog_Odds','dog_percent','dog_rank','Dog_Serve%','Dog_Return%','dog_last_five_win_perc']]

Unnamed: 0,Time,Fav,Elo_Fav,Fav_Odds,fav_percent,fav_rank,Fav_Serve%,Fav_Return%,fav_last_five_win_perc,Dog,Dog_Odds,dog_percent,dog_rank,Dog_Serve%,Dog_Return%,dog_last_five_win_perc
16,19:00,Elina Svitolina,Elina Svitolina,1.3,0.375,192.0,0.64,0.37,1.0,Storm Hunter,3.6,0.333333,204.0,0.57,0.15,1.0
18,19:00,Jelena Ostapenko,Jelena Ostapenko,1.41,0.628571,17.0,0.68,0.4,0.8,Peyton Stearns,2.96,0.4,69.0,0.62,0.24,0.6
20,19:00,Elise Mertens,Camila Osorio,1.8,0.714286,28.0,0.68,0.39,0.6,Camila Osorio,2.06,0.375,86.0,0.57,0.37,0.6
22,19:45,Jessica Pegula,Jessica Pegula,1.36,0.717391,3.0,0.73,0.37,0.6,Camila Giorgi,3.22,0.357143,37.0,0.65,0.34,0.6
25,20:30,Liudmila Samsonova,Liudmila Samsonova,1.4,0.7,15.0,0.81,0.31,0.6,Anastasia Pavlyuchenkova,2.99,0.285714,333.0,0.65,0.43,0.6
28,20:30,Anastasia Potapova,Mayar Sherif,1.41,0.486486,25.0,0.63,0.42,0.6,Mayar Sherif,2.97,0.071429,54.0,0.64,0.27,0.4
29,21:00,Qinwen Zheng,Qinwen Zheng,1.33,0.6,19.0,0.75,0.26,0.8,Yulia Putintseva,3.41,0.4,58.0,0.65,0.37,0.4
30,21:00,Karolina Muchova,Karolina Muchova,1.18,0.615385,43.0,0.71,0.25,0.8,Nadia Podoroska,5.01,0.5,103.0,0.67,0.3,0.4
31,21:00,Marketa Vondrousova,Daria Kasatkina,1.41,0.571429,60.0,0.73,0.34,0.8,Daria Kasatkina,2.98,0.594595,9.0,0.61,0.47,0.6
32,21:15,Caroline Garcia,Caroline Garcia,1.42,0.627907,5.0,0.8,0.27,0.4,Anna Blinkova,2.95,0.5,56.0,0.63,0.33,0.8


In [411]:
final_clay[(final_clay['Sex']=='Mens')&(final_clay['Resulted']=='False')][['Time','Fav','Elo_Fav','Fav_Odds','fav_percent','fav_rank','Fav_Serve%','Fav_Return%','fav_last_five_win_perc','Dog','Dog_Odds','dog_percent','dog_rank','Dog_Serve%','Dog_Return%','dog_last_five_win_perc']]

Unnamed: 0,Time,Fav,Elo_Fav,Fav_Odds,fav_percent,fav_rank,Fav_Serve%,Fav_Return%,fav_last_five_win_perc,Dog,Dog_Odds,dog_percent,dog_rank,Dog_Serve%,Dog_Return%,dog_last_five_win_perc
17,19:00,Fabio Fognini,Jason Kubler,1.57,0.470588,130.0,0.72,0.24,0.6,Jason Kubler,2.45,0.4,69.0,0.82,0.19,0.4
19,19:00,Stefanos Tsitsipas,Stefanos Tsitsipas,1.08,0.710526,5.0,0.86,0.21,0.8,Roberto Carballes Baena,8.35,0.3,57.0,0.74,0.21,0.6
21,19:00,Sebastian Korda,Sebastian Ofner,1.56,0.818182,30.0,0.8,0.26,0.4,Sebastian Ofner,2.46,0.545455,118.0,0.69,0.16,0.8
23,20:30,Stan Wawrinka,Thanasi Kokkinakis,1.88,0.6,89.0,0.81,0.15,0.6,Thanasi Kokkinakis,1.95,0.727273,108.0,0.84,0.14,0.6
24,20:30,Diego Schwartzman,Nuno Borges,1.78,0.5,95.0,0.7,0.32,0.2,Nuno Borges,2.07,0.733333,80.0,0.84,0.13,0.4
26,20:30,Ugo Humbert,Ugo Humbert,1.83,0.4,40.0,0.76,0.17,1.0,Lorenzo Sonego,2.01,0.4,48.0,0.81,0.2,0.6
27,20:30,Frederico Ferreira Silva,Benjamin Hassan,1.63,0.6,230.0,0.0,0.0,0.4,Benjamin Hassan,2.18,0.545455,284.0,0.0,0.0,0.6
33,22:00,Hubert Hurkacz,Hubert Hurkacz,1.41,0.634146,14.0,0.91,0.18,0.4,Tallon Griekspoor,2.96,0.363636,39.0,0.81,0.15,0.4
34,22:00,Matteo Arnaldi,Matteo Arnaldi,1.8,0.5,106.0,0.8,0.13,0.6,Denis Shapovalov,2.05,0.611111,32.0,0.85,0.18,0.4
36,22:30,Karen Khachanov,Karen Khachanov,1.18,0.909091,11.0,0.82,0.2,0.6,Radu Albot,5.08,0.2,113.0,0.78,0.21,0.8


In [412]:
final_clay.to_pickle('Clay_Today')
if final_hard is not None:
    final_hard.to_pickle('Hard_Today')
else:
    final_clay[final_clay['Sex']=='k'].to_pickle('Hard_Today')

In [414]:
data=final_clay
data['fav_win_percent_grt_10'] = data['fav_percent'] > 0.1
data['fav_win_percent_grt_20'] = data['fav_percent'] > 0.2
data['fav_win_percent_grt_30'] = data['fav_percent'] > 0.3
data['fav_win_percent_grt_40'] = data['fav_percent'] > 0.4
data['fav_win_percent_grt_50'] = data['fav_percent'] > 0.5
data['fav_win_percent_grt_60'] = data['fav_percent'] > 0.6
data['fav_win_percent_grt_70'] = data['fav_percent'] > 0.7
data['fav_win_percent_grt_80'] = data['fav_percent'] > 0.8
data['fav_win_percent_grt_90'] = data['fav_percent'] > 0.9
data['fav_win_percent_100'] = data['fav_percent'] == 1.0
data['dog_win_percent_grt_10'] = data['dog_percent'] > 0.1
data['dog_win_percent_grt_20'] = data['dog_percent'] > 0.2
data['dog_win_percent_grt_30'] = data['dog_percent'] > 0.3
data['dog_win_percent_grt_40'] = data['dog_percent'] > 0.4
data['dog_win_percent_grt_50'] = data['dog_percent'] > 0.5
data['dog_win_percent_grt_60'] = data['dog_percent'] > 0.6
data['dog_win_percent_grt_70'] = data['dog_percent'] > 0.7
data['dog_win_percent_grt_80'] = data['dog_percent'] > 0.8
data['dog_win_percent_grt_90'] = data['dog_percent'] > 0.9
data['dog_win_percent_100'] = data['dog_percent'] == 1.0

data['fav_win_percent_lt_10'] = data['fav_percent'] < 0.1
data['fav_win_percent_lt_20'] = data['fav_percent'] < 0.2
data['fav_win_percent_lt_30'] = data['fav_percent'] < 0.3
data['fav_win_percent_lt_40'] = data['fav_percent'] < 0.4
data['fav_win_percent_lt_50'] = data['fav_percent'] < 0.5
data['fav_win_percent_lt_60'] = data['fav_percent'] < 0.6
data['fav_win_percent_lt_70'] = data['fav_percent'] < 0.7
data['fav_win_percent_lt_80'] = data['fav_percent'] < 0.8
data['fav_win_percent_lt_90'] = data['fav_percent'] < 0.9
data['fav_win_percent_0'] = data['fav_percent'] == 0.0

data['dog_win_percent_lt_10'] = data['dog_percent'] < 0.1
data['dog_win_percent_lt_20'] = data['dog_percent'] < 0.2
data['dog_win_percent_lt_30'] = data['dog_percent'] < 0.3
data['dog_win_percent_lt_40'] = data['dog_percent'] < 0.4
data['dog_win_percent_lt_50'] = data['dog_percent'] < 0.5
data['dog_win_percent_lt_60'] = data['dog_percent'] < 0.6
data['dog_win_percent_lt_70'] = data['dog_percent'] < 0.7
data['dog_win_percent_lt_80'] = data['dog_percent'] < 0.8
data['dog_win_percent_lt_90'] = data['dog_percent'] < 0.9
data['dog_win_percent_0'] = data['dog_percent'] == 0.0
#data['Winner_']=data['Winner']
#data['Winner_IsFav']=data['Winner']==data['Fav']
#data['Fav_IsEloFav']=data['Fav']==data['Elo_Fav']
data['fav_rank_band'] = pd.cut(data['fav_rank'], bins=list(range(0, 201, 20)), labels=list(range(20, 201, 20)), right=False)
data['dog_rank_band'] = pd.cut(data['dog_rank'], bins=list(range(0, 201, 20)), labels=list(range(20, 201, 20)), right=False)

data['fav_rank_band'] = data['fav_rank_band'].astype(float)
data['dog_rank_band'] = data['dog_rank_band'].astype(float)

for i in range(0, 201, 50):
    data[f'fav_rank_gt_{i}'] = data['fav_rank_band'] > i
    data[f'fav_rank_lt_{i}'] = data['fav_rank_band'] < i

    data[f'dog_rank_gt_{i}'] = data['dog_rank_band'] > i
    data[f'dog_rank_lt_{i}'] = data['dog_rank_band'] < i





data['Female']=data['Sex']=='Womens'
data['Male']=data['Sex']=='Mens'
data=data[['Fav', 'fav_win_percent_grt_10',
       'fav_win_percent_grt_20', 'fav_win_percent_grt_30',
       'fav_win_percent_grt_40', 'fav_win_percent_grt_50',
       'fav_win_percent_grt_60', 'fav_win_percent_grt_70',
       'fav_win_percent_grt_80', 'fav_win_percent_grt_90',
       'fav_win_percent_100', 'dog_win_percent_grt_10',
       'dog_win_percent_grt_20', 'dog_win_percent_grt_30',
       'dog_win_percent_grt_40', 'dog_win_percent_grt_50',
       'dog_win_percent_grt_60', 'dog_win_percent_grt_70',
       'dog_win_percent_grt_80', 'dog_win_percent_grt_90',
       'dog_win_percent_100', 'fav_win_percent_lt_10', 'fav_win_percent_lt_20',
       'fav_win_percent_lt_30', 'fav_win_percent_lt_40',
       'fav_win_percent_lt_50', 'fav_win_percent_lt_60',
       'fav_win_percent_lt_70', 'fav_win_percent_lt_80',
       'fav_win_percent_lt_90', 'fav_win_percent_0', 'dog_win_percent_lt_10',
       'dog_win_percent_lt_20', 'dog_win_percent_lt_30',
       'dog_win_percent_lt_40', 'dog_win_percent_lt_50',
       'dog_win_percent_lt_60', 'dog_win_percent_lt_70',
       'dog_win_percent_lt_80', 'dog_win_percent_lt_90', 'dog_win_percent_0',
         'Female', 'Male','Fav_Odds','Time','fav_percent','dog_percent','fav_rank_gt_0',
 'fav_rank_lt_0',
 'dog_rank_gt_0',
 'dog_rank_lt_0',
 'fav_rank_gt_50',
 'fav_rank_lt_50',
 'dog_rank_gt_50',
 'dog_rank_lt_50',
 'fav_rank_gt_100',
 'fav_rank_lt_100',
 'dog_rank_gt_100',
 'dog_rank_lt_100',
 'fav_rank_gt_150',
 'fav_rank_lt_150',
 'dog_rank_gt_150',
 'dog_rank_lt_150',
 'fav_rank_gt_200',
 'fav_rank_lt_200',
 'dog_rank_gt_200',
 'dog_rank_lt_200',
         ]][(data['Fav_Odds']>1.8)]

Unnamed: 0,Fav,fav_win_percent_grt_10,fav_win_percent_grt_20,fav_win_percent_grt_30,fav_win_percent_grt_40,fav_win_percent_grt_50,fav_win_percent_grt_60,fav_win_percent_grt_70,fav_win_percent_grt_80,fav_win_percent_grt_90,...,dog_rank_gt_100,dog_rank_lt_100,fav_rank_gt_150,fav_rank_lt_150,dog_rank_gt_150,dog_rank_lt_150,fav_rank_gt_200,fav_rank_lt_200,dog_rank_gt_200,dog_rank_lt_200
23,Stan Wawrinka,True,True,True,True,True,False,False,False,False,...,True,False,False,True,False,True,False,True,False,True
26,Ugo Humbert,True,True,True,False,False,False,False,False,False,...,False,True,False,True,False,True,False,True,False,True


In [415]:
filtered_data=data[['dog_win_percent_grt_20',
 'dog_rank_gt_100',
 'fav_win_percent_grt_30',
 'fav_win_percent_grt_40',
 'fav_win_percent_lt_40',
 'fav_rank_lt_200',
 'dog_win_percent_lt_70',
 'dog_rank_lt_100',
 'dog_win_percent_grt_50',
 'fav_rank_gt_50',
 'fav_win_percent_lt_30',
 'dog_rank_gt_0',
 'dog_rank_gt_150',
 'dog_rank_lt_200',
 'fav_rank_lt_50',
 'dog_win_percent_lt_80',
 'fav_rank_gt_100','Fav']][(data['Fav_Odds']>1.8)].copy()

In [416]:
from sklearn import svm
from joblib import load

# Load the saved model from the file
model = load('.\_superseded\model_file.joblib')

# Assuming you have a new dataset stored in a variable called 'new_data'
# Make predictions on the new dataset
drop_fav=filtered_data.drop(columns=['Fav'])
drop_fav=drop_fav.reset_index(drop=True)
predictions = model.predict(drop_fav)
predictions_df = pd.DataFrame(predictions, columns=['predictions'])

# Merge the predictions with the original data based on the index
#merged_data = pd.concat([drop_fav, predictions_df], axis=1)
data_new_index=data.reset_index(drop=True)
predictions_df.merge(data_new_index,left_index=True,right_index=True)[['Fav','predictions','Fav_Odds','Time']]


Unnamed: 0,Fav,predictions,Fav_Odds,Time
0,Stan Wawrinka,True,1.88,20:30
1,Ugo Humbert,True,1.83,20:30


In [417]:
data=final_clay
data['fav_win_percent_grt_10'] = data['fav_percent'] > 0.1
data['fav_win_percent_grt_20'] = data['fav_percent'] > 0.2
data['fav_win_percent_grt_30'] = data['fav_percent'] > 0.3
data['fav_win_percent_grt_40'] = data['fav_percent'] > 0.4
data['fav_win_percent_grt_50'] = data['fav_percent'] > 0.5
data['fav_win_percent_grt_60'] = data['fav_percent'] > 0.6
data['fav_win_percent_grt_70'] = data['fav_percent'] > 0.7
data['fav_win_percent_grt_80'] = data['fav_percent'] > 0.8
data['fav_win_percent_grt_90'] = data['fav_percent'] > 0.9
data['fav_win_percent_100'] = data['fav_percent'] == 1.0
data['dog_win_percent_grt_10'] = data['dog_percent'] > 0.1
data['dog_win_percent_grt_20'] = data['dog_percent'] > 0.2
data['dog_win_percent_grt_30'] = data['dog_percent'] > 0.3
data['dog_win_percent_grt_40'] = data['dog_percent'] > 0.4
data['dog_win_percent_grt_50'] = data['dog_percent'] > 0.5
data['dog_win_percent_grt_60'] = data['dog_percent'] > 0.6
data['dog_win_percent_grt_70'] = data['dog_percent'] > 0.7
data['dog_win_percent_grt_80'] = data['dog_percent'] > 0.8
data['dog_win_percent_grt_90'] = data['dog_percent'] > 0.9
data['dog_win_percent_100'] = data['dog_percent'] == 1.0

data['fav_win_percent_lt_10'] = data['fav_percent'] < 0.1
data['fav_win_percent_lt_20'] = data['fav_percent'] < 0.2
data['fav_win_percent_lt_30'] = data['fav_percent'] < 0.3
data['fav_win_percent_lt_40'] = data['fav_percent'] < 0.4
data['fav_win_percent_lt_50'] = data['fav_percent'] < 0.5
data['fav_win_percent_lt_60'] = data['fav_percent'] < 0.6
data['fav_win_percent_lt_70'] = data['fav_percent'] < 0.7
data['fav_win_percent_lt_80'] = data['fav_percent'] < 0.8
data['fav_win_percent_lt_90'] = data['fav_percent'] < 0.9
data['fav_win_percent_0'] = data['fav_percent'] == 0.0

data['dog_win_percent_lt_10'] = data['dog_percent'] < 0.1
data['dog_win_percent_lt_20'] = data['dog_percent'] < 0.2
data['dog_win_percent_lt_30'] = data['dog_percent'] < 0.3
data['dog_win_percent_lt_40'] = data['dog_percent'] < 0.4
data['dog_win_percent_lt_50'] = data['dog_percent'] < 0.5
data['dog_win_percent_lt_60'] = data['dog_percent'] < 0.6
data['dog_win_percent_lt_70'] = data['dog_percent'] < 0.7
data['dog_win_percent_lt_80'] = data['dog_percent'] < 0.8
data['dog_win_percent_lt_90'] = data['dog_percent'] < 0.9
data['dog_win_percent_0'] = data['dog_percent'] == 0.0
#data['Winner_']=data['Winner']
#data['Winner_IsFav']=data['Winner']==data['Fav']
#data['Fav_IsEloFav']=data['Fav']==data['Elo_Fav']
data['fav_rank_band'] = pd.cut(data['fav_rank'], bins=list(range(0, 201, 20)), labels=list(range(20, 201, 20)), right=False)
data['dog_rank_band'] = pd.cut(data['dog_rank'], bins=list(range(0, 201, 20)), labels=list(range(20, 201, 20)), right=False)

data['fav_rank_band'] = data['fav_rank_band'].astype(float)
data['dog_rank_band'] = data['dog_rank_band'].astype(float)

for i in range(0, 201, 50):
    data[f'fav_rank_gt_{i}'] = data['fav_rank_band'] > i
    data[f'fav_rank_lt_{i}'] = data['fav_rank_band'] < i

    data[f'dog_rank_gt_{i}'] = data['dog_rank_band'] > i
    data[f'dog_rank_lt_{i}'] = data['dog_rank_band'] < i





data['Female']=data['Sex']=='Womens'
data['Male']=data['Sex']=='Mens'
data=data[['Fav', 'fav_win_percent_grt_10',
       'fav_win_percent_grt_20', 'fav_win_percent_grt_30',
       'fav_win_percent_grt_40', 'fav_win_percent_grt_50',
       'fav_win_percent_grt_60', 'fav_win_percent_grt_70',
       'fav_win_percent_grt_80', 'fav_win_percent_grt_90',
       'fav_win_percent_100', 'dog_win_percent_grt_10',
       'dog_win_percent_grt_20', 'dog_win_percent_grt_30',
       'dog_win_percent_grt_40', 'dog_win_percent_grt_50',
       'dog_win_percent_grt_60', 'dog_win_percent_grt_70',
       'dog_win_percent_grt_80', 'dog_win_percent_grt_90',
       'dog_win_percent_100', 'fav_win_percent_lt_10', 'fav_win_percent_lt_20',
       'fav_win_percent_lt_30', 'fav_win_percent_lt_40',
       'fav_win_percent_lt_50', 'fav_win_percent_lt_60',
       'fav_win_percent_lt_70', 'fav_win_percent_lt_80',
       'fav_win_percent_lt_90', 'fav_win_percent_0', 'dog_win_percent_lt_10',
       'dog_win_percent_lt_20', 'dog_win_percent_lt_30',
       'dog_win_percent_lt_40', 'dog_win_percent_lt_50',
       'dog_win_percent_lt_60', 'dog_win_percent_lt_70',
       'dog_win_percent_lt_80', 'dog_win_percent_lt_90', 'dog_win_percent_0',
       'Female', 'Male','fav_percent','dog_percent','fav_rank_gt_0',
 'fav_rank_lt_0',
 'dog_rank_gt_0',
 'dog_rank_lt_0',
 'fav_rank_gt_50',
 'fav_rank_lt_50',
 'dog_rank_gt_50',
 'dog_rank_lt_50',
 'fav_rank_gt_100',
 'fav_rank_lt_100',
 'dog_rank_gt_100',
 'dog_rank_lt_100',
 'fav_rank_gt_150',
 'fav_rank_lt_150',
 'dog_rank_gt_150',
 'dog_rank_lt_150',
 'fav_rank_gt_200',
 'fav_rank_lt_200',
 'dog_rank_gt_200',
 'dog_rank_lt_200','Fav_Odds','Time'
         ]][(data['Fav_Odds']>1.7)&(data['Fav_Odds']<1.8)]

filtered_data=data[['fav_win_percent_grt_10', 'fav_win_percent_grt_20',
       'fav_win_percent_grt_30', 'fav_win_percent_grt_40',
       'fav_win_percent_grt_50', 'fav_win_percent_grt_60',
       'fav_win_percent_grt_70', 'fav_win_percent_grt_80',
       'fav_win_percent_grt_90', 'fav_win_percent_100',
       'dog_win_percent_grt_20', 'dog_win_percent_grt_30',
       'dog_win_percent_grt_40', 'dog_win_percent_grt_50',
       'dog_win_percent_grt_60', 'dog_win_percent_grt_70',
       'dog_win_percent_grt_90', 'dog_win_percent_100',
       'fav_win_percent_lt_10', 'fav_win_percent_lt_20',
       'fav_win_percent_lt_30', 'fav_win_percent_lt_40',
       'fav_win_percent_lt_50', 'fav_win_percent_lt_60',
       'fav_win_percent_lt_70', 'fav_win_percent_lt_80',
       'fav_win_percent_lt_90', 'fav_win_percent_0', 'dog_win_percent_lt_10',
       'dog_win_percent_lt_20', 'dog_win_percent_lt_30',
       'dog_win_percent_lt_40', 'dog_win_percent_lt_50',
       'dog_win_percent_lt_60', 'dog_win_percent_lt_70',
       'dog_win_percent_lt_80', 'dog_win_percent_lt_90', 'dog_win_percent_0',
         'Male', 'fav_rank_gt_0',
       'fav_rank_lt_0', 'dog_rank_gt_0', 'dog_rank_lt_0', 'fav_rank_gt_50',
       'fav_rank_lt_50', 'dog_rank_gt_50', 'dog_rank_lt_50', 'fav_rank_gt_100',
       'fav_rank_lt_100', 'dog_rank_gt_100', 'dog_rank_lt_100',
       'fav_rank_gt_150', 'fav_rank_lt_150', 'dog_rank_gt_150',
       'dog_rank_lt_150', 'fav_rank_gt_200', 'fav_rank_lt_200',
       'dog_rank_gt_200', 'dog_rank_lt_200', 'dog_percent','Fav',]].copy()

In [418]:
from sklearn import svm
from joblib import load

# Load the saved model from the file
model = load('.\model_file2.joblib')

# Assuming you have a new dataset stored in a variable called 'new_data'
# Make predictions on the new dataset
drop_fav=filtered_data.drop(columns=['Fav'])
drop_fav=drop_fav.reset_index(drop=True)
predictions = model.predict(drop_fav)
predictions_df = pd.DataFrame(predictions, columns=['predictions'])

# Merge the predictions with the original data based on the index
#merged_data = pd.concat([drop_fav, predictions_df], axis=1)
data_new_index=data.reset_index(drop=True)
predictions_df.merge(data_new_index,left_index=True,right_index=True)[['Fav','predictions','Fav_Odds','Time']]


Unnamed: 0,Fav,predictions,Fav_Odds,Time
0,Diego Schwartzman,True,1.78,20:30
1,Nick Hardt,True,1.75,23:00


In [420]:
data=final_hard
data['fav_win_percent_grt_10'] = data['fav_percent'] > 0.1
data['fav_win_percent_grt_20'] = data['fav_percent'] > 0.2
data['fav_win_percent_grt_30'] = data['fav_percent'] > 0.3
data['fav_win_percent_grt_40'] = data['fav_percent'] > 0.4
data['fav_win_percent_grt_50'] = data['fav_percent'] > 0.5
data['fav_win_percent_grt_60'] = data['fav_percent'] > 0.6
data['fav_win_percent_grt_70'] = data['fav_percent'] > 0.7
data['fav_win_percent_grt_80'] = data['fav_percent'] > 0.8
data['fav_win_percent_grt_90'] = data['fav_percent'] > 0.9
data['fav_win_percent_100'] = data['fav_percent'] == 1.0
data['dog_win_percent_grt_10'] = data['dog_percent'] > 0.1
data['dog_win_percent_grt_20'] = data['dog_percent'] > 0.2
data['dog_win_percent_grt_30'] = data['dog_percent'] > 0.3
data['dog_win_percent_grt_40'] = data['dog_percent'] > 0.4
data['dog_win_percent_grt_50'] = data['dog_percent'] > 0.5
data['dog_win_percent_grt_60'] = data['dog_percent'] > 0.6
data['dog_win_percent_grt_70'] = data['dog_percent'] > 0.7
data['dog_win_percent_grt_80'] = data['dog_percent'] > 0.8
data['dog_win_percent_grt_90'] = data['dog_percent'] > 0.9
data['dog_win_percent_100'] = data['dog_percent'] == 1.0

data['fav_win_percent_lt_10'] = data['fav_percent'] < 0.1
data['fav_win_percent_lt_20'] = data['fav_percent'] < 0.2
data['fav_win_percent_lt_30'] = data['fav_percent'] < 0.3
data['fav_win_percent_lt_40'] = data['fav_percent'] < 0.4
data['fav_win_percent_lt_50'] = data['fav_percent'] < 0.5
data['fav_win_percent_lt_60'] = data['fav_percent'] < 0.6
data['fav_win_percent_lt_70'] = data['fav_percent'] < 0.7
data['fav_win_percent_lt_80'] = data['fav_percent'] < 0.8
data['fav_win_percent_lt_90'] = data['fav_percent'] < 0.9
data['fav_win_percent_0'] = data['fav_percent'] == 0.0

data['dog_win_percent_lt_10'] = data['dog_percent'] < 0.1
data['dog_win_percent_lt_20'] = data['dog_percent'] < 0.2
data['dog_win_percent_lt_30'] = data['dog_percent'] < 0.3
data['dog_win_percent_lt_40'] = data['dog_percent'] < 0.4
data['dog_win_percent_lt_50'] = data['dog_percent'] < 0.5
data['dog_win_percent_lt_60'] = data['dog_percent'] < 0.6
data['dog_win_percent_lt_70'] = data['dog_percent'] < 0.7
data['dog_win_percent_lt_80'] = data['dog_percent'] < 0.8
data['dog_win_percent_lt_90'] = data['dog_percent'] < 0.9
data['dog_win_percent_0'] = data['dog_percent'] == 0.0
#data['Winner_']=data['Winner']
#data['Winner_IsFav']=data['Winner']==data['Fav']
#data['Fav_IsEloFav']=data['Fav']==data['Elo_Fav']
data['fav_rank_band'] = pd.cut(data['fav_rank'], bins=list(range(0, 201, 20)), labels=list(range(20, 201, 20)), right=False)
data['dog_rank_band'] = pd.cut(data['dog_rank'], bins=list(range(0, 201, 20)), labels=list(range(20, 201, 20)), right=False)

data['fav_rank_band'] = data['fav_rank_band'].astype(float)
data['dog_rank_band'] = data['dog_rank_band'].astype(float)

for i in range(0, 201, 50):
    data[f'fav_rank_gt_{i}'] = data['fav_rank_band'] > i
    data[f'fav_rank_lt_{i}'] = data['fav_rank_band'] < i

    data[f'dog_rank_gt_{i}'] = data['dog_rank_band'] > i
    data[f'dog_rank_lt_{i}'] = data['dog_rank_band'] < i





data['Female']=data['Sex']=='Womens'
data['Male']=data['Sex']=='Mens'
data=data[['Fav', 'fav_win_percent_grt_10',
       'fav_win_percent_grt_20', 'fav_win_percent_grt_30',
       'fav_win_percent_grt_40', 'fav_win_percent_grt_50',
       'fav_win_percent_grt_60', 'fav_win_percent_grt_70',
       'fav_win_percent_grt_80', 'fav_win_percent_grt_90',
       'fav_win_percent_100', 'dog_win_percent_grt_10',
       'dog_win_percent_grt_20', 'dog_win_percent_grt_30',
       'dog_win_percent_grt_40', 'dog_win_percent_grt_50',
       'dog_win_percent_grt_60', 'dog_win_percent_grt_70',
       'dog_win_percent_grt_80', 'dog_win_percent_grt_90',
       'dog_win_percent_100', 'fav_win_percent_lt_10', 'fav_win_percent_lt_20',
       'fav_win_percent_lt_30', 'fav_win_percent_lt_40',
       'fav_win_percent_lt_50', 'fav_win_percent_lt_60',
       'fav_win_percent_lt_70', 'fav_win_percent_lt_80',
       'fav_win_percent_lt_90', 'fav_win_percent_0', 'dog_win_percent_lt_10',
       'dog_win_percent_lt_20', 'dog_win_percent_lt_30',
       'dog_win_percent_lt_40', 'dog_win_percent_lt_50',
       'dog_win_percent_lt_60', 'dog_win_percent_lt_70',
       'dog_win_percent_lt_80', 'dog_win_percent_lt_90', 'dog_win_percent_0',
         'Female', 'Male','Fav_Odds','Time','fav_percent','dog_percent','fav_rank_gt_0',
 'fav_rank_lt_0',
 'dog_rank_gt_0',
 'dog_rank_lt_0',
 'fav_rank_gt_50',
 'fav_rank_lt_50',
 'dog_rank_gt_50',
 'dog_rank_lt_50',
 'fav_rank_gt_100',
 'fav_rank_lt_100',
 'dog_rank_gt_100',
 'dog_rank_lt_100',
 'fav_rank_gt_150',
 'fav_rank_lt_150',
 'dog_rank_gt_150',
 'dog_rank_lt_150',
 'fav_rank_gt_200',
 'fav_rank_lt_200',
 'dog_rank_gt_200',
 'dog_rank_lt_200',
         ]][(data['Fav_Odds']>1.8)]

from sklearn import svm
from joblib import load

# Load the saved model from the file
model = load('.\model_hard2.joblib')

# Assuming you have a new dataset stored in a variable called 'new_data'
# Make predictions on the new dataset
drop_fav=filtered_data.drop(columns=['Fav'])
drop_fav=drop_fav.reset_index(drop=True)
predictions = model.predict(drop_fav)
predictions_df = pd.DataFrame(predictions, columns=['predictions'])

# Merge the predictions with the original data based on the index
#merged_data = pd.concat([drop_fav, predictions_df], axis=1)
data_new_index=data.reset_index(drop=True)
predictions_df.merge(data_new_index,left_index=True,right_index=True)[['Fav','predictions','Fav_Odds','Time']]


Unnamed: 0,Fav,predictions,Fav_Odds,Time
0,Tennys Sandgren,False,1.86,09:25


In [421]:
data=final_hard
data['fav_win_percent_grt_10'] = data['fav_percent'] > 0.1
data['fav_win_percent_grt_20'] = data['fav_percent'] > 0.2
data['fav_win_percent_grt_30'] = data['fav_percent'] > 0.3
data['fav_win_percent_grt_40'] = data['fav_percent'] > 0.4
data['fav_win_percent_grt_50'] = data['fav_percent'] > 0.5
data['fav_win_percent_grt_60'] = data['fav_percent'] > 0.6
data['fav_win_percent_grt_70'] = data['fav_percent'] > 0.7
data['fav_win_percent_grt_80'] = data['fav_percent'] > 0.8
data['fav_win_percent_grt_90'] = data['fav_percent'] > 0.9
data['fav_win_percent_100'] = data['fav_percent'] == 1.0
data['dog_win_percent_grt_10'] = data['dog_percent'] > 0.1
data['dog_win_percent_grt_20'] = data['dog_percent'] > 0.2
data['dog_win_percent_grt_30'] = data['dog_percent'] > 0.3
data['dog_win_percent_grt_40'] = data['dog_percent'] > 0.4
data['dog_win_percent_grt_50'] = data['dog_percent'] > 0.5
data['dog_win_percent_grt_60'] = data['dog_percent'] > 0.6
data['dog_win_percent_grt_70'] = data['dog_percent'] > 0.7
data['dog_win_percent_grt_80'] = data['dog_percent'] > 0.8
data['dog_win_percent_grt_90'] = data['dog_percent'] > 0.9
data['dog_win_percent_100'] = data['dog_percent'] == 1.0

data['fav_win_percent_lt_10'] = data['fav_percent'] < 0.1
data['fav_win_percent_lt_20'] = data['fav_percent'] < 0.2
data['fav_win_percent_lt_30'] = data['fav_percent'] < 0.3
data['fav_win_percent_lt_40'] = data['fav_percent'] < 0.4
data['fav_win_percent_lt_50'] = data['fav_percent'] < 0.5
data['fav_win_percent_lt_60'] = data['fav_percent'] < 0.6
data['fav_win_percent_lt_70'] = data['fav_percent'] < 0.7
data['fav_win_percent_lt_80'] = data['fav_percent'] < 0.8
data['fav_win_percent_lt_90'] = data['fav_percent'] < 0.9
data['fav_win_percent_0'] = data['fav_percent'] == 0.0

data['dog_win_percent_lt_10'] = data['dog_percent'] < 0.1
data['dog_win_percent_lt_20'] = data['dog_percent'] < 0.2
data['dog_win_percent_lt_30'] = data['dog_percent'] < 0.3
data['dog_win_percent_lt_40'] = data['dog_percent'] < 0.4
data['dog_win_percent_lt_50'] = data['dog_percent'] < 0.5
data['dog_win_percent_lt_60'] = data['dog_percent'] < 0.6
data['dog_win_percent_lt_70'] = data['dog_percent'] < 0.7
data['dog_win_percent_lt_80'] = data['dog_percent'] < 0.8
data['dog_win_percent_lt_90'] = data['dog_percent'] < 0.9
data['dog_win_percent_0'] = data['dog_percent'] == 0.0
#data['Winner_']=data['Winner']
#data['Winner_IsFav']=data['Winner']==data['Fav']
#data['Fav_IsEloFav']=data['Fav']==data['Elo_Fav']
data['fav_rank_band'] = pd.cut(data['fav_rank'], bins=list(range(0, 201, 20)), labels=list(range(20, 201, 20)), right=False)
data['dog_rank_band'] = pd.cut(data['dog_rank'], bins=list(range(0, 201, 20)), labels=list(range(20, 201, 20)), right=False)

data['fav_rank_band'] = data['fav_rank_band'].astype(float)
data['dog_rank_band'] = data['dog_rank_band'].astype(float)

for i in range(0, 201, 50):
    data[f'fav_rank_gt_{i}'] = data['fav_rank_band'] > i
    data[f'fav_rank_lt_{i}'] = data['fav_rank_band'] < i

    data[f'dog_rank_gt_{i}'] = data['dog_rank_band'] > i
    data[f'dog_rank_lt_{i}'] = data['dog_rank_band'] < i





data['Female']=data['Sex']=='Womens'
data['Male']=data['Sex']=='Mens'
data=data[['Fav', 'fav_win_percent_grt_10',
       'fav_win_percent_grt_20', 'fav_win_percent_grt_30',
       'fav_win_percent_grt_40', 'fav_win_percent_grt_50',
       'fav_win_percent_grt_60', 'fav_win_percent_grt_70',
       'fav_win_percent_grt_80', 'fav_win_percent_grt_90',
       'fav_win_percent_100', 'dog_win_percent_grt_10',
       'dog_win_percent_grt_20', 'dog_win_percent_grt_30',
       'dog_win_percent_grt_40', 'dog_win_percent_grt_50',
       'dog_win_percent_grt_60', 'dog_win_percent_grt_70',
       'dog_win_percent_grt_80', 'dog_win_percent_grt_90',
       'dog_win_percent_100', 'fav_win_percent_lt_10', 'fav_win_percent_lt_20',
       'fav_win_percent_lt_30', 'fav_win_percent_lt_40',
       'fav_win_percent_lt_50', 'fav_win_percent_lt_60',
       'fav_win_percent_lt_70', 'fav_win_percent_lt_80',
       'fav_win_percent_lt_90', 'fav_win_percent_0', 'dog_win_percent_lt_10',
       'dog_win_percent_lt_20', 'dog_win_percent_lt_30',
       'dog_win_percent_lt_40', 'dog_win_percent_lt_50',
       'dog_win_percent_lt_60', 'dog_win_percent_lt_70',
       'dog_win_percent_lt_80', 'dog_win_percent_lt_90', 'dog_win_percent_0',
         'Female', 'Male','Fav_Odds','Time','fav_percent','dog_percent','fav_rank_gt_0',
 'fav_rank_lt_0',
 'dog_rank_gt_0',
 'dog_rank_lt_0',
 'fav_rank_gt_50',
 'fav_rank_lt_50',
 'dog_rank_gt_50',
 'dog_rank_lt_50',
 'fav_rank_gt_100',
 'fav_rank_lt_100',
 'dog_rank_gt_100',
 'dog_rank_lt_100',
 'fav_rank_gt_150',
 'fav_rank_lt_150',
 'dog_rank_gt_150',
 'dog_rank_lt_150',
 'fav_rank_gt_200',
 'fav_rank_lt_200',
 'dog_rank_gt_200',
 'dog_rank_lt_200',
         ]][(data['Fav_Odds']>1.7)&(data['Fav_Odds']<1.8)]

from sklearn import svm
from joblib import load

# Load the saved model from the file
model = load('.\model_hard2.joblib')

# Assuming you have a new dataset stored in a variable called 'new_data'
# Make predictions on the new dataset
drop_fav=filtered_data.drop(columns=['Fav'])
drop_fav=drop_fav.reset_index(drop=True)
predictions = model.predict(drop_fav)
predictions_df = pd.DataFrame(predictions, columns=['predictions'])

# Merge the predictions with the original data based on the index
#merged_data = pd.concat([drop_fav, predictions_df], axis=1)
data_new_index=data.reset_index(drop=True)
predictions_df.merge(data_new_index,left_index=True,right_index=True)[['Fav','predictions','Fav_Odds','Time']]


Unnamed: 0,Fav,predictions,Fav_Odds,Time
0,Alexis Galarneau,False,1.74,02:55
