In [475]:
# Import necessary libraries
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import datetime

def get_match_data(start_date,time_now_formatted,devengine):


    # Get historical match data on hard surface between start date and yesterday
    elo_hard = pd.read_sql_query(
        f"Select DISTINCT * From Elo_AllMatches_Hard where Date > '{start_date}' and Date not like '{time_now_formatted}'",
        con=devengine,
    )

    # Get historical match data on clay surface between start date and yesterday
    elo_clay = pd.read_sql_query(
        f"Select DISTINCT * From Elo_AllMatches_Clay where Date > '{start_date}' and Date not like '{time_now_formatted}'",
        con=devengine,
    )

    #Get today's matches on hard surface that haven't yet been resulted
    elo_data_hard =pd.read_sql_query(
        f"Select DISTINCT * From Elo_AllMatches_Hard where Date like '{time_now_formatted}' and resulted like 'False'",
        con=devengine,)

    #Get today's matches on clay surface that haven't yet been resulted
    elo_data_clay =pd.read_sql_query(
        f"Select DISTINCT * From Elo_AllMatches_Clay where Date like '{time_now_formatted}' and resulted like 'False'",
        con=devengine,)

    return elo_hard, elo_clay, elo_data_hard, elo_data_clay

# Connect to SQLite database using SQLAlchemy's create_engine
devengine = create_engine("sqlite:///C:/Git/tennis_atp/database/bets_sqllite.db")
# Get current date and time
time_now = datetime.datetime.now()

# Format current date as string in YYYY-MM-DD format
time_now_formatted = time_now.strftime("%Y-%m-%d")

# Get the start date two years ago from today
today =time_now
two_years_ago = (today - datetime.timedelta(days=365*2)).strftime("%Y-%m-%d")

elo_hard, elo_clay, elo_data_hard, elo_data_clay = get_match_data(two_years_ago,time_now_formatted,devengine)

def get_player_record(player, opponent_rank, history, range_low,range_high,auto):
    if auto:
        opponent_rank_low = opponent_rank - range_low
        opponent_rank_high = opponent_rank + range_high
    else:
        opponent_rank_low = range_low
        opponent_rank_high = range_high

    player_history = history[
        (
            (history["Fav"] == player)
            & (
                (history["Dog_Rank"] > opponent_rank_low)
                & (history["Dog_Rank"] < opponent_rank_high)
            )
        )
        | (
            (history["Dog"] == player)
            & (
                (history["Fav_Rank"] > opponent_rank_low)
                & (history["Fav_Rank"] < opponent_rank_high)
            )
        )
    ]
    if player_history.empty == False:
        result = float(
            len(player_history[player_history["Winner"] == player])
            / len(player_history)
        )
        return result, len(player_history)
    else:
        return 0, 0

def get_filtered_data(elo_data, elo):
    result_df = pd.DataFrame()
    for _, row in elo_data.sort_values(by="Time").iterrows():
        low_limit = 50
        high_limit = 50

        fav_percent, games = get_player_record(row.Fav, row.Dog_Rank, elo_hard, low_limit, high_limit, True)
        count = 0
        while games < 10 and count < 200:
            count = count + 1
            low_limit = low_limit + 10
            high_limit = high_limit + 10
            fav_percent, games = get_player_record(row.Fav, row.Dog_Rank, elo_hard, low_limit, high_limit, True)

        low_limit = 50
        high_limit = 50
        dog_percent, games2 = get_player_record(row.Dog, row.Fav_Rank, elo_hard, low_limit, high_limit, True)
        count = 0
        while games2 < 10 and count < 200:
            count = count + 1
            low_limit = low_limit + 10
            high_limit = high_limit + 10
            dog_percent, games2 = get_player_record(row.Dog, row.Fav_Rank, elo_hard, low_limit, high_limit, True)

        if games > 4 and games2 > 4:
            temp_df = pd.DataFrame(
                {
                    "Time": [row.Time],
                    "Fav_Odds": [row.Fav_Odds],
                    "Dog_Odds": [row.Dog_Odds],
                    "Fav": [row.Fav],
                    "Elo_Fav": [row.Elo_Fav],
                    "Fav_Record": ["{:.0%}".format(fav_percent)],
                    "Fav_Games": [games],
                    "Dog": [row.Dog],
                    "Dog_Odds": [row.Dog_Odds],
                    "Dog_Record": ["{:.0%}".format(dog_percent)],
                    "Dog_Games": [games2],
                    "fav_percent": [fav_percent],
                    "dog_percent": [dog_percent],
                    "Sex": [row.Sex],
                }
            )
            result_df = pd.concat([result_df, temp_df])
    return result_df

results_hard=get_filtered_data(elo_data_hard,elo_hard)
results_clay=get_filtered_data(elo_data_clay,elo_clay)
def process_serving_data(result_df):
    try:
        # Try to read the 'servers_today.xlsx' file
        serving = pd.read_csv('mensserving.csv')

        # Try to read the 'servers_today_womens.xlsx' file
        serving_womens = pd.read_csv('womensserving.csv')

        # If 'serving_womens' dataframe is not empty, concatenate with 'serving' dataframe
        if serving_womens.empty == False:
            serving = pd.concat([serving, serving_womens])
        else:
            serving = serving

        # Drop the 'Time' column from the 'serving' dataframe
        #serving = serving.drop(columns='Time')
    except FileNotFoundError as e:
        # If either of the excel files is not found, print an error message and set serving to None
        print("The required excel file could not be found.")
        print("Error:", e)
        serving = None

    if serving is not None:
        try:
            # Try to merge the 'result_df' and 'serving' dataframes on the 'Fav' and 'Dog' columns
            result = pd.merge(result_df, serving, how='left', left_on=['Fav'], right_on=['Name'])
            result = pd.merge(result,serving, how='left', left_on=['Dog'], right_on=['Name'])
            result.rename(
        columns={
            "Service Games Won_x": "Fav_Serve%",
            "Service Games Won_y": "Dog_Serve%",
            "Return Games Won_x": "Fav_Return%",
            "Return Games Won_y": "Dog_Return%",
        },
        inplace=True,)
            result.drop(columns=['Name_x','Name_y'],inplace=True)
    

            # Set 'final_hard' to the 'result' dataframe
            result_serving = result
        except Exception as e:
            # If an error occurs during merging, print an error message and set both 'result' and 'final_hard' to None
            print("Error occured while merging the dataframes.")
            print("Error:", e)
            result = None
            result_serving = None
    else:
        result = None
        result_serving = None
        
    return result, result_serving


_,serving_hard=process_serving_data(results_hard)
_,serving_clay=process_serving_data(results_clay)

elo_hardxx = pd.read_sql_query(
    f"Select DISTINCT * From Elo_AllMatches_Hard where Date > '2022-01-01' and Date not like '{time_now_formatted}' --and Fav_odds>1.2 and Fav_odds<1.4 ",
    con=devengine,)

data_concat = pd.DataFrame(columns=['Date', 'Player', 'Odds', 'Win/Loss'])
for dataset_type in [('Winner', 'Win'), ('Loser', 'Loss')]:
    df = elo_hard[['Date', dataset_type[0], f"{dataset_type[0]}_Odds"]].copy()
    df['Player'] = df[dataset_type[0]]
    df['Odds'] = df[f"{dataset_type[0]}_Odds"]
    df['Win/Loss'] = dataset_type[1]
    df.drop(columns=[dataset_type[0], f"{dataset_type[0]}_Odds"], inplace=True)
    data_concat = pd.concat([data_concat, df])
data_concat = data_concat.sort_index()
data_concat['Odds'] = data_concat.Odds.astype(float)

def analyse_matchups(result_df,data_concat):
    for _,matchup in result_df.iterrows():
        player1=matchup.Fav
        player2=matchup.Dog
        player1_odds=float(matchup.Fav_Odds)
        player1_odds_hi=player1_odds+0.2
        player1_odds_lo=1
        player2_odds=float(matchup.Dog_Odds)
        player2_odds_hi=player2_odds+0.5
        player2_odds_lo=2
        player1=data_concat[data_concat['Player']==player1].copy()
        player2=data_concat[data_concat['Player']==player2].copy()
        player2=player2[(player2['Odds']>player2_odds_lo)&(player2['Odds']<player2_odds_hi)]
        if len(player2)>0:
            winperc2=len(player2[player2['Win/Loss']=='Win'])/len(player2)
        else:
            winperc2=0
        player1=player1[(player1['Odds']>player1_odds_lo)&(player1['Odds']<player1_odds_hi)]
        if len(player1)>0:
            winperc1=len(player1[player1['Win/Loss']=='Win'])/len(player1)
        else:
            winperc1=0
        if len(player1)>5 and len(player2)>3:
            print(matchup.Time,f"{matchup.Fav} ({round(player1_odds_lo,2)}-->{round(player1_odds_hi,2)})",f"{matchup.Dog} ({round(player2_odds_lo,2)}-->{round(player2_odds_hi,2)})")
            print(len(player1),winperc1,len(player2),winperc2)

analyse_matchups(serving_hard,data_concat)
final_hard=serving_hard

00:05 Jannik Sinner (1-->1.5) Maxime Cressy (2-->4.05)
46 0.8478260869565217 16 0.375
00:05 Petra Martic (1-->1.99) Anastasia Potapova (2-->2.54)
21 0.5714285714285714 7 0.2857142857142857
00:30 Marie Bouzkova (1-->1.37) Tereza Martincova (2-->5.15)
21 0.8571428571428571 29 0.3448275862068966
00:30 Bernarda Pera (1-->1.62) Madison Brengle (2-->3.26)
20 0.75 13 0.38461538461538464
00:35 Lauren Davis (1-->1.48) Viktoriya Tomova (2-->3.99)
6 0.6666666666666666 13 0.15384615384615385
00:40 Karolina Pliskova (1-->1.37) Harriet Dart (2-->5.17)
11 0.7272727272727273 22 0.36363636363636365
03:00 Christopher Eubanks (1-->1.75) Aleksandar Vukic (2-->2.86)
50 0.76 22 0.4090909090909091
04:30 Nuno Borges (1-->1.87) Steve Johnson (2-->2.63)
27 0.7037037037037037 12 0.4166666666666667
05:00 Yibing Wu (1-->2.05) John Isner (2-->2.45)
32 0.84375 6 0.5
14:30 Jason Jung (1-->1.55) Sasi Kumar Mukund (2-->3.44)
13 0.8461538461538461 18 0.3888888888888889
17:00 Liudmila Samsonova (1-->1.52) Sofia Kenin (2-

In [476]:
final_hard['Fav_Odds']=final_hard['Fav_Odds'].astype(float)
final_hard['Dog_Odds']=final_hard['Dog_Odds'].astype(float)

In [477]:
final_hard[(final_hard['Fav_Serve%']>final_hard['Dog_Serve%'])&(final_hard['Dog_Return%']>10)&(final_hard['Fav_Serve%']>0)&(final_hard['Dog_Odds']<2.5)&(final_hard['Dog_Serve%']>70)]


Unnamed: 0,Time,Fav_Odds,Dog_Odds,Fav,Elo_Fav,Fav_Record,Fav_Games,Dog,Dog_Record,Dog_Games,fav_percent,dog_percent,Sex,Fav_Serve%,Fav_Return%,Dog_Serve%,Dog_Return%
10,03:00,1.55,2.36,Christopher Eubanks,Christopher Eubanks,70%,23,Aleksandar Vukic,58%,19,0.695652,0.578947,Mens,87.0,11.0,85.0,13.0
18,17:00,1.76,2.06,Veronika Kudermetova,Barbora Krejcikova,53%,53,Barbora Krejcikova,57%,30,0.528302,0.566667,Womens,77.4,30.8,74.3,35.0
21,17:00,1.83,1.96,Qinwen Zheng,Maria Sakkari,55%,20,Maria Sakkari,62%,52,0.55,0.615385,Womens,75.2,26.4,74.8,33.0


In [478]:
for _,i in elo_data_hard.iterrows():
    check1=elo_hard[((elo_hard['Winner']==i.Winner)&(elo_hard['Loser']==i.Loser))|((elo_hard['Loser']==i.Winner)&(elo_hard['Winner']==i.Loser))]
    if check1.empty==False:
        for _, x in check1.iterrows():
            print(f"{x.Winner} beat {x.Loser}")

Titouan Droguet beat Jurgen Briand
Sasi Kumar Mukund beat Jason Jung
Bernarda Pera beat Madison Brengle
Marie Bouzkova beat Tereza Martincova
Madison Keys beat Jelena Ostapenko
Paula Badosa beat Beatriz Haddad Maia
Maria Sakkari beat Qinwen Zheng
Veronika Kudermetova beat Barbora Krejcikova
Aleksandar Vukic beat Christopher Eubanks
Arthur Cazaux beat Hoang Ly Nam
Francesco Passaro beat Kaichi Uchida


In [479]:
for _,i in elo_data_clay.iterrows():
    check1=elo_clay[((elo_clay['Winner']==i.Winner)&(elo_clay['Loser']==i.Loser))|((elo_clay['Loser']==i.Winner)&(elo_clay['Winner']==i.Loser))]
    if check1.empty==False:
        for _, x in check1.iterrows():
            print(f"{x.Winner} beat {x.Loser}")

Yannick Hanfmann beat Hugo Gaston
Federico Delbonis beat Juan Pablo Varillas


In [480]:
filter1=(final_hard['Sex']=='Womens') & \
(final_hard['dog_percent']>0.0) & (final_hard['dog_percent']<0.5) & (final_hard['fav_percent']<0.9) & (final_hard['fav_percent']>0.1) & (final_hard['Dog_Odds']<2.5) & (((final_hard['dog_percent']>0.0) & (final_hard['dog_percent']<0.1) & (final_hard['fav_percent']<0.5) & 
(final_hard['fav_percent']>0.4)) | ((final_hard['dog_percent']>0.0) & (final_hard['dog_percent']<0.1) & (final_hard['fav_percent']<0.6) & (final_hard['fav_percent']>0.5)) | 
((final_hard['dog_percent']>0.1) & (final_hard['dog_percent']<0.2) & (final_hard['fav_percent']<0.2) & (final_hard['fav_percent']>0.1)) | ((final_hard['dog_percent']>0.1) & (final_hard['dog_percent']<0.2) & (final_hard['fav_percent']<0.4) & (final_hard['fav_percent']>0.3)) | ((final_hard['dog_percent']>0.1) & (final_hard['dog_percent']<0.2) & (final_hard['fav_percent']<0.5) & (final_hard['fav_percent']>0.4)) | ((final_hard['dog_percent']>0.1) & (final_hard['dog_percent']<0.2) & (final_hard['fav_percent']<0.6) & (final_hard['fav_percent']>0.5)) | ((final_hard['dog_percent']>0.2) & (final_hard['dog_percent']<0.3) & (final_hard['fav_percent']<0.5) & (final_hard['fav_percent']>0.4)) | ((final_hard['dog_percent']>0.2) & (final_hard['dog_percent']<0.3) & (final_hard['fav_percent']<0.9) & (final_hard['fav_percent']>0.8)) | ((final_hard['dog_percent']>0.3) & (final_hard['dog_percent']<0.4) & (final_hard['fav_percent']<0.3) & (final_hard['fav_percent']>0.2)) | ((final_hard['dog_percent']>0.4) & (final_hard['dog_percent']<0.5) & (final_hard['fav_percent']<0.2) & (final_hard['fav_percent']>0.1)))
filter2=(final_hard['Sex']=='Womens') & \
           (((final_hard['dog_percent']>0.4) & (final_hard['dog_percent']<0.5) & (final_hard['fav_percent']<0.8) & (final_hard['fav_percent']>0.7)) | 
            ((final_hard['dog_percent']>0.5) & (final_hard['dog_percent']<0.6) & (final_hard['fav_percent']<0.2) & (final_hard['fav_percent']>0.1)) | 
            ((final_hard['dog_percent']>0.5) & (final_hard['dog_percent']<0.6) & (final_hard['fav_percent']<0.3) & (final_hard['fav_percent']>0.2)) | 
            ((final_hard['dog_percent']>0.5) & (final_hard['dog_percent']<0.6) & (final_hard['fav_percent']<0.6) & (final_hard['fav_percent']>0.5)) | 
            ((final_hard['dog_percent']>0.7) & (final_hard['dog_percent']<0.8) & (final_hard['fav_percent']<0.4) & (final_hard['fav_percent']>0.3)) | 
            ((final_hard['dog_percent']>0.7) & (final_hard['dog_percent']<0.8) & (final_hard['fav_percent']<0.7) & (final_hard['fav_percent']>0.6)) | 
            ((final_hard['dog_percent']>0.8) & (final_hard['dog_percent']<0.9) & (final_hard['fav_percent']<0.5) & (final_hard['fav_percent']>0.4)) | 
            ((final_hard['dog_percent']>0.8) & (final_hard['dog_percent']<0.9) & (final_hard['fav_percent']<0.7) & (final_hard['fav_percent']>0.6)) | 
            ((final_hard['dog_percent']>0.9) & (final_hard['dog_percent']<1.0) & (final_hard['fav_percent']<0.6) & (final_hard['fav_percent']>0.5)) | 
            ((final_hard['dog_percent']>0.9) & (final_hard['dog_percent']<1.0) & (final_hard['fav_percent']<0.7) & (final_hard['fav_percent']>0.6))) & \
           (final_hard['Dog_Odds']<2.5)
final_hard[filter1|filter2]

Unnamed: 0,Time,Fav_Odds,Dog_Odds,Fav,Elo_Fav,Fav_Record,Fav_Games,Dog,Dog_Record,Dog_Games,fav_percent,dog_percent,Sex,Fav_Serve%,Fav_Return%,Dog_Serve%,Dog_Return%
18,17:00,1.76,2.06,Veronika Kudermetova,Barbora Krejcikova,53%,53,Barbora Krejcikova,57%,30,0.528302,0.566667,Womens,77.4,30.8,74.3,35.0


In [481]:
final_hard[
    (final_hard["Sex"] == "Mens")
    & (
        (
            (final_hard["dog_percent"] > 0.0)
            & (final_hard["dog_percent"] < 0.1)
            & (final_hard["fav_percent"] < 0.8)
            & (final_hard["fav_percent"] > 0.7)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.1)
            & (final_hard["dog_percent"] < 0.2)
            & (final_hard["fav_percent"] < 0.8)
            & (final_hard["fav_percent"] > 0.7)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.1)
            & (final_hard["dog_percent"] < 0.2)
            & (final_hard["fav_percent"] < 0.9)
            & (final_hard["fav_percent"] > 0.8)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.3)
            & (final_hard["dog_percent"] < 0.4)
            & (final_hard["fav_percent"] < 0.4)
            & (final_hard["fav_percent"] > 0.3)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.6)
            & (final_hard["dog_percent"] < 0.7)
            & (final_hard["fav_percent"] < 0.3)
            & (final_hard["fav_percent"] > 0.2)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.6)
            & (final_hard["dog_percent"] < 0.7)
            & (final_hard["fav_percent"] < 0.5)
            & (final_hard["fav_percent"] > 0.4)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.7)
            & (final_hard["dog_percent"] < 0.8)
            & (final_hard["fav_percent"] < 0.4)
            & (final_hard["fav_percent"] > 0.3)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.7)
            & (final_hard["dog_percent"] < 0.8)
            & (final_hard["fav_percent"] < 0.5)
            & (final_hard["fav_percent"] > 0.4)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.7)
            & (final_hard["dog_percent"] < 0.8)
            & (final_hard["fav_percent"] < 0.7)
            & (final_hard["fav_percent"] > 0.6)
            & (final_hard["Dog_Odds"] < 2.5)
        )
        | (
            (final_hard["dog_percent"] > 0.9)
            & (final_hard["dog_percent"] < 1.0)
            & (final_hard["fav_percent"] < 0.7)
            & (final_hard["fav_percent"] > 0.6)
            & (final_hard["Dog_Odds"] < 2.5)
        )
    )
]


Unnamed: 0,Time,Fav_Odds,Dog_Odds,Fav,Elo_Fav,Fav_Record,Fav_Games,Dog,Dog_Record,Dog_Games,fav_percent,dog_percent,Sex,Fav_Serve%,Fav_Return%,Dog_Serve%,Dog_Return%


In [487]:
final_hard[final_hard['dog_percent']>0.49]#[(final_hard['Fav_Serve%']<final_hard['Dog_Serve%'])&(final_hard['Dog_Return%']>20)&(final_hard['Fav_Return%']<20)&(final_hard['Fav_Serve%']>0)&(final_hard['Dog_Odds']<2.5)]

Unnamed: 0,Time,Fav_Odds,Dog_Odds,Fav,Elo_Fav,Fav_Record,Fav_Games,Dog,Dog_Record,Dog_Games,fav_percent,dog_percent,Sex,Fav_Serve%,Fav_Return%,Dog_Serve%,Dog_Return%
10,03:00,1.55,2.36,Christopher Eubanks,Christopher Eubanks,70%,23,Aleksandar Vukic,58%,19,0.695652,0.578947,Mens,87.0,11.0,85.0,13.0
13,05:00,1.85,1.95,Yibing Wu,Yibing Wu,64%,11,John Isner,59%,17,0.636364,0.588235,Mens,81.0,21.0,92.0,8.0
18,17:00,1.76,2.06,Veronika Kudermetova,Barbora Krejcikova,53%,53,Barbora Krejcikova,57%,30,0.528302,0.566667,Womens,77.4,30.8,74.3,35.0
20,17:00,1.22,4.19,Karolina Muchova,Karolina Muchova,50%,10,Martina Trevisan,64%,11,0.5,0.636364,Womens,71.4,25.3,55.1,42.2
21,17:00,1.83,1.96,Qinwen Zheng,Maria Sakkari,55%,20,Maria Sakkari,62%,52,0.55,0.615385,Womens,75.2,26.4,74.8,33.0
22,17:00,1.81,1.99,Beatriz Haddad Maia,Beatriz Haddad Maia,50%,30,Paula Badosa,57%,44,0.5,0.568182,Womens,71.5,31.2,68.4,36.0
23,17:30,1.62,2.2,Nicolas Moreno De Alboran,Frederico Ferreira Silva,50%,12,Frederico Ferreira Silva,71%,14,0.5,0.714286,Mens,0.0,0.0,0.0,0.0
24,17:30,1.55,2.35,Sebastian Ofner,Chan Hong Seong,73%,11,Chan Hong Seong,60%,10,0.727273,0.6,Mens,69.0,16.0,,
31,18:00,1.65,2.08,Salvatore Caruso,Salvatore Caruso,70%,10,Viktor Durasovic,60%,10,0.7,0.6,Mens,38.0,17.0,59.0,5.0
35,19:00,1.72,2.04,Evan Furness,Peter Gojowczyk,58%,12,Peter Gojowczyk,55%,11,0.583333,0.545455,Mens,71.0,29.0,64.0,13.0


In [483]:
results_clay

Unnamed: 0,Time,Fav_Odds,Dog_Odds,Fav,Elo_Fav,Fav_Record,Fav_Games,Dog,Dog_Record,Dog_Games,fav_percent,dog_percent,Sex
0,01:00,1.72,2.05,Facundo Bagnis,Facundo Bagnis,60%,10,Camilo Ugo Carabelli,43%,7,0.6,0.428571,Mens
0,03:00,1.76,1.99,Federico Delbonis,Federico Delbonis,0%,11,Juan Pablo Varillas,44%,9,0.0,0.444444,Mens
0,04:00,1.62,2.22,Yannick Hanfmann,Yannick Hanfmann,45%,11,Hugo Gaston,53%,19,0.454545,0.526316,Mens
0,04:30,1.38,2.9,Dusan Lajovic,Dusan Lajovic,25%,12,Felipe Meligeni Alves,17%,6,0.25,0.166667,Mens
0,08:00,1.49,2.62,Sebastian Baez,Federico Coria,14%,14,Federico Coria,8%,12,0.142857,0.083333,Mens


In [484]:
data=pd.read_sql_query(
    f"Select DISTINCT Winner,Dog,Dog_Record,Winner_Odds,Fav_Record,fav_percent,dog_percent,Fav_Games,Dog_Games,Fav_Odds,Fav,sex,Dog_Odds From results_hard where sex like 'Womens' --where Date > '2023-01-01' and Date not like '{time_now_formatted}'",
    con=devengine,)
data['Fav_Odds']=data['Fav_Odds'].astype(float)
data['Dog_Odds']=data['Dog_Odds'].astype(float)
data['Winx']=data.apply(lambda x: 'Dog' if x['Winner']==x['Dog'] else 'Fav', axis=1)
womensserving=pd.read_csv('womensserving.csv')
combine = pd.merge(data, womensserving, how="left", left_on="Fav", right_on="Name")
combine2 = pd.merge(combine, womensserving, how="left", left_on="Dog", right_on="Name")
combine2[["Service Games Won_x", "Service Games Won_y"]] = combine2[
        ["Service Games Won_x", "Service Games Won_y"]
    ].astype(float)
combine2.rename(
        columns={
            "Service Games Won_x": "Fav_Serve%",
            "Service Games Won_y": "Dog_Serve%",
            "Return Games Won_x": "Fav_Return%",
            "Return Games Won_y": "Dog_Return%",
        },
        inplace=True,
    )
womens = combine2[
    [   "Winner","Dog","Winner_Odds","fav_percent","dog_percent","Fav_Games","Dog_Games","Fav_Odds",
        "Fav",
        # "Player_1_Odds",
        "Fav_Serve%",
        "Dog_Return%",
       
        # "Player_2_Odds",
        "Dog_Serve%",
        "Fav_Return%","Dog_Odds"
    ]]

In [502]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
# Load the data into a Pandas DataFrame

# Load the data into a Pandas DataFrame
data = womens[['Fav_Odds','Dog_Odds','Fav_Serve%','Dog_Serve%','Fav_Return%','Dog_Return%','fav_percent','dog_percent','Winner','Fav','Dog']].copy()
data=data.dropna()
player_1=data.copy()
columns_to_drop=player_1.columns
player_1['Odds']=player_1['Fav_Odds']
player_1['Serve_percent']=player_1['Fav_Serve%']/100
player_1['Return_percent']=player_1['Fav_Return%']/100
player_1['Win_percent']=player_1['fav_percent']
player_1['Win']=player_1.apply(lambda x: 1 if x['Winner']==x['Fav'] else 0, axis=1)
player_1=player_1.drop(columns=columns_to_drop)

player_2=data.copy()
columns_to_drop=player_2.columns
player_2['Odds']=1/player_2['Dog_Odds']
player_2['Serve_percent']=player_2['Dog_Serve%']/100
player_2['Return_percent']=player_2['Dog_Return%']/100
player_2['Win_percent']=player_2['dog_percent']
player_2['Serve_percent_opp']=player_2['Fav_Serve%']/100
player_2['Return_percent_opp']=player_2['Fav_Return%']/100
player_2['Win_percent_opp']=player_2['fav_percent']
player_2['Odds_opp']=1/player_2['Fav_Odds']
player_2['Win']=player_2.apply(lambda x: 1 if x['Winner']!=x['Fav'] else 0, axis=1)
player_2=player_2.drop(columns=columns_to_drop)
train=player_2
#[player_2['Odds']<2.1]

# Split the data into training and testing sets
X = train.drop(columns=['Win'])
y = train['Win']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model's performance on the testing data
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
len(train[train['Win']==1])/len(train)

import torch
import torch.nn as nn
import torch.optim as optim

# Split the data into training and testing sets
X_train = torch.tensor(X_train.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)

# Define the model
input_size = X_train.shape[1]
output_size = 1
model2 = nn.Sequential(nn.Linear(input_size, output_size), nn.Sigmoid())

# Define the loss function and optimization algorithm
criterion = nn.BCELoss()
optimizer = optim.SGD(model2.parameters(), lr=0.01)

# Train the model
for epoch in range(1000):
    optimizer.zero_grad()
    y_pred = model2(X_train)
    loss = criterion(y_pred.view(-1), y_train)
    loss.backward()
    optimizer.step()

# Evaluate the model's performance on the testing data
with torch.no_grad():
    y_pred_test = model2(X_test)
    y_pred_test = (y_pred_test > 0.5).float()
    accuracy = (y_pred_test == y_test).float().mean().item()
print('Accuracy:', accuracy)



Accuracy: 0.6366459627329193
Accuracy: 0.6552795171737671


In [505]:
testing=final_hard[final_hard['Sex']=='Womens'].dropna()
player_2=testing.copy()
columns_to_drop=player_2.columns
player_2=data.copy()
columns_to_drop=player_2.columns
player_2['Odds']=1/player_2['Dog_Odds']
player_2['Serve_percent']=player_2['Dog_Serve%']/100
player_2['Return_percent']=player_2['Dog_Return%']/100
player_2['Win_percent']=player_2['dog_percent']
player_2['Serve_percent_opp']=player_2['Fav_Serve%']/100
player_2['Return_percent_opp']=player_2['Fav_Return%']/100
player_2['Win_percent_opp']=player_2['fav_percent']
player_2['Odds_opp']=1/player_2['Fav_Odds']
#player_2['Win']=player_2.apply(lambda x: 1 if x['Winner']!=x['Fav'] else 0, axis=1)
player_2=player_2.drop(columns=columns_to_drop)
model.predict(player_2)
#final_hard[final_hard['Sex']=='Womens'].dropna()


array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [501]:
player_2

Unnamed: 0,Odds,Serve_percent,Return_percent,Win_percent,Serve_percent_opp,Return_percent_opp,Win_percent_opp,Odds_opp,Win
1,0.485437,0.631,0.417,0.461538,0.686,0.347,0.357143,0.568182,1
2,0.228833,0.538,0.300,0.428571,0.748,0.330,0.600000,0.826446,0
3,0.423729,0.638,0.396,0.272727,0.522,0.386,0.437500,0.628931,0
5,0.425532,0.671,0.318,0.846154,0.500,0.390,0.600000,0.628931,1
6,0.492611,0.718,0.270,0.515152,0.704,0.379,0.733333,0.558659,1
...,...,...,...,...,...,...,...,...,...
1854,0.450450,0.734,0.396,0.300000,0.568,0.396,0.400000,0.613497,0
1855,0.414938,0.698,0.314,0.250000,0.649,0.366,0.454545,0.653595,0
1856,0.380228,0.771,0.316,0.600000,0.704,0.379,0.827586,0.671141,0
1858,0.355872,0.672,0.379,0.562500,0.677,0.402,0.692308,0.694444,0
