In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [7]:
def get_masters_scores():
    """Fetch the current PGA leaderboard from ESPN's JSON API.

    Returns
    -------
    pandas.DataFrame or str
        On HTTP 200: a frame with two columns, ``golfer_name`` (display name
        after the spelling fixes below) and ``score`` (int; the API's
        ``displayValue`` with ``'E'`` mapped to 0). Competitors whose score is
        not an integer string (anything ``int()`` rejects) are left out, and an
        empty leaderboard yields an empty frame that still has both columns.
        On any other status code: the string ``'API Error'`` (kept so existing
        callers that check for it keep working).

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or exceeds the timeout.
    KeyError, IndexError
        If the JSON payload does not have the expected
        ``events[0].competitions[0].competitors`` layout.
    """
    url = "https://site.web.api.espn.com/apis/site/v2/sports/golf/leaderboard?league=pga"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Spelling as returned by the API -> spelling emitted in `golfer_name`.
    # NOTE(review): downstream merges match these against the picks file;
    # confirm each direction against the spellings actually used there.
    name_fixes = {
        'Byeong-Hun An': 'Byeong Hun An',
        'Cameron Davis': 'Cam Davis',
        'Ludvig Aberg': 'Ludvig Åberg',
        'Nicolai Højgaard': 'Nicolai Hojgaard',
        'Thorbjorn Olesen': 'Thorbjørn Olesen',
        'Joaquín Niemann': 'Joaquin Niemann',
    }

    # Without a timeout, requests can block indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code != 200:
        return 'API Error'

    data = response.json()
    leaderboard = data['events'][0]['competitions'][0]['competitors']

    player_data = []
    for player in leaderboard:
        name = player['athlete']['displayName']
        name = name_fixes.get(name, name)

        raw_score = player['score']['displayValue']
        if raw_score == 'E':
            score = 0
        else:
            try:
                score = int(raw_score)
            except (TypeError, ValueError):
                # No numeric to-par value for this competitor; skip the row
                # instead of aborting the whole fetch.
                continue

        player_data.append({'golfer_name': name, 'score': score})

    # Passing `columns` keeps the schema stable even when no rows were collected.
    df = pd.DataFrame(player_data, columns=['golfer_name', 'score'])
    df['score'] = df['score'].astype(int)
    return df

def calculate_top_n(row, n, score_columns=None):
    """Sum the ``n`` lowest pick scores found in ``row``.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or a dict)
        Must be indexable by every name in ``score_columns``.
    n : int
        How many of the lowest scores to add up.
    score_columns : sequence of str, optional
        Keys to read from ``row``. Defaults to the nine ``tier_*_score``
        columns, one per pick.

    Returns
    -------
    number
        Sum of the ``n`` lowest non-missing scores. Missing scores (NaN/None)
        are ordered after every real score, so the result is NaN only when
        fewer than ``n`` real scores are available.
    """
    if score_columns is None:
        score_columns = [
            'tier_1_1_score', 'tier_1_2_score', 'tier_1_3_score',
            'tier_2_1_score', 'tier_2_2_score', 'tier_2_3_score',
            'tier_3_1_score', 'tier_3_2_score', 'tier_4_1_score',
        ]

    values = [row[column] for column in score_columns]

    # NaN compares False against everything, so sorting a list that contains
    # it gives an ill-defined order; sort the real scores and append the gaps.
    present = sorted(value for value in values if not pd.isna(value))
    missing_count = len(values) - len(present)
    ordered = present + [float('nan')] * missing_count

    return sum(ordered[:n])


# Pull the live leaderboard and load the pool entries.
scores = get_masters_scores()
picks = pd.read_csv('masters_picks.csv')

# Pick columns, in the order they are numbered 1-9 for display below.
tier_cols = ['tier_1_1', 'tier_1_2', 'tier_1_3', 'tier_2_1', 'tier_2_2', 'tier_2_3', 'tier_3_1', 'tier_3_2', 'tier_4_1']

# Left-join the leaderboard once per pick column so every pick gets a
# matching `<pick column>_score` column; unmatched golfers stay NaN.
merged_df = picks
for col in tier_cols:
    joined = pd.merge(merged_df, scores, how='left', left_on=col, right_on='golfer_name')
    merged_df = joined.drop(columns='golfer_name').rename(columns={'score': f'{col}_score'})

# Aggregate each entry's best 6, 7 and 8 pick scores.
for top_n in (6, 7, 8):
    merged_df[f'top_{top_n}_score'] = merged_df.apply(
        lambda row, top_n=top_n: calculate_top_n(row, n=top_n), axis=1
    )

# Display headers: picks become '1'..'9', their scores '1 Score'..'9 Score',
# top-6 becomes 'Score' and top-7 'Tiebreak' (top-8 keeps its name).
display_names = {'name': 'Name', 'top_6_score': 'Score', 'top_7_score': 'Tiebreak'}
for slot, col in enumerate(tier_cols, start=1):
    display_names[col] = str(slot)
    display_names[f'{col}_score'] = f'{slot} Score'
merged_df = merged_df.rename(columns=display_names)

# One "Golfer (score)" text column per pick.
for slot in range(1, 10):
    golfer = merged_df[str(slot)]
    score_text = merged_df[f'{slot} Score'].astype(str)
    merged_df[f'Pick: {slot}'] = golfer + ' (' + score_text + ')'

# Lowest 'Score' ranks first; ties share the smallest rank.
merged_df['Rank'] = merged_df['Score'].rank(method='min').astype(int)
# Empty-named column used purely as a visual spacer.
merged_df[''] = ''

In [8]:
# Display the leaderboard ordered from lowest to highest score; the sorted
# frame is only shown, `scores` itself is not reassigned.
scores.sort_values(by = 'score')

Unnamed: 0,golfer_name,score
0,Bryson DeChambeau,0
63,Adam Hadwin,0
62,Tommy Fleetwood,0
61,Jordan Spieth,0
60,Grayson Murray,0
...,...,...
25,Tony Finau,0
24,Jason Day,0
23,Sergio Garcia,0
43,Akshay Bhatia,0


In [53]:
def get_masters_scores():
    """Scrape the ESPN golf leaderboard page into a DataFrame.

    Returns
    -------
    pandas.DataFrame or str
        On HTTP 200: a frame with two columns, ``golfer_name`` (text of the
        third table cell, after the spelling fixes below) and ``score`` (int;
        text of the fourth cell with ``'E'`` mapped to 0). Table rows with
        fewer than four cells, or whose score text is not an integer, are
        left out. On any other status code: a string describing the failure
        (kept so existing callers keep working).

    Raises
    ------
    ValueError
        If the page contains no ``<tbody class="Table__TBODY">`` element.
    requests.exceptions.RequestException
        If the request fails or exceeds the timeout.
    """
    url = 'https://www.espn.com/golf/leaderboard'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Zero-based positions of the cells read from each table row.
    name_cell, score_cell = 2, 3

    # Spelling as scraped -> spelling emitted in `golfer_name`.
    # NOTE(review): downstream merges match these against the picks file;
    # confirm each direction against the spellings actually used there.
    name_fixes = {
        'Byeong-Hun An': 'Byeong Hun An',
        'Cameron Davis': 'Cam Davis',
        'Ludvig Aberg': 'Ludvig Åberg',
        'Nicolai Højgaard': 'Nicolai Hojgaard',
        'Thorbjorn Olesen': 'Thorbjørn Olesen',
        'Joaquín Niemann': 'Joaquin Niemann',
    }

    # Without a timeout, requests can block indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code != 200:
        return f"Failed to retrieve ESPN scores. Status code:{response.status_code}"

    soup = BeautifulSoup(response.content, "html.parser")

    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on None further down.
    table = soup.find("tbody", class_="Table__TBODY")
    if table is None:
        raise ValueError("Leaderboard table (tbody.Table__TBODY) not found on the ESPN page")

    records = []
    for row in table.find_all("tr"):
        cells = [cell.get_text() for cell in row.find_all("td")]
        if len(cells) <= score_cell:
            # Not a player row (too few cells to hold a name and a score).
            continue

        name = cells[name_cell].strip()
        name = name_fixes.get(name, name)

        raw_score = cells[score_cell].strip()
        if raw_score == 'E':
            score = 0
        else:
            try:
                score = int(raw_score)
            except ValueError:
                # No numeric to-par value in this row; skip it instead of
                # aborting the whole scrape.
                continue

        records.append({'golfer_name': name, 'score': score})

    # Passing `columns` keeps the schema stable even when no rows were collected.
    df = pd.DataFrame(records, columns=['golfer_name', 'score'])
    df['score'] = df['score'].astype(int)
    return df

In [59]:
# Fetch the leaderboard with whichever `get_masters_scores` definition was
# executed most recently in this kernel, then display it sorted ascending by
# score (the sorted frame is only shown; `df` keeps its original order).
df = get_masters_scores()
df.sort_values(by = 'score')

Unnamed: 0,golfer_name,score
0,Erik van Rooyen,-2
1,Danny Willett,-2
2,Bryson DeChambeau,-2
3,Christo Lamprecht (a),-1
4,Thorbjørn Olesen,-1
...,...,...
13,José María Olazábal,1
16,Chris Kirk,1
17,Austin Eckroat,2
19,Gary Woodland,2


In [42]:
# Spot-check: show the row(s) of `df` whose golfer_name is exactly 'Danny Willett'.
df[df['golfer_name'] == 'Danny Willett']

Unnamed: 0,golfer_name,score
0,Danny Willett,-2


In [65]:
# Display the current value of `picks`.
# NOTE(review): the recorded output has golfer_name/score columns rather than
# the name/tier_* columns seen after merging, so this cell was presumably run
# while `picks` held something other than the picks CSV — confirm on a fresh kernel.
picks

Unnamed: 0,golfer_name,score
0,Erik van Rooyen,-2
1,Danny Willett,-2
2,Bryson DeChambeau,-2
3,Christo Lamprecht (a),-1
4,Thorbjørn Olesen,-1
5,Ryan Fox,-1
6,Jake Knapp,0
7,Taylor Moore,0
8,Santiago de la Fuente (a),0
9,Stephan Jaeger,0


In [66]:
# Refresh the leaderboard and reload the pool entries.
scores = get_masters_scores()

picks = pd.read_csv('masters_picks.csv')

# Left-join the leaderboard once per pick column so every pick gets a
# matching `<pick column>_score` column; unmatched golfers stay NaN.
merged_df = picks
for col in ('tier_1_1', 'tier_1_2', 'tier_1_3', 'tier_2_1', 'tier_2_2', 'tier_2_3', 'tier_3_1', 'tier_3_2', 'tier_4_1'):
    joined = pd.merge(merged_df, scores, how='left', left_on=col, right_on='golfer_name')
    merged_df = joined.drop(columns='golfer_name').rename(columns={'score': f'{col}_score'})

In [67]:
# Display the picks joined with one `<pick column>_score` column per pick.
merged_df

Unnamed: 0,name,tier_1_1,tier_1_2,tier_1_3,tier_2_1,tier_2_2,tier_2_3,tier_3_1,tier_3_2,tier_4_1,tier_1_1_score,tier_1_2_score,tier_1_3_score,tier_2_1_score,tier_2_2_score,tier_2_3_score,tier_3_1_score,tier_3_2_score,tier_4_1_score
0,Aaron Pereira 1,Scottie Scheffler,Viktor Hovland,Wyndham Clark,Dustin Johnson,Max Homa,Shane Lowry,Justin Rose,Keegan Bradley,Gary Woodland,0.0,0.0,0.0,0,0,0,0.0,0.0,2.0
1,Abigail Ueland 1,Scottie Scheffler,Brooks Koepka,Wyndham Clark,Tony Finau,Matt Fitzpatrick,Rickie Fowler,Russell Henley,Nicolai Hojgaard,Gary Woodland,0.0,0.0,0.0,0,0,0,0.0,0.0,2.0
2,Aj Aziz 1,Rory McIlroy,Joaquin Niemann,Wyndham Clark,Dustin Johnson,Shane Lowry,Tommy Fleetwood,Russell Henley,Tiger Woods,Bubba Watson,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0
3,Aj Caruso 1,Scottie Scheffler,Ludvig Aberg,Wyndham Clark,Tony Finau,Matt Fitzpatrick,Sahith Theegala,Russell Henley,Harris English,Emiliano Grillo,0.0,,0.0,0,0,0,0.0,0.0,0.0
4,Alec Maclennan 1,Scottie Scheffler,Xander Schauffele,Ludvig Aberg,Tommy Fleetwood,Sungjae Im,Tom Kim,Russell Henley,Denny McCarthy,Emiliano Grillo,0.0,0.0,,0,0,0,0.0,0.0,0.0
5,Alex Gianoplus 1,Jordan Spieth,Xander Schauffele,Will Zalatoris,Matt Fitzpatrick,Cameron Young,Sam Burns,Denny McCarthy,Cameron Davis,Nick Dunlap,0.0,0.0,0.0,0,0,0,0.0,,0.0
6,Anders Corey 1,Rory McIlroy,Joaquin Niemann,Hideki Matsuyama,Dustin Johnson,Shane Lowry,Patrick Reed,Russell Henley,Si Woo Kim,Adam Schenk,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0
7,Anders Corey 2,Xander Schauffele,Hideki Matsuyama,Wyndham Clark,Cameron Young,Shane Lowry,Tyrrell Hatton,Denny McCarthy,Si Woo Kim,Adrian Meronk,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0
8,Andrew Stulce 1,Xander Schauffele,Hideki Matsuyama,Will Zalatoris,Cameron Young,Tommy Fleetwood,Patrick Reed,Harris English,Luke List,Danny Willett,0.0,0.0,0.0,0,0,0,0.0,1.0,-1.0
9,Andrew Stulce 2,Brooks Koepka,Patrick Cantlay,Wyndham Clark,Dustin Johnson,Bryson DeChambeau,Tyrrell Hatton,Justin Rose,Akshay Bhatia,Bubba Watson,0.0,0.0,0.0,0,-3,0,0.0,0.0,0.0
