In [4]:
import pandas as pd
import numpy as np
import logging
from math import floor
from google.oauth2.service_account import Credentials
import gspread
from typing import Dict, Any, List
from utils import get_teg_rounds, load_all_data, aggregate_data





In [None]:
all_data = load_all_data(exclude_teg_50=True,exclude_incomplete_tegs=True)
print(all_data)

In [None]:
all_data = load_all_data(exclude_teg_50=True,exclude_incomplete_tegs=True)
agg_measures = ['Sc','GrossVP','NetVP','Stableford']
teg_data = aggregate_data(all_data,'TEG',agg_measures)

teg_data

In [None]:
df = teg_data

best_stableford = df.loc[df.groupby('Player')['Stableford'].idxmax()]

# Select only relevant columns for output
output = best_stableford[['Player', 'Stableford', 'TEG']].rename(columns={'Stableford': 'Best Stableford'})

# Display output
print(output)

In [None]:

# Find unique combinations of TEG and Year
unique_teg_year = all_data[['TEG', 'Year']].drop_duplicates()

# Merge teg_data with the unique TEG-Year combinations to add the Year column
df_merged = pd.merge(teg_data, unique_teg_year, on='TEG', how='left')

# Define metrics and aggregation functions
metrics = {
    #'lowest Sc': 'Sc',
    'lowest GrossVP': 'GrossVP',
    'lowest NetVP': 'NetVP',
    'highest Stableford': 'Stableford'
}

# Initialize a dictionary to store outputs
output_dict = {}

# Loop over each metric and create the desired DataFrame
for metric_name, column in metrics.items():
    # Set up whether to find max or min for each metric
    if column == 'Stableford':
        best_values = df_merged.loc[df_merged.groupby('Player')[column].idxmax()]
    else:
        best_values = df_merged.loc[df_merged.groupby('Player')[column].idxmin()]

    # Select relevant columns and rename them
    output = best_values[['Player', column, 'TEG', 'Year']].rename(columns={column: metric_name})

    # Store the result in the dictionary
    output_dict[metric_name] = output
    
    # Print the output DataFrame
    print(f"Output for {metric_name}:")
    print(output, "\n")

In [None]:
aggregate_data(data=all_data,aggregation_level='TEG')

In [None]:
import pandas as pd
from IPython.display import Markdown

# Example DataFrame
data = {
    'Player': ['Alice', 'Bob', 'Charlie'],
    'Score': [72, 68, 74],
    'TEG': ['TEG1', 'TEG2', 'TEG1'],
    'Year': [2021, 2020, 2022],
    'Measure': ['Gross', 'Net', 'Stableford']
}

df = pd.DataFrame(data)

# Loop through DataFrame and display markdown text with dynamic content
for index, row in df.iterrows():
    # Define the markdown text using row values
    markdown_text = f"Best {row['Measure']}: **{row['Score']}** | {row['Player']} ({row['TEG']}, {row['Year']})"
    
    # Display the markdown text in Jupyter notebook
    display(Markdown(markdown_text))

In [None]:
from utils import get_round_data, get_complete_teg_data, get_9_data

rds = get_round_data()
tegs = get_complete_teg_data()

#rds.head()

import pandas as pd

rank_field = 'Sc'
rank_ascending = True

def add_ranks(df, fields_to_rank=None, rank_ascending=None):
    # If fields_to_rank is not provided, use default list of fields
    if fields_to_rank is None:
        fields_to_rank = ['Sc', 'GrossVP', 'NetVP', 'Stableford']
    
    # Check if fields_to_rank is a string, convert to list if necessary
    if isinstance(fields_to_rank, str):
        fields_to_rank = [fields_to_rank]
    
    for field in fields_to_rank:
        # Determine default value for rank_ascending for each field
        if rank_ascending is None:
            rank_ascending = False if 'Stableford' in field else True
        
        # Rank within each Player's rounds
        df[f'Rank_within_player_{field}'] = df.groupby('Player')[field].rank(method='min', ascending=rank_ascending)
        
        # Rank across all rounds
        df[f'Rank_within_all_{field}'] = df[field].rank(method='min', ascending=rank_ascending)
    
    return df

#ranked_rds = add_ranks(rds,'Sc',rank_ascending=True)
#ranked_rds.to_clipboard()
# fields_to_rank = 'Sc', ''
# ranked_tegs = add_ranks(tegs,'Stableford')
# ranked_tegs.to_clipboard()

frontback_data = get_9_data()
ranked_9s = add_ranks(df = frontback_data)
ranked_9s.to_clipboard()
print('Copied to clipboard')

In [None]:
for column in ranked_9s.columns:
    print(column)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# scatter = plt.scatter(df['Rank_within_all_Sc'], df['Sc'])
df = ranked_9s

df['rank'] = df['Rank_within_all_Sc']

# Get unique values from 'Pl' column
unique_pl = df['Pl'].unique()

# Create a color map
color_map = plt.cm.get_cmap('tab10')  # You can change 'tab10' to other colormaps
color_dict = {pl: color_map(i/len(unique_pl)) for i, pl in enumerate(unique_pl)}

# Map colors to your data
colors = df['Pl'].map(color_dict)

plt.figure(figsize=(10, 6))
for pl in unique_pl:
    mask = df['Pl'] == pl
    plt.scatter(df.loc[mask, 'rank'], df.loc[mask, 'Sc'], 
                c=[color_dict[pl]], label=pl)

plt.xlabel('Rank')
plt.ylabel('Mean (Sc)')
plt.title('Scatter Plot of Rank vs Mean (Sc), colored by Pl')
plt.legend()
plt.show()


import plotly.express as px

fig = px.scatter(df, x='rank', y='Sc', color='Pl', 
                 labels={'rank': 'Rank', 'Sc': 'Mean (Sc)', 'Pl': 'Pl'},
                 title='Scatter Plot of Rank vs Mean (Sc), colored by Pl')
fig.show()

In [None]:
from utils import get_ranked_teg_data, get_ranked_round_data, get_ranked_frontback_data

import streamlit as st

st.cache_data.clear()

tegs_ranked = get_ranked_teg_data()
# for col in tegs_ranked.columns:
#     print(col)

def get_best(df, measure_to_use, player_level = False, top_n = 1):
    valid_measures = ['Sc', 'GrossVP', 'NetVP', 'Stableford']
    if measure_to_use not in valid_measures:
        error_message = f"Invalid measure: '{measure_to_use}'. Valid options are: {', '.join(valid_measures)}"

    if player_level is None:
        player_level = False

    if top_n is None:
        top_n = 1
    
    measure_fn = 'Rank_within_' + ('player' if player_level else 'all') + f'_{measure_to_use}' 

    #measure_fn
    return df[df[measure_fn] == top_n]

#get_best(get_ranked_round_data(), measure_to_use='Stableford')
get_best(tegs_ranked, measure_to_use='Stableford')

THIS BIT MAKES THE RECORDS

In [None]:
tegs_ranked = get_ranked_teg_data()

def format_best_record(df, measure):
    # Get the best record(s)
    best_records = get_best(df, measure_to_use=measure, top_n=1)
    
    # Format the header based on the measure
    if measure == 'Sc':
        header = f"Best score: {int(best_records[measure].iloc[0])}"
    elif measure == 'GrossVP':
        header = f"Best gross: {int(best_records[measure].iloc[0]):+}"
    elif measure == 'NetVP':
        header = f"Best net: {int(best_records[measure].iloc[0]):+}"
    elif measure == 'Stableford':
        header = f"Best Stableford: {int(best_records[measure].iloc[0])}"
    
    # Format player info
    if len(best_records) == 1:
        player_info = f"| **{best_records['Player'].iloc[0]}** | {best_records['TEG'].iloc[0]} ({best_records['Year'].iloc[0]})"
        return f"{header} {player_info}"
    else:
        player_infos = [f"* **{row['Player']}** | {row['TEG']} ({row['Year']})" for _, row in best_records.iterrows()]
        return f"{header}\n" + "\n".join(player_infos)

# Usage
measures = ['Sc', 'GrossVP', 'NetVP', 'Stableford']
for measure in measures:
    print(format_best_record(tegs_ranked, measure))
    print()  # Add a blank line between measures

TESTING HOW TO PUT CHOSEN ROUNDS IN CONTEXT

In [None]:
from utils import get_ranked_teg_data, get_ranked_round_data, get_ranked_frontback_data,safe_ordinal
from utils import chosen_rd_context, chosen_teg_context

# ROUND CONTEXT

df = get_ranked_round_data()
max_teg = df.loc[df['TEGNum'].idxmax(), 'TEG']
max_rd_in_max_teg = df[df['TEG'] == max_teg]['Round'].max()

teg = max_teg
rd = max_rd_in_max_teg

print(chosen_rd_context(df,teg,rd,'Sc'))
print(chosen_rd_context(df,teg,rd,'GrossVP'))
print(chosen_rd_context(df,teg,rd,'NetVP'))
print(chosen_rd_context(df,teg,rd,'Stableford'))


# TEG CONTEXT

df = get_ranked_teg_data()
max_teg = df.loc[df['TEGNum'].idxmax(), 'TEG']
teg = max_teg

print(chosen_teg_context(df,teg,'Sc'))
print(chosen_teg_context(df,teg,'GrossVP'))
print(chosen_teg_context(df,teg,'NetVP'))
print(chosen_teg_context(df,teg,'Stableford'))


In [5]:
import streamlit as st
import pandas as pd
import altair as alt
from utils import load_all_data, get_teg_winners, get_teg_rounds

# === LOAD DATA === #
all_data = load_all_data(exclude_incomplete_tegs=True, exclude_teg_50=True)



filtered_data = all_data.copy()

# CREATE WINNERS TABLE

winners = get_teg_winners(filtered_data).drop(columns=['Year'])
winner_df = winners.replace(r'\*', '', regex=True)

# === GENERATE DATA FOR CHARTS AND DOUBLES === #
# Melt the DataFrame for players and competitions in long format
melted_winners = pd.melt(winner_df, id_vars=['TEG'], value_vars=['TEG Trophy', 'Green Jacket', 'HMM Wooden Spoon'],
                         var_name='Competition', value_name='Player')

# Group by player and competition, then count the occurrences
player_wins = melted_winners.groupby(['Player', 'Competition']).size().unstack(fill_value=0).sort_values(by='TEG Trophy', ascending=False)
player_wins = player_wins[['TEG Trophy', 'Green Jacket', 'HMM Wooden Spoon']]
player_wins.columns = ['Trophy', 'Jacket', 'Spoon']

# Sort data for each competition
trophy_sorted = player_wins.sort_values(by='Trophy', ascending=False).reset_index()
jacket_sorted = player_wins.sort_values(by='Jacket', ascending=False).reset_index()
spoon_sorted = player_wins.sort_values(by='Spoon', ascending=False).reset_index()

trophy_sorted



Unnamed: 0,Player,Trophy,Jacket,Spoon
0,Gregg WILLIAMS,4,2,1
1,Jon BAKER,3,3,1
2,David MULLIN,2,8,4
3,John PATTERSON,2,0,1
4,Alex BAKER,1,0,3
5,Henry MELLER,1,0,1
6,Stuart NEUMANN,1,1,3


In [2]:
from utils import load_all_data

all_data = load_all_data(exclude_teg_50=True)

def calculate_golf_stats(df):
    # Group by Player and calculate the sums
    stats = df.groupby('Player').agg({
        'GrossVP': [
            ('Holes_Played', 'count'),  # Count of rows for each player
            ('Pars_or_Better', lambda x: (x <= 0).sum()),
            ('Birdies', lambda x: (x == -1).sum()),
            ('Eagles', lambda x: (x == -2).sum()),
            ('TBPs', lambda x: (x > 2).sum())
        ]
    })
    
    # Flatten the column names
    stats.columns = ['_'.join(col).strip() for col in stats.columns.values]
    
    return stats
calculate_golf_stats(all_data)

2024-10-14 21:58:33.348 
  command:

    streamlit run c:\Users\JBA33\AppData\Local\Programs\Python\Python312\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-10-14 21:58:33.348 No runtime found, using MemoryCacheStorageManager


Unnamed: 0_level_0,GrossVP_Holes_Played,GrossVP_Pars_or_Better,GrossVP_Birdies,GrossVP_Eagles,GrossVP_TBPs
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alex BAKER,720,133,8,0,168
David MULLIN,1062,348,40,1,97
Gregg WILLIAMS,1062,245,22,0,176
Henry MELLER,486,114,9,0,86
John PATTERSON,432,108,8,1,90
Jon BAKER,1062,336,38,1,132
Stuart NEUMANN,846,200,17,1,162


In [3]:
import pandas as pd

def calculate_max_scores_per_round(df):
    # Define score types and their conditions
    score_types = {
        'Pars_or_Better': lambda x: x <= 0,
        'Birdies': lambda x: x == -1,
        'Eagles': lambda x: x == -2,
        'TBPs': lambda x: x > 2
    }
    
    # Initialize a dictionary to store results
    results = {score: [] for score in score_types}
    results['Player'] = []

    # Group by Player
    for player, player_data in df.groupby('Player'):
        results['Player'].append(player)
        
        # For each score type, calculate the max count per round
        for score, condition in score_types.items():
            max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
            results[score].append(max_count)
    
    # Convert results to DataFrame
    results_df = pd.DataFrame(results)
    
    return results_df

calculate_max_scores_per_round(all_data)

  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round').apply(lambda x: condition(x['GrossVP']).sum()).max()
  max_count = player_data.groupby('Round

NameError: name 'pd' is not defined