#### What Require Inputs in this Notebook
1. [Fill in File Input](#Load-in-Data)
2. [Right or Left Hand](#Right-or-Left-Hand)
3. [Output ShotCSV](#Output-ShotCSV)
4. [Change to atNet (by Player) from tagger later](#At-Net)
5. [EDA and get Stats Here](#EDA)
6. [Output PointCSV for Visuals](#PRINT-POINTCSV-VISUALS)
7. [Output PointCSV for Upload](#Print-POINTCSV-UPLOAD)

# Table of Contents
1. [Load in Data](#Load-in-Data)
- Error Checking
- Add Columns
- [Output ShotCSV](#Output-ShotCSV)
2. [Create Point DF](#Create-Point-DF)
- Add Columns
- [Output PointCSV for Visuals](#PRINT-POINTCSV-VISUALS)
- [Output PointCSV for Upload](#Print-POINTCSV-UPLOAD)    
        - cut out points with no timestamp position    
        - atNetPlayer1 has values of the player name for display purposes instead of boolean values

# [Click for Summary Stats and EDA HERE](#EDA)

# Notebook Start

### Load in Data

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Put your Uncleaned .csv file name here

your_file_name = "doublestagging.csv"
shot_data = pd.read_csv(your_file_name)

# Put timestamps file here
timestamp = pd.read_csv("Spencer_Emon vs. USC Timestamps - Sheet1.csv")

In [None]:
# Check existing columns
print(len(shot_data.columns))
shot_data.columns

70


Index(['pointScore', 'gameScore', 'setScore', 'tiebreakScore', 'isPointStart',
       'pointStartTime', 'isPointEnd', 'pointEndTime', 'pointNumber',
       'isBreakPoint', 'shotInRally', 'side', 'serverName', 'firstServeIn',
       'firstServeZone', 'firstServeXCoord', 'firstServeYCoord',
       'secondServeIn', 'secondServeZone', 'secondServeXCoord',
       'secondServeYCoord', 'isAce', 'returnContactX', 'returnContactY',
       'returnPlacementX', 'returnPlacementY', 'shotContactX', 'shotContactY',
       'shotDirection', 'shotType', 'isSlice', 'isVolley', 'isOverhead',
       'isApproach', 'isDropshot', 'isPoach', 'bothAtNetTeamA',
       'bothAtNetTeamB', 'isFakeTeamA', 'isFakeTeamB', 'serveFormation',
       'returnFormation', 'shotLocationX', 'shotLocationY', 'isWinner',
       'isErrorWideR', 'isErrorWideL', 'isErrorNet', 'isErrorLong', 'isLob',
       'shotHitBy', 'teamA', 'teamB', 'Date', 'Division', 'Event',
       'lineupPosition', 'matchVenue', 'teamAPlayer1', 'teamAPlayer2

<!-- ### Error Check:
#### If data includes firstServeZone but no "1" for firstServeIn, same for secondServeZone/secondServeIn -->

## Error Check 1:
#### If returnData exists, separate values of "Forehand/Backhand Crosscourt/Down the Line" into shotFhBh and shotDirection

In [None]:
# Check if 'returnData' column exists
if 'returnData' in shot_data.columns:
    # Replace NaN values in returnData column with an empty string
    shot_data['returnData'].fillna('', inplace=True)

    # Check if "backhand" or "forehand" is in returnData and update shotFhBh accordingly
    shot_data.loc[shot_data['returnData'].str.contains('backhand', case=False), 'shotFhBh'] = 'Backhand'
    shot_data.loc[shot_data['returnData'].str.contains('forehand', case=False), 'shotFhBh'] = 'Forehand'

    # Check if "Crosscourt" or "Down the Line" is in returnData and update shotDirection accordingly
    shot_data.loc[shot_data['returnData'].str.contains('Crosscourt', case=False), 'shotDirection'] = 'Crosscourt'
    shot_data.loc[shot_data['returnData'].str.contains('Down the Line', case=False), 'shotDirection'] = 'Down the Line'

    # Display the modified shot_data
    print(shot_data[['returnData', 'shotDirection', 'shotFhBh']].head(10))
else:
    print("Check Passed: Column 'returnData' does not exist.")


Check Passed: Column 'returnData' does not exist.


## Error Check 2:
#### Change ShotType column to shotFhBh column if using old tagger

In [None]:
if 'shotType' in shot_data.columns:
    # Rename the column
    shot_data.rename(columns={'shotType': 'shotFhBh'}, inplace=True)
    print("Column renamed successfully.")
else:
    print("Check Passed: Column 'shotType' does not exist.")

Column renamed successfully.


## Error Check 3:
#### From Old Tagger, check that shotFhBh doesn't include Direction and Fh/Bh

In [None]:
shot_data.loc[shot_data['shotFhBh'] == 'Forehand Crosscourt', 'shotDirection'] = 'Crosscourt'
shot_data.loc[shot_data['shotFhBh'] == 'Forehand Crosscourt', 'shotFhBh'] = 'Forehand'

shot_data.loc[shot_data['shotFhBh'] == 'Backhand Crosscourt', 'shotDirection'] = 'Crosscourt'
shot_data.loc[shot_data['shotFhBh'] == 'Backhand Crosscourt', 'shotFhBh'] = 'Backhand'

shot_data.loc[shot_data['shotFhBh'] == 'Forehand Down the Line', 'shotDirection'] = 'Down the Line'
shot_data.loc[shot_data['shotFhBh'] == 'Forehand Down the Line', 'shotFhBh'] = 'Forehand'

shot_data.loc[shot_data['shotFhBh'] == 'Backhand Down the Line', 'shotDirection'] = 'Down the Line'
shot_data.loc[shot_data['shotFhBh'] == 'Backhand Down the Line', 'shotFhBh'] = 'Backhand'

print(shot_data[['shotDirection', 'shotFhBh']].head(10))

   shotDirection  shotFhBh
0            NaN       NaN
1  Down the Line  Backhand
2            NaN       NaN
3     Crosscourt  Backhand
4     Crosscourt  Backhand
5            NaN       NaN
6  Down the Line  Backhand
7     Crosscourt  Backhand
8     Crosscourt  Forehand
9            NaN       NaN


# CHANGED
## Error Check 4:
#### Check that Player1Name and Player2Name have values

In [None]:
# Fill in missing values for teamAPlayer1

fillInteamAPlayer1Name = "Choose_teamAPlayer1Name_Here"
fillInteamAPlayer2Name = "Choose_teamAPlayer2Name_Here"
fillInteamBPlayer1Name = "Choose_teamBPlayer1Name_Here"
fillInteamBPlayer2Name = "Choose_teamBPlayer2Name_Here"

#teamAPlayer1
if shot_data.at[0, 'teamAPlayer1'] == None:
    print("teamAPlayer1 was blank. Filling with 'fillinteamAPlayer1")
    shot_data['teamAPlayer1'].fillna('Fill_In_Value', inplace=True)
else:
    print("Check Passed: teamAplayer1Name is not blank. Current value:", shot_data.at[0, 'teamAPlayer1'])

# teamAPlayer2
if shot_data.at[0, 'teamAPlayer2'] == None:
    print("teamAPlayer2 was blank. Filling with 'fillinteamAPlayer2")
    shot_data['teamAPlayer2'].fillna('Fill_In_Value', inplace=True)
else:
    print("Check Passed: teamAplayer2Name is not blank. Current value:", shot_data.at[0, 'teamAPlayer2'])

# teamBPlayer1
if shot_data.at[0, 'teamBPlayer1'] == None:
    print("teamBPlayer1 was blank. Filling with 'fillinteamBPlayer1")
    shot_data['teamBPlayer1'].fillna('Fill_In_Value', inplace=True)
else:
    print("Check Passed: teamBplayer1Name is not blank. Current value:", shot_data.at[0, 'teamBPlayer1'])

# teamBPlayer2
if shot_data.at[0, 'teamBPlayer2'] == None:
    print("teamBPlayer2 was blank. Filling with 'fillinteamBPlayer2")
    shot_data['teamBPlayer2'].fillna('Fill_In_Value', inplace=True)
else:
    print("Check Passed: teamAplayer1Name is not blank. Current value:", shot_data.at[0, 'teamBPlayer2'])

Check Passed: teamAplayer1Name is not blank. Current value: Spencer Johnson
Check Passed: teamAplayer2Name is not blank. Current value: Emon van Loben Sels
Check Passed: teamBplayer1Name is not blank. Current value: Karl Lee
Check Passed: teamAplayer1Name is not blank. Current value: Samuel Rubell


## Error Check 5:
#### Check if there are missing 'Deuce' or 'Ad' sides in side column

In [None]:
# Filter out rows with 'side' values other than 'Deuce' and 'Ad'
filtered_data = shot_data[shot_data['side'].isin(['Deuce', 'Ad'])]

# Rows that don't meet the condition
rows_not_meeting_condition = shot_data[~shot_data['side'].isin(['Deuce', 'Ad'])]

# Count of rows not meeting the condition
count_rows_not_meeting_condition = len(rows_not_meeting_condition)

print("Rows with Side that aren't Deuce or Ad:", count_rows_not_meeting_condition)

# Output rows if you want to see which rows
# print("Rows not meeting the condition:")
# print(rows_not_meeting_condition)


Rows with Side that aren't Deuce or Ad: 0


In [None]:
 # Check if both conditions passed
if 'pointNumber' in shot_data.columns and not shot_data['pointNumber'].isnull().any() and shot_data['pointNumber'].is_monotonic_increasing:
    print("Check Passed: Point Numbers already exist")


# Check if pointNumber exists for every row and is not empty
if 'pointNumber' not in shot_data.columns or shot_data['pointNumber'].isnull().any():
    # Execute the script to calculate pointNumber
    point_starts = (shot_data['isPointStart'] == 1)
    shot_data['pointNumber'] = point_starts.cumsum()

    print("Data had missing point numbers. They were filled in automatically")

# Check if pointNumber is in increasing order
if not shot_data['pointNumber'].is_monotonic_increasing:
    # Print the condition failure if the 'pointNumber' column is not in increasing order
    print("Data had point numbers in the wrong order. They were filled in automatically")
    point_starts = (shot_data['isPointStart'] == 1)
    shot_data['pointNumber'] = point_starts.cumsum()


Data had missing point numbers. They were filled in automatically


## Error Check 6:

1.   List item
2.   List item


#### Fills in PointNumber if not there

In [None]:
missing_pointNumber_rows = shot_data[shot_data['pointNumber'].isnull()]

# Check if there are missing rows
if len(missing_pointNumber_rows) == 0:
    print("Check Passed: All rows have pointNumber")
else:
    print(f"Count of rows missing 'pointNumber': {len(missing_pointNumber_rows)}")

Check Passed: All rows have pointNumber


## Error Check 6.5:

1. Check to see that isVolley and isPoach are indeed a column
2. Check to make sure that each isPoach value occurs when isVolley is true
3. ...



In [None]:
# Truth variables
isVolleyExists = False
isPoachExists = False

# Check to see if "isVolley" is a column
if 'isVolley' in shot_data.columns:
    isVolleyExists = True
    print("Check Passed: isVolley is a column. \n")
else:
    print("Error: isVolley is not a column. \n")


# Check to see if "isPoach" is a column
if 'isPoach' in shot_data.columns:
    isPoachExists = True
    print("Check Passed: isPoach is a column. \n")
else:
    print("Error: isPoach is not a column. \n")


# Only execute the following code when Volley and Poach columns exist:
if isVolleyExists and isPoachExists:
  error_rows = []

  # Iterate through shot_data, check each row for the error
  for index, row in shot_data.iterrows():
    if row['isPoach'] == 1 and row['isVolley'] != 1:
      error_rows.append(row)

  # Display results
  if not error_rows:
    print("Check Passed: all isPoach 1's correspond to a isVolley 1.")
  else:
    print("Error: the following row(s) have a 1 in isPoach but do not have a 1 in isVolley.")
    print(pd.DataFrame(error_rows))  # Print the DataFrame of error rows


Check Passed: isVolley is a column. 

Check Passed: isPoach is a column. 

Check Passed: all isPoach 1's correspond to a isVolley 1.


## Add Column:
#### player1Hand, player2Hand

In [None]:
fillInteamAplayer1Hand = "Right"
fillInteamAplayer2Hand = "Right"
fillInteamBplayer1Hand = "Right"
fillInteamBplayer2Hand = "Right"

# Check if teamAplayer1Hand key exists in shot_data, if not, create it
if 'teamAPlayer1Hand' not in shot_data:
    shot_data['teamAPlayer1Hand'] = None

# Check if teamAplayer2Hand key exists in shot_data, if not, create it
if 'teamAPlayer2Hand' not in shot_data:
    shot_data['teamAPlayer2Hand'] = None

# Check if teambplayer1Hand key exists in shot_data, if not, create it
if 'teamBPlayer1Hand' not in shot_data:
    shot_data['teamBPlayer1Hand'] = None

# Check if teamBplayer2Hand key exists in shot_data, if not, create it
if 'teamBPlayer2Hand' not in shot_data:
    shot_data['teamBPlayer2Hand'] = None

if shot_data.at[0, 'teamAPlayer1Hand'] == None:
    print("teamAPlayer1Hand was blank. Filling with 'fillInteamAPlayer1Hand'")
    shot_data.at[0, 'teamAPlayer1Hand'] = fillInteamAplayer1Hand
else:
    print("Check Passed: teamAPlayer1Hand is not blank. Current value:", shot_data.at[0, 'teamAPlayer1Hand'])

# Check if player2Hand is blank, if so, fill it with 'fillInplayer2Hand'
if shot_data.at[0, 'teamAPlayer2Hand'] == None:
    print("teamAPlayer2Hand was blank. Filling with 'fillInteamAPlayer2Hand'")
    shot_data.at[0, 'teamAPlayer2Hand'] = fillInteamAplayer2Hand
else:
    print("Check Passed: teamAplayer2Hand is not blank. Current value:", shot_data.at[0, 'teamAPlayer2Hand'])

if shot_data.at[0, 'teamBPlayer1Hand'] == None:
    print("teamBplayer1Hand was blank. Filling with 'fillInteamBplayer1Hand'")
    shot_data.at[0, 'teamBPlayer1Hand'] = fillInteamBplayer1Hand
else:
    print("Check Passed: teamBplayer1Hand is not blank. Current value:", shot_data.at[0, 'teamBPlayer1Hand'])

# Check if player2Hand is blank, if so, fill it with 'fillInplayer2Hand'
if shot_data.at[0, 'teamBPlayer2Hand'] == None:
    print("teamBplayer2Hand was blank. Filling with 'fillInteamBplayer2Hand'")
    shot_data.at[0, 'teamBPlayer2Hand'] = fillInteamBplayer2Hand
else:
    print("Check Passed: teamBplayer2Hand is not blank. Current value:", shot_data.at[0, 'teamBPlayer2Hand'])


print("Player1Hand Current value:", shot_data.at[0, 'teamAPlayer1Hand'])
print("Player2Hand Current value:", shot_data.at[0, 'teamAPlayer2Hand'])

Check Passed: teamAPlayer1Hand is not blank. Current value: Left
Check Passed: teamAplayer2Hand is not blank. Current value: Right
Check Passed: teamBplayer1Hand is not blank. Current value: Right
Check Passed: teamBplayer2Hand is not blank. Current value: Right
Player1Hand Current value: Left
Player2Hand Current value: Right


### Filling Columns: Forward Fill
- 'gameScore'
- 'setScore'
- 'serverName'
- 'teamAPlayer1'
- 'teamAPlayer2'
- 'teamBPlayer1'
- 'teamBPlayer2'
- 'teamAPlayer1Hand'
- 'teamAPlayer2Hand'
- 'teamBPlayer1Hand'
- 'teamBPlayer2Hand'

# Forward Fill GameScore, SetScore, tiebreakScore, serverName, player1Name, player2Name

In [None]:
columns_to_fill = ['gameScore','setScore', 'serverName', 'teamAPlayer1', 'teamAPlayer2','teamBPlayer1', 'teamBPlayer2', 'teamAPlayer1Hand', 'teamAPlayer2Hand',
                   'teamBPlayer1Hand', 'teamBPlayer2Hand']
# columns_to_fill = ['gameScore','setScore', 'tiebreakScore', 'serverName', 'player1Name', 'player2Name']
# Add player1Hand, player2Hand when it is in tagger

for column in columns_to_fill:
    shot_data[column].replace(['', 'na'], pd.NaT, inplace=True)
    shot_data[column] = shot_data[column].ffill()
    print("Sucessful")

Sucessful
Sucessful
Sucessful
Sucessful
Sucessful
Sucessful
Sucessful
Sucessful
Sucessful
Sucessful
Sucessful


## Error Check 7:
#### Check for NA's

- all 0, except missing pointScore should match amount of tiebreak shots
- side can have missing, for old tagger and not tagging side of each shot

### Fixing:
- Open CSV in Google Sheets (Excel will change to date format), output game, set, point Score of missing values. Then find in google sheets, adjust, redownload, and re-upload into notebook

In [None]:
# Count empty strings in each column
empty_string_counts = (shot_data == "").sum()

# Filter out columns with zero empty strings
non_zero_counts = empty_string_counts[empty_string_counts > 0]

# Count NaN values in 'pointScore' column when 'gameScore' is '6-6'
na_tiebreak_count = shot_data.loc[shot_data['gameScore'] == '6-6', 'pointScore'].isna().sum()

side_na_count = shot_data.loc[shot_data['isPointStart'] == 1, 'side'].isna().sum()
print("\nCount of NaN values for 'side' on Point Start:", side_na_count)

# Display the count
print("\nCount of Nan when gameScore is '6-6' (# tiebreak shots):", na_tiebreak_count)


# Count NaN or empty values in specified columns
na_counts = shot_data[[ 'pointScore', 'shotInRally', 'gameScore', 'setScore', 'side', 'serverName']].isna().sum()

# Display the counts
print(f"\nCount of NA's in these columns\n{na_counts}")

# Display the counts
print("Count of empty strings in each column that includes at least one:")
print(non_zero_counts)




Count of NaN values for 'side' on Point Start: 0

Count of Nan when gameScore is '6-6' (# tiebreak shots): 0

Count of NA's in these columns
pointScore     42
shotInRally     0
gameScore       0
setScore        0
side            0
serverName      0
dtype: int64
Count of empty strings in each column that includes at least one:
Series([], dtype: int64)


## Error Check 8:
####  rows where side is empty
- If side missing, focus on rows where (shotInRally == 1) and returns (shotInRally == 2 ), anything else can leave NA

In [None]:
print(shot_data[shot_data["side"].isnull()][['pointScore', 'gameScore', 'setScore','side', 'shotInRally']])

Empty DataFrame
Columns: [pointScore, gameScore, setScore, side, shotInRally]
Index: []


## Error Check 9:
####  Outputs Missing shotInRally Rows

In [None]:
empty_shot_rows = shot_data[shot_data['shotInRally'].isnull()]

if not empty_shot_rows.empty:
    # Iterate over the index of empty_shot_rows
    for index in empty_shot_rows.index:
        # Get the row with empty shotInRally
        empty_row = shot_data.loc[index]

        # Get the row above it
        if index - 1 >= 0:
            above_row = shot_data.loc[index - 1]
        else:
            above_row = None

        # Get the row below it
        if index + 1 < len(shot_data):
            below_row = shot_data.loc[index + 1]
        else:
            below_row = None

        # Print the rows
        print("Empty row:")
        print(empty_row)
        print("Row above:")
        print(above_row)
        print("Row below:")
        print(below_row)
        print("====================")
else:
    print("Check Passed: No Missing shotInRally")


Check Passed: No Missing shotInRally


## Error Check 10:
####  Check that pointNumber is increasing consecutively

- if video is bad and points are cut out, this can fail and that's ok.

In [None]:
point_numbers = shot_data['pointNumber'].unique()

# Check if the point numbers are consecutive
if list(point_numbers) == list(range(1, len(point_numbers) + 1)):
    print("Check Passed")
else:
    raise ValueError("Error: The 'pointNumber' column does not contain consecutive numbers starting from 1.")


Check Passed


## Error Check 11:
#### Check same amount of start and end points

In [None]:
# Count of Point Start and Point End
num_point_start = shot_data['isPointStart'].sum()

# Count the number of rows where isPointEnd is equal to 1
num_point_end = shot_data['isPointEnd'].sum()

print("Number of rows with isPointStart = 1:", num_point_start)
print("Number of rows with isPointEnd = 1:", num_point_end)

Number of rows with isPointStart = 1: 75.0
Number of rows with isPointEnd = 1: 75.0


## Error Check 12:
#### Output consecutive isPointStart with no isPointEnd, and vice versa for isPointEnd


In [None]:
# Shubhan

# Output total start, total end
# How many Points aren't enclosing between point start and point end

# Count where isPointStart = 1
count_isPointStart = (shot_data['isPointStart'] == 1).sum()

# Count where isPointEnd = 1
count_isPointEnd = (shot_data['isPointEnd'] == 1).sum()

print("Total count where isPointStart = 1:", count_isPointStart)
print("Total count where isPointEnd = 1:", count_isPointEnd)

# Output Point/Game Scores where there is consecutive isPointStart with no isPointEnd
def check_isPointStart_in_between(shot_data):
    start_indices = shot_data.index[shot_data['isPointStart'] == 1]  # Get indices where isPointStart is True
    end_indices = shot_data.index[shot_data['isPointEnd'] == 1]  # Get indices where isPointEnd is True
    start_without_end = []

    for i in range(len(start_indices) - 1):
        current_start_index = start_indices[i]
        next_start_index = start_indices[i + 1]

        # Get all isPointEnd indices between current_start_index and next_start_index
        end_between_starts = end_indices[(end_indices >= current_start_index) & (end_indices < next_start_index)]

        # Check if there is at least one isPointEnd between current_start_index and next_start_index
        if len(end_between_starts) == 0:
            start_without_end.append(current_start_index)

    # Check the last isPointStart
    last_start_index = start_indices[-1]
    end_after_last_start = end_indices[end_indices >= last_start_index]

    if len(end_after_last_start) == 0:
        start_without_end.append(last_start_index)

    if start_without_end:
        print(" ")
        print("isPointStart without corresponding isPointEnd:")
        for index in start_without_end:
#             print("Index:", index)
#             print("pointScore:", shot_data.at[index, 'pointScore'])
#             print("gameScore:", shot_data.at[index, 'gameScore'])
#             print("setScore:", shot_data.at[index, 'setScore'])
            print(" ")
            print(shot_data.loc[start_without_end][['pointScore', 'gameScore', 'setScore', 'tiebreakScore', 'isPointStart']])
            print(" ")


        return False
    else:
        print("All isPointStart have corresponding isPointEnd.")
        return True

# Check if the condition is met
check_isPointStart_in_between(shot_data)

# Output Point/Game Scores where there is consecutive isPointEnd with no isPointStart
def check_isPointEnd_in_between(shot_data):
    start_indices = shot_data.index[shot_data['isPointStart'] == 1]  # Get indices where isPointStart is True
    end_indices = shot_data.index[shot_data['isPointEnd'] == 1]  # Get indices where isPointEnd is True
    end_without_start = []

    for i in range(len(end_indices) - 1):
        current_end_index = end_indices[i]
        next_end_index = end_indices[i + 1]

        # Get all isPointStart indices b/w current_end_index and next_end_index
        start_between_ends = start_indices[(start_indices > current_end_index) & (start_indices <= next_end_index)]

        # Check if there is at least one isPointStart between current_end_index and next_end_index
        if len(start_between_ends) == 0:
            end_without_start.append(current_end_index)

    # NOTE: This will always append the last index of the sheet because the last end will never have a corresponding start so I'm not sure if its necessary to add it to the end_without_start array
    # Check the last isPointStart
    """
    last_end_index = end_indices[-1]
    start_after_last_end = start_indices[start_indices >= last_end_index]

    if len(start_after_last_end) == 0:
        end_without_start.append(last_end_index)
    """

    if end_without_start:
        print("isPointEnd without corresponding isPointStart:")
#         for index in end_without_start:
#             print("Index:", index)
#             print("pointScore:", shot_data.at[index, 'pointScore'])
#             print("gameScore:", shot_data.at[index, 'gameScore'])
#             print("setScore:", shot_data.at[index, 'setScore'])
#             print("setScore:", shot_data.at[index, 'tiebreakScore'])

        # Output the rows of shot_data that correspond to the indices in end_without_start
        print(" ")
        print(shot_data.loc[end_without_start][['pointScore', 'gameScore', 'setScore', 'tiebreakScore', 'isPointEnd']])

        return False
    else:
        print("All isPointEnd have corresponding isPointStart.")
        return True

# Check if the condition is met
check_isPointEnd_in_between(shot_data)

Total count where isPointStart = 1: 75
Total count where isPointEnd = 1: 75
All isPointStart have corresponding isPointEnd.
All isPointEnd have corresponding isPointStart.


True

## Error Check 14:
#### Change the score from incorrect date format
- accounts for all variations of dates eg. (0-00-0000, 0/0/0000)

In [None]:
## Error Check 13:
#### Make Jan-00 back into 1-0 for Game/Set Score

# Make Scores Strings not Date Time
columns_to_convert = ['gameScore', 'setScore'] #if no tiebreakScore
# columns_to_convert = ['gameScore', 'setScore', 'tiebreakScore']
shot_data[columns_to_convert] = shot_data[columns_to_convert].astype(object)

In [None]:
import re

# Define a mapping for month abbreviations
month_mapping = {'Jan': '1', 'Feb': '2', 'Mar': '3', 'Apr': '4', 'May': '5', 'Jun': '6',
                 'Jul': '7', 'Aug': '8', 'Sep': '9', 'Oct': '10', 'Nov': '11', 'Dec': '12'}

# Function to convert string like 'Jan-00' to '1-0'
def convert_score_string(score_str):
    # Check if the input is a string
    if not isinstance(score_str, str):
        return score_str
    # Check if the string has a month abbreviation and a year ending with '00'
    if re.match(r'^\d{1,2}-[A-Za-z]{3}$', score_str):
        # Extract year and month abbreviation
        year, month = score_str.split('-')

        # Remove leading zeros from the year
        year = str(int(year))

        # Replace month abbreviation with corresponding number
        month_number = month_mapping.get(month, month)

        # Concatenate the parts to form the transformed string
        transformed_str = f'{year}-{month_number}'
        return transformed_str

    # Check if the string has a month abbreviation and a year with leading '0's
    elif re.match(r'^[A-Za-z]{3}-\d{1,2}$', score_str):
        # Extract month abbreviation and year
        month, year = score_str.split('-')

        # Replace month abbreviation with corresponding number
        month_number = month_mapping.get(month, month)

        # Remove leading zeros from the year
        year = str(int(year))

        # Concatenate the parts to form the transformed string
        transformed_str = f'{month_number}-{year}'
        return transformed_str

    # Check if the string has a date in the format 'month/day/year'
    elif re.match(r'^\d{1,2}/\d{1,2}/\d{4}$', score_str):
        # Extract month, day, and year
        month, day, year = score_str.split('/')

        # Remove leading zeros from month and day
        month = str(int(month))
        day = str(int(day))

        # Concatenate the parts to form the transformed string
        transformed_str = f'{month}-{day}'
        return transformed_str

    # Check if the string has a date in the format 'month-day-year'
    elif re.match(r'^\d{1,2}-\d{1,2}-\d{4}$', score_str):
        # Extract month, day, and year
        month, day, year = score_str.split('-')

        # Remove leading zeros from month and day
        month = str(int(month))
        day = str(int(day))

        # Concatenate the parts to form the transformed string
        transformed_str = f'{month}-{day}'
        return transformed_str

    return score_str

# Apply the conversion function to the relevant columns in shot_data
shot_data['gameScore'] = shot_data['gameScore'].apply(convert_score_string)
shot_data['setScore'] = shot_data['setScore'].apply(convert_score_string)
# shot_data['tiebreakerScore'] = shot_data['tiebreakerScore'].apply(convert_score_string)


## Error Check 15:
#### Check that there is no strings in set, game scores.

In [None]:
# Assuming shot_data is your DataFrame
unique_set_scores = shot_data['setScore'].unique()
unique_game_scores = shot_data['gameScore'].unique()

# Print unique values
print("Unique Set Scores:", unique_set_scores)
print("Unique Game Scores:", unique_game_scores)


Unique Set Scores: ['0-0']
Unique Game Scores: ['0-0' '1-0' '1-1' '2-1' '1-2' '2-2' '2-3' '3-3' '3-4' '4-4' '4-5' '5-5'
 '5-6' '6-6']


## Error Check 16 :
### Print out all the rows where isPointStart time doesn't start in the beginning of a rally

In [None]:
# Filter the DataFrame
filtered_rows = shot_data[(shot_data['isPointStart'] == 1) & (shot_data['shotInRally'] != 1)]

# Print the filtered rows
if filtered_rows.empty:
    print ("Check passed.")
else:
    print("Rows where isPointStart = 1 and shotInRally != 1:")
    print(filtered_rows)


Check passed.


## Error Check 17:
### Find the rows where isPointEnd = 1 and shotInRally = 1 but is neither an ace or a double fault

In [None]:
# Filter the DataFrame
filtered_rows = shot_data[
    (shot_data['isPointEnd'] == 1) &
    (shot_data['shotInRally'] == 1) &
    (shot_data['firstServeIn'] != 0) &
    (shot_data['secondServeIn'] != 0) &
    (shot_data['isAce'] != 1)
]

# Print the filtered rows
if filtered_rows.empty:
    print ("Check passed.")
else:
    print("Rows where isPointEnd = 1, shotInRally = 1, firstServeIn and secondServeIn are both not 0, and isAce != 1:")
    print(filtered_rows)

Rows where isPointEnd = 1, shotInRally = 1, firstServeIn and secondServeIn are both not 0, and isAce != 1:
    pointScore gameScore setScore tiebreakScore  isPointStart  pointStartTime  \
72        0-30       2-2      0-0           NaN           1.0             NaN   
75       30-30       2-2      0-0           NaN           1.0             NaN   
76       40-30       2-2      0-0           NaN           1.0             NaN   
134       15-0       4-4      0-0           NaN           1.0             NaN   

     isPointEnd  pointEndTime  pointNumber  isBreakPoint  ...  \
72          1.0           NaN           23           NaN  ...   
75          1.0           NaN           25           NaN  ...   
76          1.0           NaN           26           NaN  ...   
134         1.0           NaN           42           NaN  ...   

     teamAPlayer1Hand teamAPlayer2Hand Data Error  teamBPlayer1  \
72               Left            Right        NaN      Karl Lee   
75               Left      

## Error Check 18:
### Find all the rows where there are duplicate isPointStarts == 1 points
- Finds all the points where the there are the same score and isPointStart == 1

In [None]:
# Filter the DataFrame
filtered_rows = shot_data[shot_data['isPointStart'] == 1]

# Output rows where isPointStart is 1 and pointScore, gameScore, and setScore have the same value
output_rows = filtered_rows[filtered_rows.duplicated(subset=['pointScore', 'gameScore', 'setScore', 'tiebreakScore'], keep=False)]

# Print the output rows
if output_rows.empty:
    print ("Check passed.")
else:
    print("Rows where isPointStart is 1 and pointScore, gameScore, and setScore have the same value:")
    print(output_rows[['pointScore', 'gameScore', 'setScore', 'tiebreakScore', 'isPointStart']])



Check passed.


## Error Check 19:
### Find all the rows where there are duplicate isPointEnd == 1 points
- Finds all the points where the there are the same score and isPointEnd == 1

In [None]:
# Filter the DataFrame
filtered_rows = shot_data[shot_data['isPointEnd'] == 1]

# Output rows where isPointStart is 1 and pointScore, gameScore, and setScore have the same value
output_rows = filtered_rows[filtered_rows.duplicated(subset=['pointScore', 'gameScore', 'setScore', 'tiebreakScore'], keep=False)]

# Print the output rows
if output_rows.empty:
    print ("Check passed.")
else:
    print("Rows where isPointEnd is 1 and pointScore, gameScore, and setScore have the same value:")
    print(output_rows[['pointScore', 'gameScore', 'setScore', 'tiebreakScore', 'isPointStart']])


Check passed.


## Error Check 20:
### Find all the rows where there is isPointEnd == 1 but there is no isWinner, isErrorWideL, isErrorWideR, isErrorNet, isErrorLong


In [None]:
filtered_rows = shot_data[(shot_data['isPointEnd'] == 1) &
                          (shot_data['isWinner'] != 1) &
                          (shot_data['isErrorWideL'] != 1) &
                          (shot_data['isErrorWideR'] != 1) &
                          (shot_data['isErrorNet'] != 1) &
                          (shot_data['isErrorLong'] != 1) &
                          (shot_data['firstServeIn'] != 0) &
                          (shot_data['secondServeIn'] != 0)]

if filtered_rows.empty:
    print ("Check passed.")
else:
    print("Error: isPointEnd == 1 but there is no point end shot")
    print(filtered_rows[['pointScore', 'gameScore', 'setScore','isPointStart', 'isPointEnd', 'isWinner', 'isErrorWideL', 'isErrorWideR', 'isErrorNet', 'isErrorLong']])


Check passed.


## Error Check 21:
### Check if shotHitBy column exists. If it does replace empty values in shotHitBy column with string "NA"


In [None]:
if 'shotHitBy' in shot_data.columns:
    print("shotHitBy column exists. replacing empty values with NA ...")
    # replace empty string and NaN values in shotHitBy column with string "NA"
    shot_data['shotHitBy'] = shot_data['shotHitBy'].replace('', 'NA').fillna('NA')
else:
    print("shotHitBy column doesn't exist.")

shotHitBy column exists. replacing empty values with NA ...


## Error Check 22:
### Check if there are any "NA"s in the shotHitBy column. Replace "NA" values in shotHitBy that has a corresponding shotInRally = 1 with the serverName. Print out remaining unresolved shotHitBy "NA" rows.

In [None]:
# replace shotHitBy "NA" values whose shotInRally = 1 with the serverName for that row
shot_data.loc[shot_data['shotInRally'] == 1, 'shotHitBy'] = shot_data['serverName']
# unresolved shotHitBy "NA" rows
shot_hitby_nas = shot_data[shot_data['shotHitBy'] == "NA"]

if (len(shot_hitby_nas) == 0):
    print("NA shotHitBy error check passed successfully")
else:
    print("unresolved NA's in shotHitBy column")
    print(shot_hitby_nas)

NA shotHitBy error check passed successfully


## Error Check 23:
### Check that the shotHitBy rows alternate with a member of each team. Print out rows that have consecutive shotHitBy when shotInRally is increasing.


In [None]:
shothitby_repeat = []

for i in range(len(shot_data) - 1):
    row = shot_data.iloc[i]
    row_next = shot_data.iloc[i + 1]

    # filters out cases where rows may have consecutive shotHitBy's because it goes from the end of a rally to a serve.
    if row_next['shotInRally'] > row['shotInRally']:
        if row_next['shotHitBy'] == row['shotHitBy']:
            shothitby_repeat.append(row)
            shothitby_repeat.append(row_next)

if len(shothitby_repeat) == 0:
    print("alternating shotHitBy error check passed successfully")
else:
    print("shotHitBy rows not alternating with a team member of each team (team A or team B)")
    print(shothitby_repeat)

shotHitBy rows not alternating with a team member of each team (team A or team B)
[pointScore          40-15
gameScore             4-4
setScore              0-0
tiebreakScore         NaN
isPointStart          1.0
                    ...  
teamBPlayer1Hand    Right
teamBPlayer2Hand    Right
Round                 NaN
Surface               NaN
Notes                 NaN
Name: 146, Length: 70, dtype: object, pointScore          40-15
gameScore             4-4
setScore              0-0
tiebreakScore         NaN
isPointStart          NaN
                    ...  
teamBPlayer1Hand    Right
teamBPlayer2Hand    Right
Round                 NaN
Surface               NaN
Notes                 NaN
Name: 147, Length: 70, dtype: object]


# Add More Columns

In [None]:
# Ensure there are no out-of-bounds errors
for index in range(len(shot_data) - 1):
    if shot_data.loc[index, 'firstServeIn'] == 1 or shot_data.loc[index, 'secondServeIn'] == 1:
        shot_data.loc[index, 'returnerName'] = shot_data.loc[index + 1, 'shotHitBy']


shot_data['returnerName'] = shot_data['returnerName'].ffill()
# Display the first few rows of the updated DataFrame to verify the changes
shot_data.head()


Unnamed: 0,pointScore,gameScore,setScore,tiebreakScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,...,teamAPlayer2Hand,Data Error,teamBPlayer1,teamBPlayer2,teamBPlayer1Hand,teamBPlayer2Hand,Round,Surface,Notes,returnerName
0,0-0,0-0,0-0,,1.0,,,,1,,...,Right,,Karl Lee,Samuel Rubell,Right,Right,,Hard,,Karl Lee
1,0-0,0-0,0-0,,,,1.0,,1,,...,Right,,Karl Lee,Samuel Rubell,Right,Right,,,,Karl Lee
2,15-0,0-0,0-0,,1.0,,,,2,,...,Right,,Karl Lee,Samuel Rubell,Right,Right,,,,Samuel Rubell
3,15-0,0-0,0-0,,,,,,2,,...,Right,,Karl Lee,Samuel Rubell,Right,Right,,,,Samuel Rubell
4,15-0,0-0,0-0,,,,1.0,,2,,...,Right,,Karl Lee,Samuel Rubell,Right,Right,,,,Samuel Rubell


### Add Column: isInsideOut, isInsideIn

In [None]:
# Add columns for isInsideOut and isInsideIn, initially set to 0
shot_data['isInsideOut'] = None
shot_data['isInsideIn'] = None

# Iterate through rows
for index, row in shot_data.iterrows():
    shotHitBy = row['shotHitBy']
    player_hand = (
    row['teamAPlayer1Hand'] if shotHitBy == row['teamAPlayer1'] else
    row['teamAPlayer2Hand'] if shotHitBy == row['teamAPlayer2'] else
    row['teamBPlayer1Hand'] if shotHitBy == row['teamBPlayer1'] else
    row['teamBPlayer2Hand']
    )


    if player_hand == "Right":
        if row['side'] == "Deuce" and row['shotFhBh'] == "Backhand" and row['shotDirection'] == "Crosscourt":
            shot_data.at[index, 'isInsideOut'] = 1
        elif row['side'] == "Ad" and row['shotFhBh'] == "Forehand" and row['shotDirection'] == "Crosscourt":
            shot_data.at[index, 'isInsideOut'] = 1
        elif row['side'] == "Deuce" and row['shotFhBh'] == "Backhand" and row['shotDirection'] == "Down the Line":
            shot_data.at[index, 'isInsideIn'] = 1
        elif row['side'] == "Ad" and row['shotFhBh'] == "Forehand" and row['shotDirection'] == "Down the Line":
            shot_data.at[index, 'isInsideIn'] = 1
    elif player_hand == "Left":
        if row['side'] == "Ad" and row['shotFhBh'] == "Backhand" and row['shotDirection'] == "Crosscourt":
            shot_data.at[index, 'isInsideOut'] = 1
        elif row['side'] == "Deuce" and row['shotFhBh'] == "Forehand" and row['shotDirection'] == "Crosscourt":
            shot_data.at[index, 'isInsideOut'] = 1
        elif row['side'] == "Ad" and row['shotFhBh'] == "Backhand" and row['shotDirection'] == "Down the Line":
            shot_data.at[index, 'isInsideIn'] = 1
        elif row['side'] == "Deuce" and row['shotFhBh'] == "Forehand" and row['shotDirection'] == "Down the Line":
            shot_data.at[index, 'isInsideIn'] = 1

### Add Column: isAce

In [None]:
# Add the Ace column
shot_data['isAce'] = None

for index, row in shot_data.iterrows():
    if row['isPointEnd'] == 1:
        if row['shotInRally'] == 1: # last point is serve
            if (row['firstServeIn'] == 1 or row['secondServeIn'] == 1): # either first or second serve went in
                shot_data.at[index, 'isAce'] = 1

### Add Column: isDoubleFault

In [None]:
# Add the DoubleFault column
shot_data['isDoubleFault'] = None

for index, row in shot_data.iterrows():
    if row['isPointEnd'] == 1:
        if row['shotInRally'] == 1: # last point is serve
            if (row['firstServeIn'] != 1 and row['secondServeIn'] != 1): # either first or second serve went in
                shot_data.at[index, 'isDoubleFault'] = 1

### Add Column: pointWonBy, lastShotError

In [None]:
# Add the 'pointWonBy' column
shot_data['pointWonBy'] = None

# Add the 'lastShotError' column
shot_data['lastShotError'] = 0

for index, row in shot_data.iterrows():
    if row['isPointEnd'] == 1:
        if row['shotInRally'] == 1: # last point is serve
            if row['isAce'] == 1:
                shot_data.at[index, 'pointWonBy'] = row['serverName']
            elif row['isDoubleFault'] == 1:
                shot_data.at[index, 'pointWonBy'] = row['returnerName']


        elif row['shotInRally'] != 1:
            if row['isErrorWideR'] == 1 or row['isErrorWideL'] == 1 or row['isErrorNet'] == 1 or row['isErrorLong'] == 1: # if error
                shot_data.at[index, 'lastShotError'] = 1
                # point won by person who hit ball 2nd to last
                shot_data.at[index, 'pointWonBy'] = shot_data.at[index-1, 'shotHitBy']



            elif row['isWinner'] == 1:
              # point won by person who hit the ball last
                shot_data.at[index, 'pointWonBy'] = shot_data.at[index, 'shotHitBy']


## Error Check 21:
#### Output Point End with no pointWonBy


In [None]:
# Missing Point End
print(shot_data[(shot_data['isPointEnd'] == 1) & (shot_data['pointWonBy'].isnull())][['pointScore', 'gameScore','setScore', 'lastShotError', 'isWinner', 'isErrorWideR', 'isErrorWideL',
       'isErrorNet', 'isErrorLong', 'pointWonBy', 'serverName', 'shotInRally']])


Empty DataFrame
Columns: [pointScore, gameScore, setScore, lastShotError, isWinner, isErrorWideR, isErrorWideL, isErrorNet, isErrorLong, pointWonBy, serverName, shotInRally]
Index: []


### Backward Fill: PointWonBy

In [None]:
shot_data['pointWonBy'].bfill()

0          Spencer Johnson
1          Spencer Johnson
2      Emon van Loben Sels
3      Emon van Loben Sels
4      Emon van Loben Sels
              ...         
272        Spencer Johnson
273        Spencer Johnson
274        Spencer Johnson
275        Spencer Johnson
276        Spencer Johnson
Name: pointWonBy, Length: 277, dtype: object

### Add Column: serveResult, serveInPlacement

In [None]:
conditions = [
    (shot_data['isPointStart'] == 1) & (shot_data['firstServeIn'] == 1),
    (shot_data['isPointStart'] == 1) & (shot_data['firstServeIn'] != 1) & (shot_data['secondServeIn'] == 1),
    (shot_data['isPointStart'] == 1) & (shot_data['firstServeIn'] != 1) & (shot_data['secondServeIn'] != 1)
]

# Define the values to be assigned for each condition
values_result = ['1st Serve In', '2nd Serve In', 'Double Fault']
values_placement = [shot_data['firstServeZone'], shot_data['secondServeZone'], np.nan]

# Use numpy.select to assign values based on conditions
shot_data['serveResult'] = np.select(conditions, values_result, default='')
shot_data['serveInPlacement'] = np.select(conditions, values_placement, default='')

# Now shot_data DataFrame is updated with serveResult and serveInPlacement values where isPointStart is 1
# print(shot_data[['serveResult', 'serveInPlacement','pointScore','side']].head(60))

In [None]:
shot_data.replace("", None, inplace=True)

# Double Check BEFORE Outputting

In [None]:
print(shot_data.head(10))

  pointScore gameScore setScore tiebreakScore  isPointStart  pointStartTime  \
0        0-0       0-0      0-0           NaN           1.0             NaN   
1        0-0       0-0      0-0           NaN           NaN             NaN   
2       15-0       0-0      0-0           NaN           1.0             NaN   
3       15-0       0-0      0-0           NaN           NaN             NaN   
4       15-0       0-0      0-0           NaN           NaN             NaN   
5       30-0       0-0      0-0           NaN           1.0             NaN   
6       30-0       0-0      0-0           NaN           NaN             NaN   
7       30-0       0-0      0-0           NaN           NaN             NaN   
8       30-0       0-0      0-0           NaN           NaN             NaN   
9       40-0       0-0      0-0           NaN           1.0             NaN   

   isPointEnd  pointEndTime  pointNumber  isBreakPoint  ...  Surface Notes  \
0         NaN           NaN            1           N

In [None]:
shot_data.columns

Index(['pointScore', 'gameScore', 'setScore', 'tiebreakScore', 'isPointStart',
       'pointStartTime', 'isPointEnd', 'pointEndTime', 'pointNumber',
       'isBreakPoint', 'shotInRally', 'side', 'serverName', 'firstServeIn',
       'firstServeZone', 'firstServeXCoord', 'firstServeYCoord',
       'secondServeIn', 'secondServeZone', 'secondServeXCoord',
       'secondServeYCoord', 'isAce', 'returnContactX', 'returnContactY',
       'returnPlacementX', 'returnPlacementY', 'shotContactX', 'shotContactY',
       'shotDirection', 'shotFhBh', 'isSlice', 'isVolley', 'isOverhead',
       'isApproach', 'isDropshot', 'isPoach', 'bothAtNetTeamA',
       'bothAtNetTeamB', 'isFakeTeamA', 'isFakeTeamB', 'serveFormation',
       'returnFormation', 'shotLocationX', 'shotLocationY', 'isWinner',
       'isErrorWideR', 'isErrorWideL', 'isErrorNet', 'isErrorLong', 'isLob',
       'shotHitBy', 'teamA', 'teamB', 'Date', 'Division', 'Event',
       'lineupPosition', 'matchVenue', 'teamAPlayer1', 'teamAPlayer2

In [None]:
print(shot_data.tail(10))

    pointScore gameScore setScore tiebreakScore  isPointStart  pointStartTime  \
267        NaN       6-6      0-0         3-Jun           NaN             NaN   
268        NaN       6-6      0-0         4-Jun           1.0             NaN   
269        NaN       6-6      0-0         4-Jun           NaN             NaN   
270        NaN       6-6      0-0         4-Jun           NaN             NaN   
271        NaN       6-6      0-0         4-Jun           NaN             NaN   
272        NaN       6-6      0-0         4-Jun           NaN             NaN   
273        NaN       6-6      0-0         4-Jun           NaN             NaN   
274        NaN       6-6      0-0         4-Jun           NaN             NaN   
275        NaN       6-6      0-0         4-Jun           NaN             NaN   
276        NaN       6-6      0-0         4-Jun           NaN             NaN   

     isPointEnd  pointEndTime  pointNumber  isBreakPoint  ...  Surface Notes  \
267         1.0           Na

# Output ShotCSV

In [None]:
# # # Ouput Improved Shot Csv HERE
# Assuming shot_data is your DataFrame and the names are from the first row
teamAPlayer1NameNoSpace = shot_data.iloc[0]['teamAPlayer1'].replace(" ", "")
teamAPlayer2NameNoSpace = shot_data.iloc[0]['teamAPlayer2'].replace(" ", "")
teamBPlayer1NameNoSpace = shot_data.iloc[0]['teamBPlayer1'].replace(" ", "")
teamBPlayer2NameNoSpace = shot_data.iloc[0]['teamBPlayer2'].replace(" ", "")

# Save DataFrame to CSV file with modified player names
shot_data.to_csv(f'Shot_Visuals_{teamAPlayer1NameNoSpace}_{teamAPlayer2NameNoSpace}_{teamBPlayer1NameNoSpace}_{teamBPlayer2NameNoSpace}.csv', index=False)


In [None]:
shot_data['pointNumber'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75])

# Below is for Point CSV

### Create Point DF

In [None]:
# Creating point_df (with only 1 row for each pointNumber)
point_df = shot_data.drop_duplicates(subset='pointNumber')[['pointNumber']]
# point_df.shape

In [None]:
shot_data.head()

Unnamed: 0,pointScore,gameScore,setScore,tiebreakScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,...,Surface,Notes,returnerName,isInsideOut,isInsideIn,isDoubleFault,pointWonBy,lastShotError,serveResult,serveInPlacement
0,0-0,0-0,0-0,,1.0,,,,1,,...,Hard,,Karl Lee,,,,,0,1st Serve In,T
1,0-0,0-0,0-0,,,,1.0,,1,,...,,,Karl Lee,,1.0,,Spencer Johnson,1,,
2,15-0,0-0,0-0,,1.0,,,,2,,...,,,Samuel Rubell,,,,,0,2nd Serve In,Body
3,15-0,0-0,0-0,,,,,,2,,...,,,Samuel Rubell,,,,,0,,
4,15-0,0-0,0-0,,,,1.0,,2,,...,,,Samuel Rubell,1.0,,,Emon van Loben Sels,0,,


### Add Column: teamAplayer1Name, teamAplayer2Name, teamBplayer1Name, teamBplayer2Name
### Set Variable: first_teamAplayer1, first_teamAplayer, first_teamBplayer1, first_teamBplayer

In [None]:
# Extract the first value of teamAPlayer1, teamAPlayer2, teamBPlayer1, and teamBPlayer2 from shot_data
first_teamAPlayer1 = shot_data['teamAPlayer1'].iloc[0]
first_teamAPlayer2 = shot_data['teamAPlayer2'].iloc[0]
first_teamBPlayer1 = shot_data['teamBPlayer1'].iloc[0]
first_teamBPlayer2 = shot_data['teamBPlayer2'].iloc[0]

# Fill in the first value into all rows of point_df for the respective columns
point_df['teamAPlayer1'] = first_teamAPlayer1
point_df['teamAPlayer2'] = first_teamAPlayer2
point_df['teamBPlayer1'] = first_teamBPlayer1
point_df['teamBPlayer2'] = first_teamBPlayer2


### Add Column: Scores

In [None]:
point_df['pointScore'] = shot_data.groupby('pointNumber')['pointScore'].first().values
point_df['gameScore'] = shot_data.groupby('pointNumber')['gameScore'].first().values
point_df['setScore'] = shot_data.groupby('pointNumber')['setScore'].first().values
point_df['tiebreakScore'] = shot_data.groupby('pointNumber')['tiebreakScore'].first().values

### Add Column: Side

In [None]:
# Group shot_data by 'pointNumber' and get the first 'side' value for each group
side_values = shot_data.groupby('pointNumber')['side'].first().reset_index()

point_df['side'] = side_values['side'].values

### Add Column: serverName, returnerName

---



In [None]:
# Adds Server and Returner Names and pointScore

point_df['serverName'] = shot_data.groupby('pointNumber')['serverName'].first().values
point_df['returnerName'] = shot_data.groupby('pointNumber')['returnerName'].first().values


In [None]:
client_team_value = shot_data.loc[0, 'teamA']
opponent_team_value = shot_data.loc[0, 'teamB']

# Adding 'clientTeam' and 'opponentTeam' columns to point_df
point_df['clientTeam'] = client_team_value
point_df['opponentTeam'] = opponent_team_value


## Warning: Will be Empty if Timestamp separate

In [None]:
# Add Start and End times per point

for index, row in shot_data.iterrows():
    point_number = row['pointNumber']

    if row['isPointStart'] == 1:
        point_df.loc[point_df['pointNumber'] == point_number, 'Position'] = row['pointStartTime']
    if row['isPointEnd'] == 1:
        point_df.loc[point_df['pointNumber'] == point_number, 'pointEndPosition'] = row['pointEndTime']

# Add Duration
point_df['Duration'] = point_df['pointEndPosition'] - point_df['Position']

### Add Column: Rally Column
- rallyCount (x amount)
- rallyCountFreq (1-4,5-8,9-12,13+)

In [None]:
# Find the highest shotInRally for each pointNumber in shot_data
max_rally_per_point = shot_data.groupby('pointNumber')['shotInRally'].max().reset_index()
point_df['rallyCount'] = list(max_rally_per_point['shotInRally'])

# Add 'rallyCountFreq' column based on specified conditions
point_df['rallyCountFreq'] = point_df['rallyCount'].apply(lambda x: '1 - 4' if 1 <= x <= 4 else ('5 - 8' if 5 <= x <= 8 else ('9 - 12' if 9 <= x <= 12 else ('13 +' if x >= 13 else 'Error'))))


### Add Column: Serve
- firstServeIn
- secondServeIn
- serveResult
- serveInPlacement

Part 2
- serveResult
- serveInPlacement

Part 3
- firstServeZone
- secondServeZone

In [None]:
# Add firstServeIn and secondServeIn

# Add firstServeIn and secondServeIn columns
point_df['firstServeIn'] = 0
point_df['secondServeIn'] = 0

for point_number in shot_data['pointNumber'].unique():
    # Check if firstServeIn is 1 for the given pointNumber in shot_data
    if any((shot_data['pointNumber'] == point_number) & (shot_data['firstServeIn'] == 1)):
        point_df.loc[point_df['pointNumber'] == point_number, 'firstServeIn'] = 1

    # Check if secondServeIn is 1 for the given pointNumber in shot_data
    if any((shot_data['pointNumber'] == point_number) & (shot_data['secondServeIn'] == 1)):
        point_df.loc[point_df['pointNumber'] == point_number, 'secondServeIn'] = 1

#part 2
start_points = shot_data[shot_data['isPointStart'] == 1]

# Set values in point_df to corresponding values from start_points
point_df['serveResult'] = start_points['serveResult'].values
point_df['serveInPlacement'] = start_points['serveInPlacement'].values

# part 3
serve_zones = shot_data.loc[shot_data['shotInRally'] == 1, ['pointNumber', 'firstServeZone', 'secondServeZone', 'firstServeIn', 'secondServeIn']].drop_duplicates()
point_df['firstServeZone'] = shot_data.groupby('pointNumber')['firstServeZone'].first().values
point_df['secondServeZone'] = shot_data.groupby('pointNumber')['secondServeZone'].first().values

### Add Column: Ace

In [None]:
point_df['isAce'] = ((point_df['rallyCount'] == 1) & ((point_df['serveResult'] != "Double Fault")))

# Display the resulting DataFrame
print(point_df[['pointNumber', 'rallyCount', 'isAce']].head(14))

    pointNumber  rallyCount  isAce
0             1           2  False
2             2           3  False
5             3           4  False
9             4           3  False
12            5           2  False
14            6           5  False
19            7           5  False
24            8           2  False
26            9           4  False
30           10           5  False
35           11           3  False
38           12           1  False
39           13           3  False
42           14           3  False


### Add Column: Return

In [None]:
# print(shot_data[shot_data['shotInRally'] == 2][['shotDirection','shotFhBh','pointScore','gameScore','setScore']])

In [None]:
# Set the initial values of 'returnDirection' and 'returnHand' columns to None
point_df['returnDirection'] = None
point_df['returnFhBh'] = None

# Iterate through pointNumber in shot_data
for point_number in shot_data['pointNumber'].unique():
    # Check if shotInRally == 2 exists for the given pointNumber
    if 2 in shot_data.loc[shot_data['pointNumber'] == point_number, 'shotInRally'].values:
        # Get the information from the corresponding row
        row_with_return_info = shot_data[(shot_data['pointNumber'] == point_number) & (shot_data['shotInRally'] == 2)].iloc[0]

        # Assign values to 'returnDirection' and 'returnHand' columns
        point_df.loc[point_df['pointNumber'] == point_number, 'returnDirection'] = row_with_return_info['shotDirection']
        point_df.loc[point_df['pointNumber'] == point_number, 'returnFhBh'] = row_with_return_info['shotFhBh']

# Display the modified DataFrame
print(point_df.head(10))

    pointNumber     teamAPlayer1         teamAPlayer2 teamBPlayer1  \
0             1  Spencer Johnson  Emon van Loben Sels     Karl Lee   
2             2  Spencer Johnson  Emon van Loben Sels     Karl Lee   
5             3  Spencer Johnson  Emon van Loben Sels     Karl Lee   
9             4  Spencer Johnson  Emon van Loben Sels     Karl Lee   
12            5  Spencer Johnson  Emon van Loben Sels     Karl Lee   
14            6  Spencer Johnson  Emon van Loben Sels     Karl Lee   
19            7  Spencer Johnson  Emon van Loben Sels     Karl Lee   
24            8  Spencer Johnson  Emon van Loben Sels     Karl Lee   
26            9  Spencer Johnson  Emon van Loben Sels     Karl Lee   
30           10  Spencer Johnson  Emon van Loben Sels     Karl Lee   

     teamBPlayer2 pointScore gameScore setScore tiebreakScore   side  ...  \
0   Samuel Rubell        0-0       0-0      0-0          None  Deuce  ...   
2   Samuel Rubell       15-0       0-0      0-0          None     Ad  ...  

### Add Column: Error Column
- errorType

In [None]:
# Create an empty DataFrame to store the results
error_results = pd.DataFrame(columns=['errorType', 'pointNumber'])

# Iterate through entire shot_data
for index, row in shot_data.iterrows():
    pointNumber = row['pointNumber']
    point_error_value = None

    if row['isErrorWideR'] == 1:
        point_error_value = 'Wide Right'
    elif row['isErrorWideL'] == 1:
        point_error_value = 'Wide Left'
    elif 'isErrorNet' in row and row['isErrorNet'] == 1:
        point_error_value = 'Net'
    elif row['isErrorLong'] == 1:
        point_error_value = 'Long'


    # If an error is found, append the result to the error_results DataFrame
    if point_error_value is not None:
        error_results = pd.concat([error_results, pd.DataFrame({'pointNumber': [pointNumber], 'errorType': [point_error_value]})], ignore_index=True)


# Drop duplicates based on 'pointNumber'
error_results = error_results.drop_duplicates(subset=['pointNumber'])

In [None]:
# Create a dictionary mapping 'pointNumber' to 'errorType' in error_results
error_type_mapping = dict(zip(error_results['pointNumber'], error_results['errorType']))

# Create 'errorType' column in point_df based on the mapping
point_df['errorType'] = point_df['pointNumber'].map(error_type_mapping)

point_df = point_df.replace({np.nan: None})

### Add Column: Error (Returns)

In [None]:
def get_return_error(row):
    if row['rallyCount'] == 2:
        return row['errorType']
    else:
        return None

point_df.loc[point_df['pointNumber'] == point_number, 'serveInPlacement'] = shot_data['secondServeZone']

# Apply the functions to create the new columns
point_df['returnError'] = point_df.apply(get_return_error, axis=1)


### Add Column: Last Shot

In [None]:
point_df['lastShotDirection'] = None
point_df['lastShotFhBh'] = None
point_df['lastShotHitBy'] = None
point_df['lastShotResult'] = None

# Iterate through unique pointNumbers in shot_data
for point_number in shot_data['pointNumber'].unique():
    # Check if isPointEnd == 1 exists for the given pointNumber
    if 1 in shot_data.loc[shot_data['pointNumber'] == point_number, 'isPointEnd'].values:
        # Get the information from the corresponding row
        row_with_lastshot_info = shot_data[(shot_data['pointNumber'] == point_number) & (shot_data['isPointEnd'] == 1)].iloc[0]

        # Assign values to 'lastShotDirection' and 'lastShotFhBh' columns
        point_df.loc[point_df['pointNumber'] == point_number, 'lastShotDirection'] = row_with_lastshot_info['shotDirection']
        point_df.loc[point_df['pointNumber'] == point_number, 'lastShotFhBh'] = row_with_lastshot_info['shotFhBh']
        point_df.loc[point_df['pointNumber'] == point_number, 'lastShotHitBy'] = row_with_lastshot_info['shotHitBy']

        # Determine lastShotResult based on conditions
        if row_with_lastshot_info['isWinner'] == 1 and not row_with_lastshot_info['isAce']:
            point_df.loc[point_df['pointNumber'] == point_number, 'lastShotResult'] = "Winner"
        elif row_with_lastshot_info['lastShotError'] == 1:
            point_df.loc[point_df['pointNumber'] == point_number, 'lastShotResult'] = "Error"

In [None]:
print(point_df['lastShotResult'].unique())

['Error' 'Winner' None]


### Add Column: pointWonBy

In [None]:
# Initialize variables to keep track of the state
prev_point_number = None
point_won_by_list = []

# Iterate through the DataFrame
for index, row in shot_data.iterrows():
    if row['isPointEnd'] == 1:
        # Check if pointNumber is different and consecutively increasing
        if prev_point_number is None or row['pointNumber'] == prev_point_number + 1:
            # Append pointWonBy to the list
            point_won_by_list.append(row['pointWonBy'])
            prev_point_number = row['pointNumber']
        else:
            raise ValueError("Error: Point numbers are not different or consecutively increasing.")
            break

# Add point_won_by_list as a new column to point_df
point_df['pointWonBy'] = point_won_by_list

## Error Check 22:
#### teamA and teamB player1, player2, serverName, returnerName do not have misspellings

In [None]:
# Print the unique values of teamAPlayer1, teamAPlayer2, teamBPlayer1, and teamBPlayer2
print(point_df['teamAPlayer1'].unique())
print(point_df['teamAPlayer2'].unique())
print(point_df['teamBPlayer1'].unique())
print(point_df['teamBPlayer2'].unique())

print(point_df['serverName'].unique())
print(point_df['returnerName'].unique())

if (len(point_df['serverName'].unique()) != 4):
    raise ValueError('Error: Mispelled Names')

if (len(point_df['returnerName'].unique()) != 4):
    raise ValueError('Error: Mispelled Names')

['Spencer Johnson']
['Emon van Loben Sels']
['Karl Lee']
['Samuel Rubell']
['Spencer Johnson' 'Samuel Rubell' 'Emon van Loben Sels' 'Karl Lee']
['Karl Lee' 'Samuel Rubell' 'Emon van Loben Sels' 'Spencer Johnson']


## Error Check 23:
#### pointWonBy has value that is not one of the two player names
- References: first_teamAplayer1, first_teamAplayer2, first_teamBplayer1, first_teamBplayer2 from above server info in point_df

In [None]:
print(point_df['pointWonBy'].unique())
none_pointWonBy_df = point_df[~point_df['pointWonBy'].isin([first_teamAPlayer1, first_teamAPlayer2, first_teamBPlayer1, first_teamBPlayer2])]

if none_pointWonBy_df.empty:
    print("Check passed.")
else:
    print("Error: Rows where pointWonBy is not by either player!")
    print(none_pointWonBy_df)

['Spencer Johnson' 'Emon van Loben Sels' 'Samuel Rubell' 'Karl Lee']
Check passed.


### Add Column: Break Point

In [None]:
break_point_values = ['0-40', '15-40', '30-40', '40-40']

# Create 'isBreakPoint' column in point_df
point_df['isBreakPoint'] = point_df['pointScore'].isin(break_point_values)

In [None]:
break_point_total = point_df['isBreakPoint'].sum()
print("Total break points:", break_point_total)
point_df[point_df['isBreakPoint'] == 1].head()


Total break points: 4


Unnamed: 0,pointNumber,teamAPlayer1,teamAPlayer2,teamBPlayer1,teamBPlayer2,pointScore,gameScore,setScore,tiebreakScore,side,...,returnDirection,returnFhBh,errorType,returnError,lastShotDirection,lastShotFhBh,lastShotHitBy,lastShotResult,pointWonBy,isBreakPoint
42,14,Spencer Johnson,Emon van Loben Sels,Karl Lee,Samuel Rubell,30-40,1-1,0-0,,Ad,...,Crosscourt,Backhand,Net,,Down the Line,Backhand,Emon van Loben Sels,Error,Samuel Rubell,True
62,20,Spencer Johnson,Emon van Loben Sels,Karl Lee,Samuel Rubell,30-40,2-1,0-0,,Ad,...,Crosscourt,Backhand,Net,,Crosscourt,Backhand,Samuel Rubell,Error,Spencer Johnson,True
203,57,Spencer Johnson,Emon van Loben Sels,Karl Lee,Samuel Rubell,30-40,5-5,0-0,,Ad,...,Crosscourt,Forehand,Long,Long,Crosscourt,Forehand,Samuel Rubell,Error,Emon van Loben Sels,True
205,58,Spencer Johnson,Emon van Loben Sels,Karl Lee,Samuel Rubell,40-40,5-5,0-0,,Deuce,...,Crosscourt,Forehand,Wide Left,,Crosscourt,Backhand,Karl Lee,Error,Emon van Loben Sels,True


## Warning: Do not use if atNetPlayer1 is in shot_data
### Add Column: atNetPlayer1, atNetPlayer2

- if below says atNetPlayer1: Uncomment below Code
- replace later with atNetPlayer1 atNetPlayer2

In [None]:
# Check if 'atNetPlayer1' exists in point_df
if 'bothAtNetTeamA' in shot_data.columns:
    print("Column 'bothAtNetTeamA' exists in point_df.")
else:
    raise ValueError("Column 'bothAtNetTeamA' does not exist in point_df.")


if 'bothAtNetTeamB' in shot_data.columns:
    print("Column 'bothAtNetTeamB' exists in point_df.")
else:
    raise ValueError("Column 'bothAtNetTeamB' does not exist in point_df.")

Column 'bothAtNetTeamA' exists in point_df.
Column 'bothAtNetTeamB' exists in point_df.


In [None]:
# Add atNetPlayer1 and atNetPlayer2 columns
# point_df['bothatNetTeamA'] = 0
# point_df['bothAtNetTeamB'] = 0

for point_number in shot_data['pointNumber'].unique():
    # Check if firstServeIn is 1 for the given pointNumber in shot_data
    if any((shot_data['pointNumber'] == point_number) & (shot_data['bothAtNetTeamA'] == 1)):
        point_df.loc[point_df['pointNumber'] == point_number, 'bothAtNetTeamA'] = 1

    # Check if secondServeIn is 1 for the given pointNumber in shot_data
    if any((shot_data['pointNumber'] == point_number) & (shot_data['bothAtNetTeamB'] == 1)):
        point_df.loc[point_df['pointNumber'] == point_number, 'bothAtNetTeamB'] = 1

In [None]:
point_df['bothAtNetTeamA'].unique()

array([nan,  1.])

### Add Column: setNum

In [None]:
# Extract numbers from 'setScore' and calculate the sum plus 1
point_df['setNum'] = point_df['setScore'].apply(lambda x: sum(int(char) for char in x if char.isdigit()) + 1)

In [None]:
point_df.columns

Index(['pointNumber', 'teamAPlayer1', 'teamAPlayer2', 'teamBPlayer1',
       'teamBPlayer2', 'pointScore', 'gameScore', 'setScore', 'tiebreakScore',
       'side', 'serverName', 'returnerName', 'clientTeam', 'opponentTeam',
       'Position', 'pointEndPosition', 'Duration', 'rallyCount',
       'rallyCountFreq', 'firstServeIn', 'secondServeIn', 'serveResult',
       'serveInPlacement', 'firstServeZone', 'secondServeZone', 'isAce',
       'returnDirection', 'returnFhBh', 'errorType', 'returnError',
       'lastShotDirection', 'lastShotFhBh', 'lastShotHitBy', 'lastShotResult',
       'pointWonBy', 'isBreakPoint', 'bothAtNetTeamB', 'bothAtNetTeamA',
       'setNum'],
      dtype='object')

### Add Column: Game Number, Set Number, Game/Set/Point for each player

In [None]:
point_df[['player1SetScore', 'player2SetScore']] = point_df['setScore'].str.split('-', expand=True)
point_df[['player1GameScore', 'player2GameScore']] = point_df['gameScore'].str.split('-', expand=True)
point_df[['player1PointScore', 'player2PointScore']] = point_df['pointScore'].str.split('-', expand=True)
if not point_df['tiebreakScore'].isnull().all() and not point_df['tiebreakScore'].eq("").all():
    # Perform the operation only when tiebreakScore is not empty
    point_df[['player1TiebreakScore', 'player2TiebreakScore']] = point_df['tiebreakScore'].str.split('-', expand=True)
else:
    # Set player1TiebreakScore and player2TiebreakScore to NaN
    point_df['player1TiebreakScore'] = np.nan
    point_df['player2TiebreakScore'] = np.nan

def calculate_game_number(score):
    return int(score.split('-')[0]) + int(score.split('-')[1])+1

# Apply the function to create the 'gameNumber' column
point_df['gameNumber'] = point_df['gameScore'].apply(calculate_game_number)

## Error Check 24:
#### Output Game Number in order: Should be consecutive increasing. Ex: 1,2,3,4,5,6. End of Set 1. 1,2,3,4,5,6,7,8

In [None]:
game_numbers = point_df['gameNumber'].tolist()

# Initialize variables
seen = set()
prev = None

# Iterate through gameNumber column
for num in game_numbers:
    # If the number is not in seen or it's different from the previous one, print it
    if num not in seen or num != prev:
        print(num, end=', ')
    # If the number is the same as the previous one but not consecutive, print it
    elif num == prev and num not in seen:
        print(num, end=', ')
    # Update seen set and prev variable
    seen.add(num)
    prev = num

1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 

### Add Column: ServeResult for all 4 players

In [None]:
# Add the 'teamAPlayer1ServeResult', 'teamAPlayer2ServeResult', 'teamBPlayer1ServeResult', and 'teamBPlayer2ServeResult' columns
point_df['teamAPlayer1ServeResult'] = None
point_df['teamAPlayer2ServeResult'] = None
point_df['teamBPlayer1ServeResult'] = None
point_df['teamBPlayer2ServeResult'] = None

# Set serve results based on conditions
point_df.loc[point_df['serverName'] == point_df['teamAPlayer1'], 'teamAPlayer1ServeResult'] = point_df['serveResult']
point_df.loc[point_df['serverName'] == point_df['teamAPlayer2'], 'teamAPlayer2ServeResult'] = point_df['serveResult']
point_df.loc[point_df['serverName'] == point_df['teamBPlayer1'], 'teamBPlayer1ServeResult'] = point_df['serveResult']
point_df.loc[point_df['serverName'] == point_df['teamBPlayer2'], 'teamBPlayer2ServeResult'] = point_df['serveResult']


In [None]:
point_df.loc[point_df['isAce'] == True, 'player1ServeResult'] = 'Ace'


### Add Column: ServePlacement for all 4 players

In [None]:
# Add the 'teamAPlayer1ServePlacement', 'teamAPlayer2ServePlacement', 'teamBPlayer1ServePlacement', and 'teamBPlayer2ServePlacement' columns
point_df['teamAPlayer1ServePlacement'] = None
point_df['teamAPlayer2ServePlacement'] = None
point_df['teamBPlayer1ServePlacement'] = None
point_df['teamBPlayer2ServePlacement'] = None

# Set serve placements based on conditions
point_df.loc[point_df['serverName'] == point_df['teamAPlayer1'], 'teamAPlayer1ServePlacement'] = point_df['side'] + ': ' + point_df['serveInPlacement']
point_df.loc[point_df['serverName'] == point_df['teamAPlayer2'], 'teamAPlayer2ServePlacement'] = point_df['side'] + ': ' + point_df['serveInPlacement']
point_df.loc[point_df['serverName'] == point_df['teamBPlayer1'], 'teamBPlayer1ServePlacement'] = point_df['side'] + ': ' + point_df['serveInPlacement']
point_df.loc[point_df['serverName'] == point_df['teamBPlayer2'], 'teamBPlayer2ServePlacement'] = point_df['side'] + ': ' + point_df['serveInPlacement']


### Add Column: ReturnPlacement for all 4 players

In [None]:
# Add the 'teamAPlayer1ReturnPlacement', 'teamAPlayer2ReturnPlacement', 'teamBPlayer1ReturnPlacement', and 'teamBPlayer2ReturnPlacement' columns
point_df['teamAPlayer1ReturnPlacement'] = None
point_df['teamAPlayer2ReturnPlacement'] = None
point_df['teamBPlayer1ReturnPlacement'] = None
point_df['teamBPlayer2ReturnPlacement'] = None

# Set return placements based on conditions
point_df.loc[point_df['returnerName'] == point_df['teamAPlayer1'], 'teamAPlayer1ReturnPlacement'] = point_df['returnDirection']
point_df.loc[point_df['returnerName'] == point_df['teamAPlayer2'], 'teamAPlayer2ReturnPlacement'] = point_df['returnDirection']
point_df.loc[point_df['returnerName'] == point_df['teamBPlayer1'], 'teamBPlayer1ReturnPlacement'] = point_df['returnDirection']
point_df.loc[point_df['returnerName'] == point_df['teamBPlayer2'], 'teamBPlayer2ReturnPlacement'] = point_df['returnDirection']


### Add Column: ReturnFhBh for all 4 players

In [None]:
# Add the 'teamAPlayer1ReturnFhBh', 'teamAPlayer2ReturnFhBh', 'teamBPlayer1ReturnFhBh', and 'teamBPlayer2ReturnFhBh' columns
point_df['teamAPlayer1ReturnFhBh'] = None
point_df['teamAPlayer2ReturnFhBh'] = None
point_df['teamBPlayer1ReturnFhBh'] = None
point_df['teamBPlayer2ReturnFhBh'] = None

# Set return FhBh based on conditions
point_df.loc[point_df['returnerName'] == point_df['teamAPlayer1'], 'teamAPlayer1ReturnFhBh'] = point_df['returnFhBh']
point_df.loc[point_df['returnerName'] == point_df['teamAPlayer2'], 'teamAPlayer2ReturnFhBh'] = point_df['returnFhBh']
point_df.loc[point_df['returnerName'] == point_df['teamBPlayer1'], 'teamBPlayer1ReturnFhBh'] = point_df['returnFhBh']
point_df.loc[point_df['returnerName'] == point_df['teamBPlayer2'], 'teamBPlayer2ReturnFhBh'] = point_df['returnFhBh']


### Add Column: LastShotPlacement for all 4 players

In [None]:
# Add the 'teamAPlayer1LastShotPlacement', 'teamAPlayer2LastShotPlacement', 'teamBPlayer1LastShotPlacement', and 'teamBPlayer2LastShotPlacement' columns
point_df['teamAPlayer1LastShotPlacement'] = None
point_df['teamAPlayer2LastShotPlacement'] = None
point_df['teamBPlayer1LastShotPlacement'] = None
point_df['teamBPlayer2LastShotPlacement'] = None

# Set last shot placement based on conditions
point_df.loc[point_df['lastShotHitBy'] == point_df['teamAPlayer1'], 'teamAPlayer1LastShotPlacement'] = point_df['lastShotDirection']
point_df.loc[point_df['lastShotHitBy'] == point_df['teamAPlayer2'], 'teamAPlayer2LastShotPlacement'] = point_df['lastShotDirection']
point_df.loc[point_df['lastShotHitBy'] == point_df['teamBPlayer1'], 'teamBPlayer1LastShotPlacement'] = point_df['lastShotDirection']
point_df.loc[point_df['lastShotHitBy'] == point_df['teamBPlayer2'], 'teamBPlayer2LastShotPlacement'] = point_df['lastShotDirection']


### Add Column: LastShotFhBh for all 4 players

In [None]:
# Add the 'teamAPlayer1LastShotFhBh', 'teamAPlayer2LastShotFhBh', 'teamBPlayer1LastShotFhBh', and 'teamBPlayer2LastShotFhBh' columns
point_df['teamAPlayer1LastShotFhBh'] = None
point_df['teamAPlayer2LastShotFhBh'] = None
point_df['teamBPlayer1LastShotFhBh'] = None
point_df['teamBPlayer2LastShotFhBh'] = None

# Set last shot FhBh based on conditions
point_df.loc[point_df['lastShotHitBy'] == point_df['teamAPlayer1'], 'teamAPlayer1LastShotFhBh'] = point_df['lastShotFhBh']
point_df.loc[point_df['lastShotHitBy'] == point_df['teamAPlayer2'], 'teamAPlayer2LastShotFhBh'] = point_df['lastShotFhBh']
point_df.loc[point_df['lastShotHitBy'] == point_df['teamBPlayer1'], 'teamBPlayer1LastShotFhBh'] = point_df['lastShotFhBh']
point_df.loc[point_df['lastShotHitBy'] == point_df['teamBPlayer2'], 'teamBPlayer2LastShotFhBh'] = point_df['lastShotFhBh']


### Add Column: LastShotResult for all 4 players

In [None]:
# Add the 'teamAPlayer1LastShotResult', 'teamAPlayer2LastShotResult', 'teamBPlayer1LastShotResult', and 'teamBPlayer2LastShotResult' columns
point_df['teamAPlayer1LastShotResult'] = None
point_df['teamAPlayer2LastShotResult'] = None
point_df['teamBPlayer1LastShotResult'] = None
point_df['teamBPlayer2LastShotResult'] = None

# Set last shot result based on conditions, excluding 'Ace' and 'Double Fault'
point_df.loc[
    (point_df['lastShotHitBy'] == point_df['teamAPlayer1']) &
    ~point_df['teamAPlayer1ServeResult'].isin(['Ace', 'Double Fault']),
    'teamAPlayer1LastShotResult'
] = point_df['lastShotResult']

point_df.loc[
    (point_df['lastShotHitBy'] == point_df['teamAPlayer2']) &
    ~point_df['teamAPlayer2ServeResult'].isin(['Ace', 'Double Fault']),
    'teamAPlayer2LastShotResult'
] = point_df['lastShotResult']

point_df.loc[
    (point_df['lastShotHitBy'] == point_df['teamBPlayer1']) &
    ~point_df['teamBPlayer1ServeResult'].isin(['Ace', 'Double Fault']),
    'teamBPlayer1LastShotResult'
] = point_df['lastShotResult']

point_df.loc[
    (point_df['lastShotHitBy'] == point_df['teamBPlayer2']) &
    ~point_df['teamBPlayer2ServeResult'].isin(['Ace', 'Double Fault']),
    'teamBPlayer2LastShotResult'
] = point_df['lastShotResult']


## Add Column: Poach

In [None]:
# Initialize the isPoach column in point_df to 0
point_df['isPoach'] = 0

# Iterate through the unique point numbers in shot_data
for point_number in shot_data['pointNumber'].unique():
    # Check if any row in shot_data has isPoach = 1 for the current point_number
    if ((shot_data['pointNumber'] == point_number) & (shot_data['isPoach'] == 1)).any():
        # Set isPoach = 1 in point_df for the corresponding point_number
        point_df.loc[point_df['pointNumber'] == point_number, 'isPoach'] = 1


## Add Column: serveFormation

In [None]:
point_df['serveFormation'] = None


# Iterate through the unique point numbers in shot_data
for point_number in shot_data['pointNumber'].unique():
    # Check if any row in shot_data has serveFormation = NA for the current point_number
    if ((shot_data['pointNumber'] == point_number) & (shot_data['serveFormation'] == 'I')).any():
        # Set serveFormation in point_df for the corresponding point_number row in shot_data
        point_df.loc[point_df['pointNumber'] == point_number, 'serveFormation'] = 'I'

    if ((shot_data['pointNumber'] == point_number) & (shot_data['serveFormation'] == 'Normal')).any():
        # Set serveFormation in point_df for the corresponding point_number row in shot_data
        point_df.loc[point_df['pointNumber'] == point_number, 'serveFormation'] = 'Normal'

## Add Column: returnFormation

In [None]:
point_df['returnFormation'] = 0
# Iterate through the unique point numbers in shot_data
for point_number in shot_data['pointNumber'].unique():
    # Check if any row in shot_data has isPoach = 1 for the current point_number
    if ((shot_data['pointNumber'] == point_number) & (shot_data['returnFormation'] == '2-Back')).any():
        # Set isPoach = 1 in point_df for the corresponding point_number
        point_df.loc[point_df['pointNumber'] == point_number, 'returnFormation'] = '2-Back'
    if ((shot_data['pointNumber'] == point_number) & (shot_data['returnFormation'] == 'Normal')).any():
        point_df.loc[point_df['pointNumber'] == point_number, 'returnFormation'] = 'Normal'

## Add Column: Name (For Video)

In [None]:
# Change pointScore to the specified format
point_df['Name'] = point_df.apply(lambda row: f"Set {row['setNum']}: {row['gameScore']}, {row['tiebreakScore']} {row['serverName']} Serving" if pd.notna(row['tiebreakScore']) else f"Set {row['setNum']}: {row['gameScore']}, {row['pointScore']} {row['serverName']} Serving", axis=1)


In [None]:
print(point_df[['setNum', 'gameScore','pointScore','tiebreakScore','Name']].head(5))
print(point_df[['setNum', 'gameScore','pointScore','tiebreakScore','Name']].tail(5))

    setNum gameScore pointScore tiebreakScore  \
0        1       0-0        0-0          None   
2        1       0-0       15-0          None   
5        1       0-0       30-0          None   
9        1       0-0       40-0          None   
12       1       1-0        0-0          None   

                                        Name  
0    Set 1: 0-0, 0-0 Spencer Johnson Serving  
2   Set 1: 0-0, 15-0 Spencer Johnson Serving  
5   Set 1: 0-0, 30-0 Spencer Johnson Serving  
9   Set 1: 0-0, 40-0 Spencer Johnson Serving  
12     Set 1: 1-0, 0-0 Samuel Rubell Serving  
     setNum gameScore pointScore tiebreakScore  \
257       1       6-6       None         3-Mar   
259       1       6-6       None         3-Apr   
261       1       6-6       None         3-May   
263       1       6-6       None         3-Jun   
268       1       6-6       None         4-Jun   

                                          Name  
257         Set 1: 6-6, 3-Mar Karl Lee Serving  
259  Set 1: 6-6, 3-Apr S

## Reorder DataFrame for Output

In [None]:
point_df_copy = point_df.copy()

In [None]:
point_df.shape

(75, 81)

In [None]:
point_df_copy.shape

(75, 81)

In [None]:
point_df.columns

Index(['pointNumber', 'teamAPlayer1', 'teamAPlayer2', 'teamBPlayer1',
       'teamBPlayer2', 'pointScore', 'gameScore', 'setScore', 'tiebreakScore',
       'side', 'serverName', 'returnerName', 'clientTeam', 'opponentTeam',
       'Position', 'pointEndPosition', 'Duration', 'rallyCount',
       'rallyCountFreq', 'firstServeIn', 'secondServeIn', 'serveResult',
       'serveInPlacement', 'firstServeZone', 'secondServeZone', 'isAce',
       'returnDirection', 'returnFhBh', 'errorType', 'returnError',
       'lastShotDirection', 'lastShotFhBh', 'lastShotHitBy', 'lastShotResult',
       'pointWonBy', 'isBreakPoint', 'bothAtNetTeamB', 'bothAtNetTeamA',
       'setNum', 'player1SetScore', 'player2SetScore', 'player1GameScore',
       'player2GameScore', 'player1PointScore', 'player2PointScore',
       'player1TiebreakScore', 'player2TiebreakScore', 'gameNumber',
       'teamAPlayer1ServeResult', 'teamAPlayer2ServeResult',
       'teamBPlayer1ServeResult', 'teamBPlayer2ServeResult',
       'pl

# WILL HAVE TO CHANGE THIS BASED ON POINT_DF.COLUMNS

In [None]:
desired_order = [
    'Name', 'pointNumber', 'setNum', 'gameNumber', 'teamAPlayer1', 'teamAPlayer2',
    'teamBPlayer1',  'teamBPlayer2',
    'pointScore', 'gameScore', 'setScore', 'tiebreakScore', 'side', 'serverName',
    'returnerName', 'Position', 'pointEndPosition', 'Duration', 'pointWonBy',
    'rallyCount', 'rallyCountFreq', 'firstServeIn', 'secondServeIn',
    'serveResult', 'serveInPlacement', 'firstServeZone', 'secondServeZone',
    'isAce', 'returnDirection', 'returnFhBh', 'errorType', 'returnError',
    'lastShotDirection', 'lastShotFhBh', 'lastShotHitBy', 'lastShotResult',
    'isBreakPoint', 'bothAtNetTeamA', 'bothAtNetTeamB', 'player1SetScore',
    'player2SetScore', 'player1GameScore', 'player2GameScore', 'player1PointScore',
    'player2PointScore', 'player1TiebreakScore', 'player2TiebreakScore',
    'teamAPlayer1ServeResult', 'teamAPlayer1ServePlacement', 'teamAPlayer1ReturnPlacement',
    'teamAPlayer1ReturnFhBh', 'teamAPlayer1LastShotPlacement', 'teamAPlayer1LastShotFhBh',
    'teamAPlayer1LastShotResult', 'teamAPlayer2ServeResult', 'teamAPlayer2ServePlacement',
       'teamAPlayer2ReturnPlacement', 'teamAPlayer2ReturnFhBh',
       'teamAPlayer2LastShotPlacement', 'teamAPlayer2LastShotFhBh',
       'teamAPlayer2LastShotResult', 'clientTeam', 'opponentTeam',
       'teamBPlayer1ServeResult', 'teamBPlayer2ServeResult',  'teamBPlayer1ServePlacement',
       'teamBPlayer2ServePlacement', 'teamBPlayer1ReturnPlacement',
       'teamBPlayer2ReturnPlacement','teamBPlayer1ReturnFhBh',
       'teamBPlayer2ReturnFhBh',  'teamBPlayer1LastShotPlacement',
       'teamBPlayer2LastShotPlacement',
       'teamAPlayer2LastShotFhBh', 'teamBPlayer1LastShotFhBh',
       'teamBPlayer2LastShotFhBh',  'teamBPlayer1LastShotResult',
       'teamBPlayer2LastShotResult', 'isPoach', 'serveFormation', 'returnFormation'
]

# Reorder the columns
point_df = point_df.reindex(columns=desired_order)

In [None]:
cols = ['pointNumber', 'teamAPlayer1', 'teamAPlayer2', 'teamBPlayer1',
       'teamBPlayer2', 'pointScore', 'gameScore', 'setScore', 'tiebreakScore',
       'side', 'serverName', 'returnerName', 'clientTeam', 'opponentTeam',
       'Position', 'pointEndPosition', 'Duration', 'rallyCount',
       'rallyCountFreq', 'firstServeIn', 'secondServeIn', 'serveResult',
       'serveInPlacement', 'firstServeZone', 'secondServeZone', 'isAce',
       'returnDirection', 'returnFhBh', 'errorType', 'returnError',
       'lastShotDirection', 'lastShotFhBh', 'lastShotHitBy', 'lastShotResult',
       'pointWonBy', 'isBreakPoint', 'bothAtNetTeamB', 'bothAtNetTeamA',
       'setNum', 'player1SetScore', 'player2SetScore', 'player1GameScore',
       'player2GameScore', 'player1PointScore', 'player2PointScore',
       'player1TiebreakScore', 'player2TiebreakScore', 'gameNumber',
       'teamAPlayer1ServeResult', 'teamAPlayer2ServeResult',
       'teamBPlayer1ServeResult', 'teamBPlayer2ServeResult',
       'player1ServeResult', 'teamAPlayer1ServePlacement',
       'teamAPlayer2ServePlacement', 'teamBPlayer1ServePlacement',
       'teamBPlayer2ServePlacement', 'teamAPlayer1ReturnPlacement',
       'teamAPlayer2ReturnPlacement', 'teamBPlayer1ReturnPlacement',
       'teamBPlayer2ReturnPlacement', 'teamAPlayer1ReturnFhBh',
       'teamAPlayer2ReturnFhBh', 'teamBPlayer1ReturnFhBh',
       'teamBPlayer2ReturnFhBh', 'teamAPlayer1LastShotPlacement',
       'teamAPlayer2LastShotPlacement', 'teamBPlayer1LastShotPlacement',
       'teamBPlayer2LastShotPlacement', 'teamAPlayer1LastShotFhBh',
       'teamAPlayer2LastShotFhBh', 'teamBPlayer1LastShotFhBh',
       'teamBPlayer2LastShotFhBh', 'teamAPlayer1LastShotResult',
       'teamAPlayer2LastShotResult', 'teamBPlayer1LastShotResult',
       'teamBPlayer2LastShotResult', 'Name']


In [None]:
set(cols) - set(desired_order)

{'player1ServeResult'}

## Error Check 25:
#### Check if the columns and their order are the same

In [None]:
print(point_df.shape)
print(point_df_copy.shape)

if (point_df.shape == point_df_copy.shape):
    print('Check passed.')
else:
    raise ValueError('Error: Not the same!')

(75, 81)
(75, 81)
Check passed.


In [None]:
# Get the set of column names for each DataFrame
point_df_columns = set(point_df.columns)
point_df_copy_columns = set(point_df_copy.columns)

# Find the column names unique to each DataFrame
unique_to_point_df = point_df_columns - point_df_copy_columns
unique_to_point_df_copy = point_df_copy_columns - point_df_columns

# Output the results
if unique_to_point_df:
    print("Columns unique to point_df:", unique_to_point_df)
else:
    print("All columns in point_df are also in point_df_copy")

if unique_to_point_df_copy:
    print("Columns unique to point_df_copy:", unique_to_point_df_copy)
else:
    print("All columns in point_df_copy are also in point_df")


All columns in point_df are also in point_df_copy
Columns unique to point_df_copy: {'player1ServeResult'}


## Warning: All nan to ""
### Alterating Data

In [None]:
# Change Na to "" Empty String

# Assuming point_df is your DataFrame
point_df.replace([pd.NA, None, pd.NaT, float('nan')], "", inplace=True)

# Double Check before Outputting Point (FOR VISUAL)

In [None]:
print(point_df.head(5))

                                        Name  pointNumber  setNum  gameNumber  \
0    Set 1: 0-0, 0-0 Spencer Johnson Serving            1       1           1   
2   Set 1: 0-0, 15-0 Spencer Johnson Serving            2       1           1   
5   Set 1: 0-0, 30-0 Spencer Johnson Serving            3       1           1   
9   Set 1: 0-0, 40-0 Spencer Johnson Serving            4       1           1   
12     Set 1: 1-0, 0-0 Samuel Rubell Serving            5       1           2   

       teamAPlayer1         teamAPlayer2 teamBPlayer1   teamBPlayer2  \
0   Spencer Johnson  Emon van Loben Sels     Karl Lee  Samuel Rubell   
2   Spencer Johnson  Emon van Loben Sels     Karl Lee  Samuel Rubell   
5   Spencer Johnson  Emon van Loben Sels     Karl Lee  Samuel Rubell   
9   Spencer Johnson  Emon van Loben Sels     Karl Lee  Samuel Rubell   
12  Spencer Johnson  Emon van Loben Sels     Karl Lee  Samuel Rubell   

   pointScore gameScore  ... teamBPlayer1LastShotPlacement  \
0         0-0     

In [None]:
print(point_df.tail(5))

                                          Name  pointNumber  setNum  \
257         Set 1: 6-6, 3-Mar Karl Lee Serving           71       1   
259  Set 1: 6-6, 3-Apr Spencer Johnson Serving           72       1   
261  Set 1: 6-6, 3-May Spencer Johnson Serving           73       1   
263    Set 1: 6-6, 3-Jun Samuel Rubell Serving           74       1   
268    Set 1: 6-6, 4-Jun Samuel Rubell Serving           75       1   

     gameNumber     teamAPlayer1         teamAPlayer2 teamBPlayer1  \
257          13  Spencer Johnson  Emon van Loben Sels     Karl Lee   
259          13  Spencer Johnson  Emon van Loben Sels     Karl Lee   
261          13  Spencer Johnson  Emon van Loben Sels     Karl Lee   
263          13  Spencer Johnson  Emon van Loben Sels     Karl Lee   
268          13  Spencer Johnson  Emon van Loben Sels     Karl Lee   

      teamBPlayer2 pointScore gameScore  ... teamBPlayer1LastShotPlacement  \
257  Samuel Rubell                  6-6  ...                              

In [None]:
point_df.columns

Index(['Name', 'pointNumber', 'setNum', 'gameNumber', 'teamAPlayer1',
       'teamAPlayer2', 'teamBPlayer1', 'teamBPlayer2', 'pointScore',
       'gameScore', 'setScore', 'tiebreakScore', 'side', 'serverName',
       'returnerName', 'Position', 'pointEndPosition', 'Duration',
       'pointWonBy', 'rallyCount', 'rallyCountFreq', 'firstServeIn',
       'secondServeIn', 'serveResult', 'serveInPlacement', 'firstServeZone',
       'secondServeZone', 'isAce', 'returnDirection', 'returnFhBh',
       'errorType', 'returnError', 'lastShotDirection', 'lastShotFhBh',
       'lastShotHitBy', 'lastShotResult', 'isBreakPoint', 'bothAtNetTeamA',
       'bothAtNetTeamB', 'player1SetScore', 'player2SetScore',
       'player1GameScore', 'player2GameScore', 'player1PointScore',
       'player2PointScore', 'player1TiebreakScore', 'player2TiebreakScore',
       'teamAPlayer1ServeResult', 'teamAPlayer1ServePlacement',
       'teamAPlayer1ReturnPlacement', 'teamAPlayer1ReturnFhBh',
       'teamAPlayer1LastSh

In [None]:
# Change Value Names for Match Viewer Output

# Assuming point_df is your DataFrame and teamAPlayer1 is the name from the first row
teamA = point_df.loc[0]['clientTeam']
teamB = point_df.loc[0]['opponentTeam']


# Replace values in the 'atNetPlayer1' column
# point_df['atNetTeamA'] = point_df['bothatNetTeamA'].replace({0: "", 1: teamA})
# point_df['atNetTeamB'] = point_df['bothatNetTeamB'].replace({0: "", 1: teamB})

# Add in Timestamps
#### Timestamps Error Check

In [None]:
if timestamp.shape[0] != point_df.shape[0]:
    raise ValueError("Error: The number of rows in timestamp and point_df are not the same.")
else:
    # Assign values to point_df
    point_df['Position'] = timestamp['pointStartTime'].values
    point_df['pointEndPosition'] = timestamp['pointEndTime'].values
    print("\u2713 Check passed")


✓ Check passed


# Check that Time Stamp head and end is same as point_df

In [None]:
print(timestamp.head())

   pointStartTime  pointEndTime
0            1655          5418
1           26769         42286
2           62435         76254
3           96884        111336
4          163514        168814


In [None]:
print(timestamp.tail())

    pointStartTime  pointEndTime
70         2730661       2734962
71         2766282       2772583
72         2789171       2793396
73         2813386       2820849
74         2843675       2855041


In [None]:
print(point_df[['Position', 'pointEndPosition']])

     Position  pointEndPosition
0        1655              5418
2       26769             42286
5       62435             76254
9       96884            111336
12     163514            168814
..        ...               ...
257   2730661           2734962
259   2766282           2772583
261   2789171           2793396
263   2813386           2820849
268   2843675           2855041

[75 rows x 2 columns]


# PRINT POINTCSV VISUALS

In [None]:
# Save point_df to CSV file

# Assuming point_df is your DataFrame and teamAPlayer1, teamAPlayer2, teamBPlayer1, and teamBPlayer2 are the names from the first row
teamAPlayer1NoSpace = shot_data.iloc[0]['teamAPlayer1'].replace(" ", "")
teamAPlayer2NoSpace = shot_data.iloc[0]['teamAPlayer2'].replace(" ", "")
teamBPlayer1NoSpace = shot_data.iloc[0]['teamBPlayer1'].replace(" ", "")
teamBPlayer2NoSpace = shot_data.iloc[0]['teamBPlayer2'].replace(" ", "")

# Save DataFrame to CSV file with modified player names
point_df.to_csv(f'Point_Visuals_{teamAPlayer1NoSpace}_{teamAPlayer2NoSpace}_{teamBPlayer1NoSpace}_{teamBPlayer2NoSpace}.csv', index=False)



In [None]:
def csv_to_json(csv_file_path, json_file_name):
    # Load CSV file into a pandas DataFrame
    df = pd.read_csv(csv_file_path)

    # Convert DataFrame to JSON format
    json_data = df.to_json(orient='records')

    # Save JSON data to a file in the same directory
    json_file_path = f'{json_file_name}'
    with open(json_file_path, 'w') as json_file:
        json_file.write(json_data)

    return json_file_path

# Define the CSV file path
csv_file_path = f"Point_Visuals_{teamAPlayer1NoSpace}_{teamAPlayer2NoSpace}_{teamBPlayer1NoSpace}_{teamBPlayer2NoSpace}.csv"

# Define the desired JSON file name (without extension)
json_file_name = f"Point_Visuals_{teamAPlayer1NoSpace}_{teamAPlayer2NoSpace}_{teamBPlayer1NoSpace}_{teamBPlayer2NoSpace}.json"


# Convert CSV to JSON and save in the same directory
output_file_path = csv_to_json(csv_file_path, json_file_name)

# Display the path where the JSON file is saved
output_file_path


'Point_Visuals_SpencerJohnson_EmonvanLobenSels_KarlLee_SamuelRubell.json'

# EDA

# CHANGES
1. Copy cells, do it for both players done
2. bothAtNetTeamA and bothAtNetTeamB win% and frequency done

---
3. isPoach frequency and win% for both teams
4. isFake frequency and win% for both teams
5. serveFormation direction, win%, freq
6. returnFormation win%, freq

## Shot CSV EDA
Add isPoach frequency and win% here
Add

In [None]:
# Can input CSV Directly here for statistics functions

# your_file_name = "filename.csv"
# shot_eda = pd.read_csv(your_file_name)

# if directly from notebook
shot_eda = shot_data.copy()

In [None]:
# Assuming teamAPlayer1 and teamAPlayer2 are the names from the first row in shot_eda DataFrame
teamAPlayer1 = shot_eda.iloc[0]['teamAPlayer1']
teamAPlayer2 = shot_eda.iloc[0]['teamAPlayer2']

# Filter shot_data based on the conditions for teamAPlayer1
approach_data_teamAPlayer1 = shot_eda[(shot_eda['isApproach'] == 1) & (shot_eda['shotHitBy'] == teamAPlayer1)]

# Count the distinct pointNumbers for teamAPlayer1
distinct_point_numbers_teamAPlayer1 = approach_data_teamAPlayer1['pointNumber'].nunique()

# Filter shot_data based on the conditions for teamAPlayer2
approach_data_teamAPlayer2 = shot_eda[(shot_eda['isApproach'] == 1) & (shot_eda['shotHitBy'] == teamAPlayer2)]

# Count the distinct pointNumbers for teamAPlayer2
distinct_point_numbers_teamAPlayer2 = approach_data_teamAPlayer2['pointNumber'].nunique()

# Print the results for teamAPlayer1 and teamAPlayer2
print(f"Number of Approach Shots hit by {teamAPlayer1}: {distinct_point_numbers_teamAPlayer1}")
print(f"Number of Approach Shots hit by {teamAPlayer2}: {distinct_point_numbers_teamAPlayer2}")


# Poaching EDA


# Fake EDA


#



Number of Approach Shots hit by Spencer Johnson: 2
Number of Approach Shots hit by Emon van Loben Sels: 3


In [None]:
print(point_df['Name'])

0        Set 1: 0-0, 0-0 Spencer Johnson Serving
2       Set 1: 0-0, 15-0 Spencer Johnson Serving
5       Set 1: 0-0, 30-0 Spencer Johnson Serving
9       Set 1: 0-0, 40-0 Spencer Johnson Serving
12         Set 1: 1-0, 0-0 Samuel Rubell Serving
                         ...                    
257           Set 1: 6-6, 3-Mar Karl Lee Serving
259    Set 1: 6-6, 3-Apr Spencer Johnson Serving
261    Set 1: 6-6, 3-May Spencer Johnson Serving
263      Set 1: 6-6, 3-Jun Samuel Rubell Serving
268      Set 1: 6-6, 4-Jun Samuel Rubell Serving
Name: Name, Length: 75, dtype: object


## Point CSV EDA

In [None]:
# # Can input CSV Directly here for statistics functions
# import pandas as pd
# your_file_name = "Point_Visuals_Anne-ChristineLutkemeyer_SehaYu.csv"
# point_df_eda = pd.read_csv(your_file_name)

# # OR MAKE point_df_eda copy of point_df

point_df_eda = point_df.copy()

In [None]:
print(point_df_eda.columns)

Index(['Name', 'pointNumber', 'setNum', 'gameNumber', 'teamAPlayer1',
       'teamAPlayer2', 'teamBPlayer1', 'teamBPlayer2', 'pointScore',
       'gameScore', 'setScore', 'tiebreakScore', 'side', 'serverName',
       'returnerName', 'Position', 'pointEndPosition', 'Duration',
       'pointWonBy', 'rallyCount', 'rallyCountFreq', 'firstServeIn',
       'secondServeIn', 'serveResult', 'serveInPlacement', 'firstServeZone',
       'secondServeZone', 'isAce', 'returnDirection', 'returnFhBh',
       'errorType', 'returnError', 'lastShotDirection', 'lastShotFhBh',
       'lastShotHitBy', 'lastShotResult', 'isBreakPoint', 'bothAtNetTeamA',
       'bothAtNetTeamB', 'player1SetScore', 'player2SetScore',
       'player1GameScore', 'player2GameScore', 'player1PointScore',
       'player2PointScore', 'player1TiebreakScore', 'player2TiebreakScore',
       'teamAPlayer1ServeResult', 'teamAPlayer1ServePlacement',
       'teamAPlayer1ReturnPlacement', 'teamAPlayer1ReturnFhBh',
       'teamAPlayer1LastSh

In [None]:
serve_in_rows = point_df_eda[(point_df_eda['serveResult'] == '1st Serve In') | (point_df_eda['serveResult'] == '2nd Serve In')]

# filter the rows where rallyCount column is equal to 1
desired_rows = serve_in_rows[serve_in_rows['rallyCount'] == 1]

# print each row
for index, row in desired_rows.iterrows():
    print(row[['pointNumber', 'setNum', 'gameNumber','serveResult','rallyCount' ,'serverName']])


pointNumber                 23
setNum                       1
gameNumber                   5
serveResult       1st Serve In
rallyCount                   1
serverName     Spencer Johnson
Name: 72, dtype: object
pointNumber                 25
setNum                       1
gameNumber                   5
serveResult       1st Serve In
rallyCount                   1
serverName     Spencer Johnson
Name: 75, dtype: object
pointNumber                 26
setNum                       1
gameNumber                   5
serveResult       1st Serve In
rallyCount                   1
serverName     Spencer Johnson
Name: 76, dtype: object
pointNumber                 42
setNum                       1
gameNumber                   9
serveResult       1st Serve In
rallyCount                   1
serverName     Spencer Johnson
Name: 134, dtype: object


In [None]:
first_player1Name = shot_data.iloc[0]['teamAPlayer1']



# PLAYER 1

# Display the results
print(f"\nServe Results for {first_player1Name}:")

# Assuming point_df_eda is your DataFrame
total_serves = len(point_df_eda[point_df_eda['serverName'] == first_player1Name])
print(total_serves)
first_serve_in_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 1)])
first_serve_won_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 1) & (point_df_eda['pointWonBy'] == first_player1Name)])
percentage_first_serve_in = (first_serve_in_count / total_serves) * 100 if total_serves > 0 else 0
percentage_first_serve_won = (first_serve_won_count / first_serve_in_count) * 100 if first_serve_in_count > 0 else 0

second_serve_total_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 0)])
second_serve_in_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 0)& (point_df_eda['secondServeIn'] == 1)])
second_serve_won_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 0)& (point_df_eda['secondServeIn'] == 1) & (point_df_eda['pointWonBy'] == first_player1Name)])
percentage_second_serve_in = (second_serve_in_count / second_serve_total_count) * 100 if second_serve_total_count > 0 else 0
percentage_second_serve_won = (second_serve_won_count / second_serve_in_count) * 100 if second_serve_in_count > 0 else 0



# Display the results
print("\nTotal Serves:", total_serves)
print("First Serve In (Count):", first_serve_in_count)
print("First Serve Won (Count):", first_serve_won_count)
print(f"First Serve In (%): {percentage_first_serve_in:.2f}%")
print(f"First Serve Won (%): {percentage_first_serve_won:.2f}%")

print("Second Serve In (Count):", second_serve_in_count)
print("Second Serve Total (Count):", second_serve_total_count)
print("Second Serve Won (Count):", second_serve_won_count)
print(f"Second Serve In (%): {percentage_second_serve_in:.2f}%")
print(f"Second Serve Won (%): {percentage_second_serve_won:.2f}%")

# Assuming point_df is your DataFrame
count_is_ace = (point_df_eda[point_df_eda['serverName'] == first_player1Name]['isAce']).sum()
count_is_double_fault = ((point_df_eda['serverName'] == first_player1Name) & (point_df_eda['serveResult'] == "Double Fault")).sum()

# Display the results
print("Ace (Count):", count_is_ace)
print("Double Fault (Count):", count_is_double_fault)

# Count of rows where serverName is equal to the first row of player1Name and pointWonBy is equal to the first row of player1Name
total_service_points_won = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['pointWonBy'] == first_player1Name)])
total_service_points_won_percentage = total_service_points_won / total_serves *100

# Display the results
print(f"Points Won on Serve (Count) {total_service_points_won}")

print(f"Points Won on Serve (%): {total_service_points_won_percentage:.2f}%")

# Assuming point_df is your DataFrame
return_points = point_df_eda[(point_df_eda['returnerName'] == first_player1Name)] # CHANGED THIS JERRY, REMOVED RALLY COUNT >= 2

total_return = len(return_points)
returnMade = len(return_points[(return_points['rallyCount'] > 2) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error'))])
returnError = len(return_points[(return_points['lastShotResult'] == 'Error') & (return_points['rallyCount'] == 2)])
returnWinner = len(return_points[(return_points['lastShotResult'] == 'Winner') & (return_points['rallyCount'] == 2)])
returnMadePercentage = returnMade/total_return

returnWonByPlayer1 = len(return_points[return_points['pointWonBy'] == first_player1Name])
returnWonByPlayer1Percentage = returnWonByPlayer1 / returnMade * 100 if returnMade > 0 else 0

deuceReturnCount = len(return_points[return_points['side'] == 'Deuce'])
adReturnCount = len(return_points[return_points['side'] == 'Ad'])


deuceReturnMade = len(return_points[(return_points['side'] == 'Deuce') & ((return_points['rallyCount'] > 2) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error')))])
adReturnMade = len(return_points[(return_points['side'] == 'Ad') & ((return_points['rallyCount'] > 2) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error')))])

# Calculate deuceReturnMadePercentage with a check for division by zero
if deuceReturnCount != 0:
    deuceReturnMadePercentage = deuceReturnMade / deuceReturnCount
else:
    deuceReturnMadePercentage = np.nan  # or 0, or any placeholder value

# Calculate adReturnMadePercentage with a check for division by zero
if adReturnCount != 0:
    adReturnMadePercentage = adReturnMade / adReturnCount
else:
    adReturnMadePercentage = np.nan  # or 0, or any placeholder value

deuceReturnWonByPlayer1 = len(return_points[(return_points['side'] == 'Deuce') & (return_points['pointWonBy'] == first_player1Name) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error'))])
adReturnWonByPlayer1 = len(return_points[(return_points['side'] == 'Ad') & (return_points['pointWonBy'] == first_player1Name) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error'))])

deuceReturnWonByPlayer1Percentage = deuceReturnWonByPlayer1 / deuceReturnMade * 100 if deuceReturnMade > 0 else 0
adReturnWonByPlayer1Percentage = adReturnWonByPlayer1 / adReturnMade * 100 if adReturnMade > 0 else 0




print(f"\nReturn Results for {first_player1Name}:\n")

print("Total Return (Count):", total_return)
print("Return Won (Count):", returnWonByPlayer1)
print("Return Won (%):", returnWonByPlayer1Percentage)

print("\nReturn Made (Count):", returnMade)
print("Return Made (%):", returnMadePercentage)
print("Return Error (Count):", returnError)
print("Return Winner (Count):", returnWinner)

print("\nDeuce Return (Count):", deuceReturnCount)
print("Deuce Return Made (Count):", deuceReturnMade)
print("Deuce Return Made (%):", deuceReturnMadePercentage)
print("Deuce Return Won by Player1 (%):", deuceReturnWonByPlayer1Percentage)
print("Deuce Return Won by Player1 (Count):", deuceReturnWonByPlayer1)


print("\nAd Return (Count):", adReturnCount)
print("Ad Return Made (Count):", adReturnMade)
print("Ad Return Made (%):", adReturnMadePercentage)
print("Ad Return Won by Player1 (Count):", adReturnWonByPlayer1)
print("Ad Return Won by Player1 (%):", adReturnWonByPlayer1Percentage)

# Assuming return_points is your DataFrame
deuce_return_points = return_points[(return_points['side'] == 'Deuce') & (return_points['returnerName'] == first_player1Name) & (return_points['rallyCount'] >= 2)]

# Deuce Return Points Separated by returnFhBh
deuce_forehand_return_points = deuce_return_points[deuce_return_points['returnFhBh'] == 'Forehand']
deuce_backhand_return_points = deuce_return_points[deuce_return_points['returnFhBh'] == 'Backhand']


# Count for Deuce Return Points - Made
count_deuce_forehand_made = len(deuce_forehand_return_points[(deuce_forehand_return_points['rallyCount'] > 2) | ((deuce_forehand_return_points['rallyCount'] == 2) & (deuce_forehand_return_points['lastShotResult'] != 'Error'))])
count_deuce_backhand_made = len(deuce_backhand_return_points[(deuce_backhand_return_points['rallyCount'] > 2) | ((deuce_backhand_return_points['rallyCount'] == 2) & (deuce_backhand_return_points['lastShotResult'] != 'Error'))])

# Count for Deuce Return Points - Error
count_deuce_forehand_error = len(deuce_forehand_return_points[(deuce_forehand_return_points['lastShotResult'] == 'Error') & (deuce_forehand_return_points['rallyCount'] == 2)])
count_deuce_backhand_error = len(deuce_backhand_return_points[(deuce_backhand_return_points['lastShotResult'] == 'Error') & (deuce_backhand_return_points['rallyCount'] == 2)])

# Display the counts
print("\nDeuce Forehand Return Points - Made:", count_deuce_forehand_made)
print("Deuce Forehand Return Points - Error:", count_deuce_forehand_error)

print("Deuce Backhand Return Points - Made:", count_deuce_backhand_made)
print("Deuce Backhand Return Points - Error:", count_deuce_backhand_error)

# Assuming return_points is your DataFrame
ad_return_points = return_points[(return_points['side'] == 'Ad') & (return_points['returnerName'] == first_player1Name) & (return_points['rallyCount'] >= 2)]

# Ad Return Points Separated by returnFhBh
ad_forehand_return_points = ad_return_points[ad_return_points['returnFhBh'] == 'Forehand']
ad_backhand_return_points = ad_return_points[ad_return_points['returnFhBh'] == 'Backhand']

# Count for Ad Return Points - Made
count_ad_forehand_made = len(ad_forehand_return_points[(ad_forehand_return_points['rallyCount'] > 2) | ((ad_forehand_return_points['rallyCount'] == 2) & (ad_forehand_return_points['lastShotResult'] != 'Error'))])
count_ad_backhand_made = len(ad_backhand_return_points[(ad_backhand_return_points['rallyCount'] > 2) | ((ad_backhand_return_points['rallyCount'] == 2) & (ad_backhand_return_points['lastShotResult'] != 'Error'))])

# Count for Ad Return Points - Error
count_ad_forehand_error = len(ad_forehand_return_points[(ad_forehand_return_points['lastShotResult'] == 'Error') & (ad_forehand_return_points['rallyCount'] == 2)])
count_ad_backhand_error = len(ad_backhand_return_points[(ad_backhand_return_points['lastShotResult'] == 'Error') & (ad_backhand_return_points['rallyCount'] == 2)])

# Display the counts
print("\nAd Forehand Return Points - Made:", count_ad_forehand_made)
print("Ad Forehand Return Points - Error:", count_ad_forehand_error)

print("Ad Backhand Return Points - Made:", count_ad_backhand_made)
print("Ad Backhand Return Points - Error:", count_ad_backhand_error)



# NET----

print(f"\nAt Net Results for {first_player1Name}:\n")








#------------------------------------
# PLAYER 2
#------------------------------------

first_player1Name = shot_data.iloc[0]['teamAPlayer2']


# Display the results
print(f"\nServe Results for {first_player1Name}:")

# Assuming point_df_eda is your DataFrame
total_serves = len(point_df_eda[point_df_eda['serverName'] == first_player1Name])
print(total_serves)
first_serve_in_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 1)])
first_serve_won_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 1) & (point_df_eda['pointWonBy'] == first_player1Name)])
percentage_first_serve_in = (first_serve_in_count / total_serves) * 100 if total_serves > 0 else 0
percentage_first_serve_won = (first_serve_won_count / first_serve_in_count) * 100 if first_serve_in_count > 0 else 0

second_serve_total_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 0)])
second_serve_in_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 0)& (point_df_eda['secondServeIn'] == 1)])
second_serve_won_count = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['firstServeIn'] == 0)& (point_df_eda['secondServeIn'] == 1) & (point_df_eda['pointWonBy'] == first_player1Name)])
percentage_second_serve_in = (second_serve_in_count / second_serve_total_count) * 100 if second_serve_total_count > 0 else 0
percentage_second_serve_won = (second_serve_won_count / second_serve_in_count) * 100 if second_serve_in_count > 0 else 0



# Display the results
print("\nTotal Serves:", total_serves)
print("First Serve In (Count):", first_serve_in_count)
print("First Serve Won (Count):", first_serve_won_count)
print(f"First Serve In (%): {percentage_first_serve_in:.2f}%")
print(f"First Serve Won (%): {percentage_first_serve_won:.2f}%")

print("Second Serve In (Count):", second_serve_in_count)
print("Second Serve Total (Count):", second_serve_total_count)
print("Second Serve Won (Count):", second_serve_won_count)
print(f"Second Serve In (%): {percentage_second_serve_in:.2f}%")
print(f"Second Serve Won (%): {percentage_second_serve_won:.2f}%")

# Assuming point_df is your DataFrame
count_is_ace = (point_df_eda[point_df_eda['serverName'] == first_player1Name]['isAce']).sum()
count_is_double_fault = ((point_df_eda['serverName'] == first_player1Name) & (point_df_eda['serveResult'] == "Double Fault")).sum()

# Display the results
print("Ace (Count):", count_is_ace)
print("Double Fault (Count):", count_is_double_fault)

# Count of rows where serverName is equal to the first row of player1Name and pointWonBy is equal to the first row of player1Name
total_service_points_won = len(point_df_eda[(point_df_eda['serverName'] == first_player1Name) & (point_df_eda['pointWonBy'] == first_player1Name)])
total_service_points_won_percentage = total_service_points_won / total_serves *100

# Display the results
print(f"Points Won on Serve (Count) {total_service_points_won}")

print(f"Points Won on Serve (%): {total_service_points_won_percentage:.2f}%")

# Assuming point_df is your DataFrame
return_points = point_df_eda[(point_df_eda['returnerName'] == first_player1Name)] # CHANGED THIS JERRY, REMOVED RALLY COUNT >= 2

total_return = len(return_points)
returnMade = len(return_points[(return_points['rallyCount'] > 2) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error'))])
returnError = len(return_points[(return_points['lastShotResult'] == 'Error') & (return_points['rallyCount'] == 2)])
returnWinner = len(return_points[(return_points['lastShotResult'] == 'Winner') & (return_points['rallyCount'] == 2)])
returnMadePercentage = returnMade/total_return

returnWonByPlayer1 = len(return_points[return_points['pointWonBy'] == first_player1Name])
returnWonByPlayer1Percentage = returnWonByPlayer1 / returnMade * 100 if returnMade > 0 else 0

deuceReturnCount = len(return_points[return_points['side'] == 'Deuce'])
adReturnCount = len(return_points[return_points['side'] == 'Ad'])


deuceReturnMade = len(return_points[(return_points['side'] == 'Deuce') & ((return_points['rallyCount'] > 2) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error')))])
adReturnMade = len(return_points[(return_points['side'] == 'Ad') & ((return_points['rallyCount'] > 2) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error')))])

# Calculate deuceReturnMadePercentage with a check for division by zero
if deuceReturnCount != 0:
    deuceReturnMadePercentage = deuceReturnMade / deuceReturnCount
else:
    deuceReturnMadePercentage = np.nan  # or 0, or any placeholder value

# Calculate adReturnMadePercentage with a check for division by zero
if adReturnCount != 0:
    adReturnMadePercentage = adReturnMade / adReturnCount
else:
    adReturnMadePercentage = np.nan  # or 0, or any placeholder value

deuceReturnWonByPlayer1 = len(return_points[(return_points['side'] == 'Deuce') & (return_points['pointWonBy'] == first_player1Name) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error'))])
adReturnWonByPlayer1 = len(return_points[(return_points['side'] == 'Ad') & (return_points['pointWonBy'] == first_player1Name) | ((return_points['rallyCount'] == 2) & (return_points['lastShotResult'] != 'Error'))])

deuceReturnWonByPlayer1Percentage = deuceReturnWonByPlayer1 / deuceReturnMade * 100 if deuceReturnMade > 0 else 0
adReturnWonByPlayer1Percentage = adReturnWonByPlayer1 / adReturnMade * 100 if adReturnMade > 0 else 0




print(f"\nReturn Results for {first_player1Name}:\n")

print("Total Return (Count):", total_return)
print("Return Won (Count):", returnWonByPlayer1)
print("Return Won (%):", returnWonByPlayer1Percentage)

print("\nReturn Made (Count):", returnMade)
print("Return Made (%):", returnMadePercentage)
print("Return Error (Count):", returnError)
print("Return Winner (Count):", returnWinner)

print("\nDeuce Return (Count):", deuceReturnCount)
print("Deuce Return Made (Count):", deuceReturnMade)
print("Deuce Return Made (%):", deuceReturnMadePercentage)
print("Deuce Return Won by Player1 (%):", deuceReturnWonByPlayer1Percentage)
print("Deuce Return Won by Player1 (Count):", deuceReturnWonByPlayer1)


print("\nAd Return (Count):", adReturnCount)
print("Ad Return Made (Count):", adReturnMade)
print("Ad Return Made (%):", adReturnMadePercentage)
print("Ad Return Won by Player1 (Count):", adReturnWonByPlayer1)
print("Ad Return Won by Player1 (%):", adReturnWonByPlayer1Percentage)

# Assuming return_points is your DataFrame
deuce_return_points = return_points[(return_points['side'] == 'Deuce') & (return_points['returnerName'] == first_player1Name) & (return_points['rallyCount'] >= 2)]

# Deuce Return Points Separated by returnFhBh
deuce_forehand_return_points = deuce_return_points[deuce_return_points['returnFhBh'] == 'Forehand']
deuce_backhand_return_points = deuce_return_points[deuce_return_points['returnFhBh'] == 'Backhand']


# Count for Deuce Return Points - Made
count_deuce_forehand_made = len(deuce_forehand_return_points[(deuce_forehand_return_points['rallyCount'] > 2) | ((deuce_forehand_return_points['rallyCount'] == 2) & (deuce_forehand_return_points['lastShotResult'] != 'Error'))])
count_deuce_backhand_made = len(deuce_backhand_return_points[(deuce_backhand_return_points['rallyCount'] > 2) | ((deuce_backhand_return_points['rallyCount'] == 2) & (deuce_backhand_return_points['lastShotResult'] != 'Error'))])

# Count for Deuce Return Points - Error
count_deuce_forehand_error = len(deuce_forehand_return_points[(deuce_forehand_return_points['lastShotResult'] == 'Error') & (deuce_forehand_return_points['rallyCount'] == 2)])
count_deuce_backhand_error = len(deuce_backhand_return_points[(deuce_backhand_return_points['lastShotResult'] == 'Error') & (deuce_backhand_return_points['rallyCount'] == 2)])

# Display the counts
print("\nDeuce Forehand Return Points - Made:", count_deuce_forehand_made)
print("Deuce Forehand Return Points - Error:", count_deuce_forehand_error)

print("Deuce Backhand Return Points - Made:", count_deuce_backhand_made)
print("Deuce Backhand Return Points - Error:", count_deuce_backhand_error)

# Assuming return_points is your DataFrame
ad_return_points = return_points[(return_points['side'] == 'Ad') & (return_points['returnerName'] == first_player1Name) & (return_points['rallyCount'] >= 2)]

# Ad Return Points Separated by returnFhBh
ad_forehand_return_points = ad_return_points[ad_return_points['returnFhBh'] == 'Forehand']
ad_backhand_return_points = ad_return_points[ad_return_points['returnFhBh'] == 'Backhand']

# Count for Ad Return Points - Made
count_ad_forehand_made = len(ad_forehand_return_points[(ad_forehand_return_points['rallyCount'] > 2) | ((ad_forehand_return_points['rallyCount'] == 2) & (ad_forehand_return_points['lastShotResult'] != 'Error'))])
count_ad_backhand_made = len(ad_backhand_return_points[(ad_backhand_return_points['rallyCount'] > 2) | ((ad_backhand_return_points['rallyCount'] == 2) & (ad_backhand_return_points['lastShotResult'] != 'Error'))])

# Count for Ad Return Points - Error
count_ad_forehand_error = len(ad_forehand_return_points[(ad_forehand_return_points['lastShotResult'] == 'Error') & (ad_forehand_return_points['rallyCount'] == 2)])
count_ad_backhand_error = len(ad_backhand_return_points[(ad_backhand_return_points['lastShotResult'] == 'Error') & (ad_backhand_return_points['rallyCount'] == 2)])

# Display the counts
print("\nAd Forehand Return Points - Made:", count_ad_forehand_made)
print("Ad Forehand Return Points - Error:", count_ad_forehand_error)

print("Ad Backhand Return Points - Made:", count_ad_backhand_made)
print("Ad Backhand Return Points - Error:", count_ad_backhand_error)

print(f"\nAt Net Results for {first_player1Name}:\n")


print("\nServe Formation Results:\n")
# Normal formation
total_normal_serve = len(point_df_eda[point_df_eda['serveFormation'] == 'Normal'])
percentage_normal_serve = (total_normal_serve / len(point_df_eda)) * 100
normal_serve_won = len(point_df_eda[(point_df_eda['serveFormation'] == 'Normal') & ((point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer1']) | (point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer2']))])
normal_serve_won_percentage = (normal_serve_won / total_normal_serve) * 100


# I formation
total_i_serve = len(point_df_eda[point_df_eda['serveFormation'] == 'I'])
percentage_i_serve = (total_i_serve / len(point_df_eda)) * 100
i_serve_won = len(point_df_eda[(point_df_eda['serveFormation'] == 'I') &
                               ((point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer1']) |
                                (point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer2']))])
i_serve_won_percentage = (i_serve_won / total_i_serve) * 100

print(f"Total Normal serve formations: {total_normal_serve}")
print(f"Percentage of Normal serve formations: {percentage_normal_serve:.2f}%")
print(f"Number of Normal serve formations won: {normal_serve_won}")
print(f"Percentage of Normal serve formations won: {normal_serve_won_percentage:.2f}%")
print('\n')
print(f"Total I-Formation serve formations: {total_i_serve}")
print(f"Percentage of I-Formation serve formations: {percentage_i_serve:.2f}%")
print(f"Number of I-Formation serve formations won: {i_serve_won}")
print(f"Percentage of I-Formation serve formations won: {i_serve_won_percentage:.2f}%")

print("\nReturn Formation Results:\n")
# Normal formation
total_normal_return = len(point_df_eda[point_df_eda['returnFormation'] == 'Normal'])
percentage_normal_return = (total_normal_return / len(point_df_eda)) * 100
normal_return_won = len(point_df_eda[(point_df_eda['returnFormation'] == 'Normal') & ((point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer1']) | (point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer2']))])
normal_return_won_percentage = (normal_return_won / total_normal_return) * 100

# 2-Back formation
total_2back_return = len(point_df_eda[point_df_eda['returnFormation'] == '2-Back'])
percentage_2back_return = (total_2back_return / len(point_df_eda)) * 100
twoback_return_won = len(point_df_eda[(point_df_eda['returnFormation'] == '2-Back') & ((point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer1']) | (point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer2']))])
twoback_return_won_percentage = (twoback_return_won / total_2back_return) * 100

# Print the results for 'Normal' return formations
print(f"Total 'Normal' return formations: {total_normal_return}")
print(f"Percentage of 'Normal' return formations: {percentage_normal_return:.2f}%")
print(f"Number of 'Normal' return formations won: {normal_return_won}")
print(f"Percentage of 'Normal' return formations won: {normal_return_won_percentage:.2f}%")

# Print the results for '2-back' return formations
print(f"Total '2-back' return formations: {total_2back_return}")
print(f"Percentage of '2-back' return formations: {percentage_2back_return:.2f}%")
print(f"Number of '2-back' return formations won: {twoback_return_won}")
print(f"Percentage of '2-back' return formations won: {twoback_return_won_percentage:.2f}%")

print("\nNet Results\n")

total_at_net = len(point_df_eda[point_df_eda['bothAtNetTeamA'] == 1])
percentage_at_net = (total_at_net / len(point_df_eda)) * 100
at_net_and_won = len(point_df_eda[(point_df_eda['bothAtNetTeamA'] == 1) & ((point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer1']) | (point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer2']))])
at_net_and_won_percentage = (at_net_and_won / total_at_net) * 100
print(f"Total Points Both At Net: {total_at_net}")
print(f"Percentage of Total Points At Net: {percentage_at_net:.2f}")
print(f"Both At Net Win %: {at_net_and_won_percentage:.2f}\n")
# bothatnetteamA and teamB win% and frequency
# isPoach frequency
total_poach = len(point_df_eda[point_df_eda['isPoach'] == 1])
percentage_poach = (total_poach / len(point_df_eda)) * 100
poach_and_won = len(point_df_eda[(point_df_eda['isPoach'] == 1) & ((point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer1']) | (point_df_eda['pointWonBy'] == shot_data.iloc[0]['teamAPlayer2']))])
poach_and_won_percentage = (poach_and_won / total_poach) * 100

print(f"Total Points Poached: {total_poach}")
print(f"Percentage of Total Points Poached: {percentage_poach:.2f}")
print(f"Poach Points Won: {poach_and_won}")
print(f"Poach Points Win %: {poach_and_won_percentage:.2f}")


# # Total points where atNetPlayer1 = first_player1Name
# total_at_net_player1 = len(point_df_eda[point_df_eda['atNetPlayer1'] == first_player1Name])

# # Percentage of points where atNetPlayer1 = 1 out of total points
# percentage_at_net_player1 = (total_at_net_player1 / len(point_df_eda)) * 100 if len(point_df_eda) > 0 else 0

# # Display the total count and percentage of points where atNetPlayer1 = 1
# print(f"Total Net Points for {first_player1Name}: {total_at_net_player1}")
# print(f"Percentage of Net Points for {first_player1Name}: {percentage_at_net_player1:.2f}%")

# # Points where atNetPlayer1 = first_player1Name and pointWonBy = first_player1Name
# at_net_player1_and_won_by_player1 = len(point_df_eda[(point_df_eda['atNetPlayer1'] == first_player1Name) & (point_df_eda['pointWonBy'] == first_player1Name)])

# # Percentage of points where atNetPlayer1 = first_player1Name and pointWonBy = first_player1Name out of total points where atNetPlayer1 = 1
# percentage_at_net_player1_and_won_by_player1 = (at_net_player1_and_won_by_player1 / total_at_net_player1) * 100 if total_at_net_player1 > 0 else 0

# # Display the count and percentage of points where atNetPlayer1 = 1 and pointWonBy = first_player1Name
# print(f"\nTotal Net Points won by {first_player1Name}: {at_net_player1_and_won_by_player1}")
# print(f"Percentage of Net Points won by {first_player1Name}: {percentage_at_net_player1_and_won_by_player1:.2f}%")



Serve Results for Spencer Johnson:
18

Total Serves: 18
First Serve In (Count): 14
First Serve Won (Count): 8
First Serve In (%): 77.78%
First Serve Won (%): 57.14%
Second Serve In (Count): 4
Second Serve Total (Count): 4
Second Serve Won (Count): 0
Second Serve In (%): 100.00%
Second Serve Won (%): 0.00%
Ace (Count): 4
Double Fault (Count): 0
Points Won on Serve (Count) 8
Points Won on Serve (%): 44.44%

Return Results for Spencer Johnson:

Total Return (Count): 22
Return Won (Count): 10
Return Won (%): 58.82352941176471

Return Made (Count): 17
Return Made (%): 0.7727272727272727
Return Error (Count): 2
Return Winner (Count): 0

Deuce Return (Count): 3
Deuce Return Made (Count): 1
Deuce Return Made (%): 0.3333333333333333
Deuce Return Won by Player1 (%): 200.0
Deuce Return Won by Player1 (Count): 2

Ad Return (Count): 19
Ad Return Made (Count): 16
Ad Return Made (%): 0.8421052631578947
Ad Return Won by Player1 (Count): 8
Ad Return Won by Player1 (%): 50.0

Deuce Forehand Return Poin

In [None]:
# Caitlin Breakpoint Data

# points_returned = point_df_eda[point_df_eda[]]

# print(f"\nBreakpoint Results for {first_player1Name}:\n")

# # Total points where isBreakPoint = 1
# total_breakpoint = len(point_df_eda[point_df_eda['isBreakPoint'] == 1])

# # Points where isBreakPoint = 1 and pointWonBy = first_player1Name
# breakpoint_and_won_by_player1 = len(point_df_eda[(point_df_eda['isBreakPoint'] == 1) & (point_df_eda['pointWonBy'] == first_player1Name)])

# # Percentage of points where isBreakPoint = 1 and pointWonBy = first_player1Name out of total points where isBreakPoint = 1
# percentage_breakpoint_and_won_by_player1 = (breakpoint_and_won_by_player1 / total_breakpoint) * 100 if total_breakpoint > 0 else 0

# # Display the total count of points where isBreakPoint = 1
# print(f"Total Breakpoints: {total_breakpoint}")

# # Display the count and percentage of points where isBreakPoint = 1 and pointWonBy = first_player1Name
# print(f"Total Breakpoints won by {first_player1Name}: {breakpoint_and_won_by_player1}")
# print(f"Percentage of Breakpoints won by {first_player1Name}: {percentage_breakpoint_and_won_by_player1:.2f}%")

# # Total points where isBreakPoint = 1 and serverName = first_player1Name
# total_breakpoint_serve = len(point_df_eda[(point_df_eda['isBreakPoint'] == 1) & (point_df_eda['serverName'] == first_player1Name)])

# # Points where isBreakPoint = 1, serverName = first_player1Name, and pointWonBy = first_player1Name
# breakpoint_and_won_by_player1_serve = len(point_df_eda[(point_df_eda['isBreakPoint'] == 1) & (point_df_eda['serverName'] == first_player1Name) & (point_df_eda['pointWonBy'] == first_player1Name)])

# # Percentage of points where isBreakPoint = 1, serverName = first_player1Name, and pointWonBy = first_player1Name out of total points where isBreakPoint = 1 and serverName = first_player1Name
# percentage_breakpoint_and_won_by_player1_serve = (breakpoint_and_won_by_player1_serve / total_breakpoint_serve) * 100 if total_breakpoint_serve > 0 else 0

# # Display the total count of points where isBreakPoint = 1 and serverName = first_player1Name
# print(f"\nTotal Breakpoints on Serve for {first_player1Name}: {total_breakpoint_serve}")

# # Display the count and percentage of points where isBreakPoint = 1, serverName = first_player1Name, and pointWonBy = first_player1Name
# print(f"Total Breakpoints won on Serve by {first_player1Name}: {breakpoint_and_won_by_player1_serve}")
# print(f"Percentage of Breakpoints won on Serve by {first_player1Name}: {percentage_breakpoint_and_won_by_player1_serve:.2f}%")

# Total points where isBreakPoint = 1 and returnerName = first_player1Name
total_breakpoint_return = len(point_df_eda[(point_df_eda['isBreakPoint'] == 1) & (point_df_eda['returnerName'] == first_player1Name)])

# Points where isBreakPoint = 1, returnerName = first_player1Name, and pointWonBy = first_player1Name
breakpoint_and_won_by_player1_return = len(point_df_eda[(point_df_eda['isBreakPoint'] == 1) & (point_df_eda['returnerName'] == first_player1Name) & (point_df_eda['pointWonBy'] == first_player1Name)])

# Percentage of points where isBreakPoint = 1, returnerName = first_player1Name, and pointWonBy = first_player1Name out of total points where isBreakPoint = 1 and returnerName = first_player1Name
percentage_breakpoint_and_won_by_player1_return = (breakpoint_and_won_by_player1_return / total_breakpoint_return) * 100 if total_breakpoint_return > 0 else 0

# Display the total count of points where isBreakPoint = 1 and returnerName = first_player1Name
print(f"\nTotal Breakpoints on Return for {first_player1Name}: {total_breakpoint_return}")

# Display the count and percentage of points where isBreakPoint = 1, returnerName = first_player1Name, and pointWonBy = first_player1Name
print(f"Total Breakpoints won on Return by {first_player1Name}: {breakpoint_and_won_by_player1_return}")
print(f"Percentage of Breakpoints won on Return by {first_player1Name}: {percentage_breakpoint_and_won_by_player1_return:.2f}%")

# Jimmy Returning Games Won
# games won/returning games by Jimmy Hou

# points_returned = point_df_eda[point_df_eda["returnerName"] == first_player1Name]

# # Return percentage won on first serve
# first_serves_won = points_returned[(points_returned['firstServeIn'] == 1) & (points_returned['pointWonBy'] == first_player1Name)]
# total_first_serves = points_returned[points_returned['firstServeIn'] == 1]
# fs_won_per_player1 = 100 * len(first_serves_won) / len(total_first_serves)
# # print(f"\n{player1} won {fs_won_per_player1:.2f}% of first serves returned.")
# print(f"\nPerecentage of Breakpoinnts won by {first_player1Name} on Return when returning a first serve: {fs_won_per_player1:.2f}%")

# # Return percentage won on second serve
# second_serves_won = points_returned[(points_returned['secondServeIn'] == 1) & (points_returned['pointWonBy'] == first_player1Name)]
# total_second_serves = points_returned[points_returned['secondServeIn'] == 1]
# ss_won_per_player1 = 100 * len(second_serves_won) / len(total_second_serves)
# # print(f"{player1} won {ss_won_per_player1:.2f}% of second serves returned."
# print(f"Perecentage of Breakpoinnts won by {first_player1Name} on Return when returning a second serve: {ss_won_per_player1:.2f}%")



Total Breakpoints on Return for Emon van Loben Sels: 0
Total Breakpoints won on Return by Emon van Loben Sels: 0
Percentage of Breakpoints won on Return by Emon van Loben Sels: 0.00%


In [None]:
# Filter points where serverName is equal to first_player1Name
filtered_points = point_df[point_df['serverName'] == first_player1Name]

# Group the filtered points by player1ServePlacement and count the occurrences
serve_placement_counts = filtered_points.groupby('teamAPlayer1ServePlacement').size()

serve_placements_ad = serve_placement_counts.filter(like='Ad').sum()
serve_placements_deuce = serve_placement_counts.filter(like='Deuce').sum()

# Iterate over filtered_points
for index, point in filtered_points.iterrows():
    serve_placement = point['teamAPlayer1ServePlacement']

    # Check if serve placement is not in serve_placement_counts
    if serve_placement not in serve_placement_counts:
        print(point)


# Initialize dictionaries to store counts and percentages
point_won_counts = {}
point_won_percentages = {}
print(f"Total {len(filtered_points)}")

# Iterate over serve placements
for serve_placement, count in serve_placement_counts.items():
    # Filter points with the specific serve placement
    serve_placement_points = filtered_points[filtered_points['teamAPlayer1ServePlacement'] == serve_placement]

    # Count points won by first_player1Name
    point_won_count = serve_placement_points[serve_placement_points['pointWonBy'] == first_player1Name].shape[0]

    # Calculate percentage
    point_won_percentage = (point_won_count / count) * 100 if count > 0 else 0

    # Store counts and percentages
    point_won_counts[serve_placement] = point_won_count
    point_won_percentages[serve_placement] = point_won_percentage





# Filter serve placements for Ad and Deuce [CHANGED: BRIAN NTOES]
serve_placements_ad = serve_placement_counts.filter(like='Ad').sum()
serve_placements_deuce = serve_placement_counts.filter(like='Deuce').sum()

# Print counts and percentages [CHANGED: BRIAN NOTES]
for serve_placement, count in serve_placement_counts.items():
    print(f"Serve Placement: {serve_placement}")
    print(f"Total Serves: {count}")

    if "Deuce" in serve_placement: # [CHANGED: BRIAN NOTES]
        deuce_serve_format_number = f"{count/serve_placements_deuce:.2f}"
        deuce_serve_percent = round(float(deuce_serve_format_number) * 100,2)
        print(f"Serve Frequency: {deuce_serve_percent}% ({count}/{serve_placements_deuce}) Deuce Serves")
    if "Ad" in serve_placement: # [CHANGED: BRIAN NOTES]
        ad_serve_format_number = f"{count/serve_placements_ad:.2f}"
        ad_serve_percent = round(float(ad_serve_format_number) * 100,2)
        print(f"Serve Frequency: {ad_serve_percent}% ({count}/{serve_placements_ad}) Ad Serves")

    print(f"Serves Won by {first_player1Name}: {point_won_counts.get(serve_placement, 0)}")
    print(f"Percentage: {point_won_percentages.get(serve_placement, 0):.2f}%\n")


# print("This is Ad count: " + str(len(point_df[(point_df['serverName'] == first_player1Name) & (point_df['side'] == 'Ad')])))
# print("This is Deuce count: " + str(len(point_df[(point_df['serverName'] == first_player1Name) & (point_df['side'] == 'Deuce')])))

Total 21
Serve Placement: 
Total Serves: 21
Serves Won by Emon van Loben Sels: 9
Percentage: 42.86%



In [None]:
first_player1Name = shot_data.iloc[0]['teamAPlayer1']
print(f"\nError Data for {first_player1Name}:\n")
# Filter the DataFrame based on specified conditions
total_errors = point_df[(point_df['lastShotHitBy'] == first_player1Name) &
                           (point_df['lastShotResult'] == 'Error')]

import numpy as np

# Filter rows without NaN values in relevant columns
forehand_errors = point_df[(point_df['lastShotHitBy'] == first_player1Name) &
                           (point_df['lastShotResult'] == 'Error') &
                           (point_df['lastShotFhBh'] == 'Forehand') &
                           (~point_df['errorType'].isnull())]  # Ensure 'errorType' column doesn't have NaN

backhand_errors = point_df[(point_df['lastShotHitBy'] == first_player1Name) &
                           (point_df['lastShotResult'] == 'Error') &
                           (point_df['lastShotFhBh'] == 'Backhand') &
                           (~point_df['errorType'].isnull())]  # Ensure 'errorType' column doesn't have NaN

# Count the occurrences of 'Forehand' and 'Backhand' separately
forehand_counts = forehand_errors.shape[0]  # Count rows
backhand_counts = backhand_errors.shape[0]  # Count rows

# Print the total error counts for verification
total_error_counts = forehand_counts + backhand_counts



# Desired output order
desired_order = ['Net', 'Long', 'Wide Right', 'Wide Left']

# # Get value counts of 'errorType' for Forehand errors [CHANGED: BRIAN]
# forehand_error_types = forehand_errors['errorType'].value_counts(dropna=False).loc[desired_order]  # Include NaN values in count
# forehand_error_types_df = pd.DataFrame(forehand_error_types) # change into dataframe to erase object line
# Get value counts of 'errorType' for Backhand errors



######################### CHANGED SECTION ##############################################
# [CHANGED: BRIAN]

forehand_error_types = forehand_errors['errorType'].value_counts(dropna=False)

# Create a Series with desired index containing zeros
zeros_series = pd.Series(0, index=desired_order)

# Combine the original Series with the zeros Series
forehand_error_types_combined = forehand_error_types.combine(zeros_series, max, fill_value=0)

# Reindex the Series to follow the desired order
forehand_error_types_ordered = forehand_error_types_combined.reindex(desired_order, fill_value=0)

# Create the DataFrame
forehand_error_types_df = pd.DataFrame(forehand_error_types_ordered, columns=['Count'])

# Get value counts of 'errorType' for Backhand errors [CHANGED: BRIAN]
# backhand_error_types = backhand_errors['errorType'].value_counts(dropna=False).loc[desired_order]  # Include NaN values in count
# backhand_error_types_df = pd.DataFrame(backhand_error_types) # change into dataframe to erase object line

# Get value counts of 'errorType' for Backhand errors
backhand_error_types = backhand_errors['errorType'].value_counts(dropna=False)

# Combine the original Series with the zeros Series
backhand_error_types_combined = backhand_error_types.combine(zeros_series, max, fill_value=0)

# Reindex the Series to follow the desired order
backhand_error_types_ordered = backhand_error_types_combined.reindex(desired_order, fill_value=0)

# Create the DataFrame
backhand_error_types_df = pd.DataFrame(backhand_error_types_ordered, columns=['Count'])



######################### CHANGED SECTION ##############################################


# Print the counts and error types
print("Count of Total errors:", total_error_counts)
print("Count of Forehand errors:", forehand_counts)
print(f"Forehand Error %: {(forehand_counts/total_error_counts)*100:.2f}%")
print("Count of Backhand errors:", backhand_counts)
print(f"Backhand Error %: {(backhand_counts/total_error_counts)*100:.2f}%")
print("\nForehand errors:\n", forehand_error_types_df)


# # Group by both 'lastShotDirection' and 'errorType', and then count occurrences
forehand_error_counts = forehand_errors.groupby(['teamAPlayer1LastShotPlacement', 'errorType']).size().unstack(fill_value=0)  # Fill NaN with 0
forehand_error_counts_ordered = forehand_error_counts.reindex(columns = desired_order) # [CHANGED: BRIAN]

print("\nValue counts of 'errorType' for Forehand errors with different directions:\n", forehand_error_counts_ordered)


print("\nBackhand errors:\n", backhand_error_types_df)


# Group by both 'lastShotDirection' and 'errorType', and then count occurrences
backhand_error_counts = backhand_errors.groupby(['teamAPlayer1LastShotPlacement', 'errorType']).size().unstack(fill_value=0) # Fill NaN with 0
backhand_error_counts_ordered = backhand_error_counts.reindex(columns = desired_order) # [CHANGED: BRIAN]

print("\nValue counts of 'errorType' for Backhand errors with different directions:\n", backhand_error_counts_ordered)




# ----- PLAYER 2 -----
first_player1Name = shot_data.iloc[0]['teamAPlayer2']

print(f"\nError Data for {first_player1Name}:\n")
# Filter the DataFrame based on specified conditions
total_errors = point_df[(point_df['lastShotHitBy'] == first_player1Name) &
                           (point_df['lastShotResult'] == 'Error')]

import numpy as np

# Filter rows without NaN values in relevant columns
forehand_errors = point_df[(point_df['lastShotHitBy'] == first_player1Name) &
                           (point_df['lastShotResult'] == 'Error') &
                           (point_df['lastShotFhBh'] == 'Forehand') &
                           (~point_df['errorType'].isnull())]  # Ensure 'errorType' column doesn't have NaN

backhand_errors = point_df[(point_df['lastShotHitBy'] == first_player1Name) &
                           (point_df['lastShotResult'] == 'Error') &
                           (point_df['lastShotFhBh'] == 'Backhand') &
                           (~point_df['errorType'].isnull())]  # Ensure 'errorType' column doesn't have NaN

# Count the occurrences of 'Forehand' and 'Backhand' separately
forehand_counts = forehand_errors.shape[0]  # Count rows
backhand_counts = backhand_errors.shape[0]  # Count rows

# Print the total error counts for verification
total_error_counts = forehand_counts + backhand_counts



# Desired output order
desired_order = ['Net', 'Long', 'Wide Right', 'Wide Left']

# # Get value counts of 'errorType' for Forehand errors [CHANGED: BRIAN]
# forehand_error_types = forehand_errors['errorType'].value_counts(dropna=False).loc[desired_order]  # Include NaN values in count
# forehand_error_types_df = pd.DataFrame(forehand_error_types) # change into dataframe to erase object line
# Get value counts of 'errorType' for Backhand errors



######################### CHANGED SECTION ##############################################
# [CHANGED: BRIAN]

forehand_error_types = forehand_errors['errorType'].value_counts(dropna=False)

# Create a Series with desired index containing zeros
zeros_series = pd.Series(0, index=desired_order)

# Combine the original Series with the zeros Series
forehand_error_types_combined = forehand_error_types.combine(zeros_series, max, fill_value=0)

# Reindex the Series to follow the desired order
forehand_error_types_ordered = forehand_error_types_combined.reindex(desired_order, fill_value=0)

# Create the DataFrame
forehand_error_types_df = pd.DataFrame(forehand_error_types_ordered, columns=['Count'])

# Get value counts of 'errorType' for Backhand errors [CHANGED: BRIAN]
# backhand_error_types = backhand_errors['errorType'].value_counts(dropna=False).loc[desired_order]  # Include NaN values in count
# backhand_error_types_df = pd.DataFrame(backhand_error_types) # change into dataframe to erase object line

# Get value counts of 'errorType' for Backhand errors
backhand_error_types = backhand_errors['errorType'].value_counts(dropna=False)

# Combine the original Series with the zeros Series
backhand_error_types_combined = backhand_error_types.combine(zeros_series, max, fill_value=0)

# Reindex the Series to follow the desired order
backhand_error_types_ordered = backhand_error_types_combined.reindex(desired_order, fill_value=0)

# Create the DataFrame
backhand_error_types_df = pd.DataFrame(backhand_error_types_ordered, columns=['Count'])



######################### CHANGED SECTION ##############################################


# Print the counts and error types
print("Count of Total errors:", total_error_counts)
print("Count of Forehand errors:", forehand_counts)
print(f"Forehand Error %: {(forehand_counts/total_error_counts)*100:.2f}%")
print("Count of Backhand errors:", backhand_counts)
print(f"Backhand Error %: {(backhand_counts/total_error_counts)*100:.2f}%")
print("\nForehand errors:\n", forehand_error_types_df)


# # Group by both 'lastShotDirection' and 'errorType', and then count occurrences
forehand_error_counts = forehand_errors.groupby(['teamAPlayer1LastShotPlacement', 'errorType']).size().unstack(fill_value=0)  # Fill NaN with 0
forehand_error_counts_ordered = forehand_error_counts.reindex(columns = desired_order) # [CHANGED: BRIAN]

print("\nValue counts of 'errorType' for Forehand errors with different directions:\n", forehand_error_counts_ordered)


print("\nBackhand errors:\n", backhand_error_types_df)


# Group by both 'lastShotDirection' and 'errorType', and then count occurrences
backhand_error_counts = backhand_errors.groupby(['teamAPlayer1LastShotPlacement', 'errorType']).size().unstack(fill_value=0) # Fill NaN with 0
backhand_error_counts_ordered = backhand_error_counts.reindex(columns = desired_order) # [CHANGED: BRIAN]

print("\nValue counts of 'errorType' for Backhand errors with different directions:\n", backhand_error_counts_ordered)


Error Data for Spencer Johnson:

Count of Total errors: 12
Count of Forehand errors: 6
Forehand Error %: 50.00%
Count of Backhand errors: 6
Backhand Error %: 50.00%

Forehand errors:
             Count
Net             4
Long            2
Wide Right      0
Wide Left       0

Value counts of 'errorType' for Forehand errors with different directions:
 errorType                      Net  Long  Wide Right  Wide Left
teamAPlayer1LastShotPlacement                                  
Crosscourt                       3     0         NaN        NaN
Down the Line                    1     2         NaN        NaN

Backhand errors:
             Count
Net             4
Long            1
Wide Right      1
Wide Left       0

Value counts of 'errorType' for Backhand errors with different directions:
 errorType                      Net  Long  Wide Right  Wide Left
teamAPlayer1LastShotPlacement                                  
Crosscourt                       3     1           1        NaN
Down the Line 

In [None]:
forehand_errors.columns

Index(['Name', 'pointNumber', 'setNum', 'gameNumber', 'teamAPlayer1',
       'teamAPlayer2', 'teamBPlayer1', 'teamBPlayer2', 'pointScore',
       'gameScore', 'setScore', 'tiebreakScore', 'side', 'serverName',
       'returnerName', 'Position', 'pointEndPosition', 'Duration',
       'pointWonBy', 'rallyCount', 'rallyCountFreq', 'firstServeIn',
       'secondServeIn', 'serveResult', 'serveInPlacement', 'firstServeZone',
       'secondServeZone', 'isAce', 'returnDirection', 'returnFhBh',
       'errorType', 'returnError', 'lastShotDirection', 'lastShotFhBh',
       'lastShotHitBy', 'lastShotResult', 'isBreakPoint', 'bothAtNetTeamA',
       'bothAtNetTeamB', 'player1SetScore', 'player2SetScore',
       'player1GameScore', 'player2GameScore', 'player1PointScore',
       'player2PointScore', 'player1TiebreakScore', 'player2TiebreakScore',
       'teamAPlayer1ServeResult', 'teamAPlayer1ServePlacement',
       'teamAPlayer1ReturnPlacement', 'teamAPlayer1ReturnFhBh',
       'teamAPlayer1LastSh

# CONDENSED REPORT (NEEDS UPDATE)

In [None]:
# pip install reportlab

In [None]:
# from reportlab.lib import colors
# from reportlab.lib.pagesizes import letter
# from reportlab.platypus import SimpleDocTemplate, Table, TableStyle

# # Your raw output data
# raw_data = [
#     {"Serve Placement": "Ad: Body", "Total Serves": 3, "Serves Won by Tian Fang Ran": 3, "Percentage": "100.00%"},
#     {"Serve Placement": "Ad: T", "Total Serves": 11, "Serves Won by Tian Fang Ran": 8, "Percentage": "72.73%"},
#     {"Serve Placement": "Ad: Wide", "Total Serves": 19, "Serves Won by Tian Fang Ran": 10, "Percentage": "52.63%"},
#     {"Serve Placement": "Deuce: Body", "Total Serves": 5, "Serves Won by Tian Fang Ran": 2, "Percentage": "40.00%"},
#     {"Serve Placement": "Deuce: T", "Total Serves": 19, "Serves Won by Tian Fang Ran": 10, "Percentage": "52.63%"},
#     {"Serve Placement": "Deuce: Wide", "Total Serves": 14, "Serves Won by Tian Fang Ran": 7, "Percentage": "50.00%"}
# ]

# # Convert raw data to list of lists for table
# table_data = [["Serve Placement", "Total Serves", "Serves Won by Tian Fang Ran", "Percentage"]]
# for row in raw_data:
#     table_data.append([row["Serve Placement"], row["Total Serves"], row["Serves Won by Tian Fang Ran"], row["Percentage"]])

# # Create PDF
# pdf_filename = "tennis_stats.pdf"
# doc = SimpleDocTemplate(pdf_filename, pagesize=letter)
# table = Table(table_data)

# # Add style to table
# style = TableStyle([('BACKGROUND', (0, 0), (-1, 0), colors.gray),
#                     ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
#                     ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
#                     ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
#                     ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
#                     ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
#                     ('GRID', (0, 0), (-1, -1), 1, colors.black)])
# table.setStyle(style)

# # Add table to PDF
# doc.build([table])
