# Post Process
- Adds Point Number (Comment out if already there)
- Forward Fills these columns'gameScore','setScore', 'tiebreakScore', 'serverName', 'player1Name', 'player2Name'
   - Add this when in tagger: Player1Hand, player2Hand 
- Adds Returner Name
- Adds Shot Hit By for each Shot
- Adds isInsidein/isInsideOut (booleans 0/1) Columns

In [1265]:
import pandas as pd
import numpy as np

# Load in Data

In [1266]:
# Put your file name here
shot_data = pd.read_csv("./oldtagger/nandaPerotFullMatchAgg.csv")



# Revert Dates in setScore/gameScore to Strings

# Clean because match has both taggers

In [1267]:
import re

# Define a mapping for month abbreviations
month_mapping = {'Jan': '1', 'Feb': '2', 'Mar': '3', 'Apr': '4', 'May': '5', 'Jun': '6',
                 'Jul': '7', 'Aug': '8', 'Sep': '9', 'Oct': '10', 'Nov': '11', 'Dec': '12'}

# Function to convert string like 'Jan-00' to '1-0'
def convert_score_string(score_str):
    # Check if the string has a month abbreviation and a year ending with '00'
    if re.match(r'^\d{1,2}-[A-Za-z]{3}$', score_str):
        # Extract year and month abbreviation
        year, month = score_str.split('-')

        # Remove leading zeros from the year
        year = str(int(year))

        # Replace month abbreviation with corresponding number
        month_number = month_mapping.get(month, month)

        # Concatenate the parts to form the transformed string
        transformed_str = f'{year}-{month_number}'

        return transformed_str

    # Check if the string has a month abbreviation and a year with leading '0's
    elif re.match(r'^[A-Za-z]{3}-\d{1,2}$', score_str):
        # Extract month abbreviation and year
        month, year = score_str.split('-')

        # Replace month abbreviation with corresponding number
        month_number = month_mapping.get(month, month)

        # Remove leading zeros from the year
        year = str(int(year))

        # Concatenate the parts to form the transformed string
        transformed_str = f'{month_number}-{year}'

        return transformed_str

    return score_str

# Apply the conversion function to the relevant columns in shot_data
shot_data['gameScore'] = shot_data['gameScore'].apply(convert_score_string)
shot_data['setScore'] = shot_data['setScore'].apply(convert_score_string)
# shot_data['tiebreakerScore'] = shot_data['tiebreakerScore'].apply(convert_score_string)


In [1268]:
# Assuming shot_data is your DataFrame
unique_set_scores = shot_data['setScore'].unique()
unique_game_scores = shot_data['gameScore'].unique()

# Print unique values
print("Unique Set Scores:", unique_set_scores)
print("Unique Game Scores:", unique_game_scores)


Unique Set Scores: ['0-0' '1-0']
Unique Game Scores: ['0-0' '1-0' '1-1' '2-1' '3-1' '3-2' '4-2' '4-3' '5-3' '5-4' '5-5' '6-5'
 '0-1']


In [1269]:
# Create new columns
shot_data['shotFhBh'] = shot_data['shotType'].apply(lambda x: 'Forehand' if 'Forehand' in str(x) else ('Backhand' if 'Backhand' in str(x) else None))
shot_data['shotDirection'] = shot_data['shotType'].apply(lambda x: 'Crosscourt' if 'Crosscourt' in str(x) else ('Down the Line' if 'Down the Line' in str(x) else None))


shot_data.loc[pd.notna(shot_data['returnData']), 'shotFhBh'] = shot_data.loc[pd.notna(shot_data['returnData']), 'returnData'].apply(lambda x: 'Forehand' if 'Forehand' in str(x) else ('Backhand' if 'Backhand' in str(x) else None))
shot_data.loc[pd.notna(shot_data['returnData']), 'shotDirection'] = shot_data.loc[pd.notna(shot_data['returnData']), 'returnData'].apply(lambda x: 'Crosscourt' if 'Crosscourt' in str(x) else ('Down the Line' if 'Down the Line' in str(x) else None))


# Display the resulting DataFrame
print(shot_data[['shotType', 'returnData', 'shotFhBh', 'shotDirection']].head(15))

                  shotType              returnData  shotFhBh  shotDirection
0                      NaN                     NaN      None           None
1                      NaN     Forehand Crosscourt  Forehand     Crosscourt
2      Forehand Crosscourt                     NaN  Forehand     Crosscourt
3   Forehand Down the Line                     NaN  Forehand  Down the Line
4      Backhand Crosscourt                     NaN  Backhand     Crosscourt
5                      NaN                     NaN      None           None
6                      NaN     Backhand Crosscourt  Backhand     Crosscourt
7   Forehand Down the Line                     NaN  Forehand  Down the Line
8      Forehand Crosscourt                     NaN  Forehand     Crosscourt
9   Forehand Down the Line                     NaN  Forehand  Down the Line
10     Backhand Crosscourt                     NaN  Backhand     Crosscourt
11  Backhand Down the Line                     NaN  Backhand  Down the Line
12          

In [1270]:
# IF NO SIDE Column (Old tagger)
deuce_values = ['0-0', '15-15', '40-15', '30-0', '0-30', '15-40', '40-40 (Deuce)', '30-30']
shot_data['side'] = shot_data.apply(lambda row: 'Deuce' if str(row['pointScore']) in deuce_values and row['isPointStart'] == 1 else 'Ad' if str(row['pointScore']) is not None and row['isPointStart'] == 1 else None, axis=1)

# Display the resulting DataFrame
print(shot_data[['shotType', 'shotFhBh', 'shotDirection', 'pointScore', 'side']].head(10))

                 shotType  shotFhBh  shotDirection pointScore   side
0                     NaN      None           None        0-0  Deuce
1                     NaN  Forehand     Crosscourt        0-0   None
2     Forehand Crosscourt  Forehand     Crosscourt        0-0   None
3  Forehand Down the Line  Forehand  Down the Line        0-0   None
4     Backhand Crosscourt  Backhand     Crosscourt        0-0   None
5                     NaN      None           None       15-0     Ad
6                     NaN  Backhand     Crosscourt       15-0   None
7  Forehand Down the Line  Forehand  Down the Line       15-0   None
8     Forehand Crosscourt  Forehand     Crosscourt       15-0   None
9  Forehand Down the Line  Forehand  Down the Line       15-0   None


In [1271]:
# Check Columns
shot_data.columns

Index(['pointScore', 'gameScore', 'setScore', 'isPointStart', 'pointStartTime',
       'isPointEnd', 'pointEndTime', 'shotInRally', 'serverName',
       'firstServeIn', 'firstServeZone', 'firstServeXCoord',
       'firstServeYCoord', 'secondServeIn', 'secondServeZone',
       'secondServeXCoord', 'secondServeYCoord', 'returnData',
       'returnContactX', 'returnContactY', 'returnPlacementX',
       'returnPlacementY', 'shotContactX', 'shotContactY', 'shotType',
       'Topspin', 'isVolley', 'isApproach', 'isDropshot', 'shotLocationX',
       'shotLocationY', 'isWinner', 'isErrorWideR', 'isErrorWideL',
       'isErrorNet', 'isErrorLong', 'clientTeam', 'Date', 'Division', 'Event',
       'lineupPosition', 'matchDetails', 'opponentTeam', 'player1Name',
       'player2Name', 'Round', 'Surface', 'Notes', 'pointNumber',
       'returnerName', 'shotHitBy', 'isInsideOut', 'isInsideIn', 'shotFhBh',
       'shotDirection', 'side'],
      dtype='object')

In [1272]:
# Count NaN or empty values in specified columns
na_counts = shot_data[['shotInRally', 'pointScore', 'gameScore', 'setScore', 'side']].isna().sum()

# Display the counts
print(na_counts)



shotInRally      0
pointScore       0
gameScore        0
setScore         0
side           580
dtype: int64


In [1273]:
# Filter rows where 'shotInRally' is NaN
na_rows = shot_data[shot_data['shotInRally'].isna()]

# Display the rows
print(na_rows)

Empty DataFrame
Columns: [pointScore, gameScore, setScore, isPointStart, pointStartTime, isPointEnd, pointEndTime, shotInRally, serverName, firstServeIn, firstServeZone, firstServeXCoord, firstServeYCoord, secondServeIn, secondServeZone, secondServeXCoord, secondServeYCoord, returnData, returnContactX, returnContactY, returnPlacementX, returnPlacementY, shotContactX, shotContactY, shotType, Topspin, isVolley, isApproach, isDropshot, shotLocationX, shotLocationY, isWinner, isErrorWideR, isErrorWideL, isErrorNet, isErrorLong, clientTeam, Date, Division, Event, lineupPosition, matchDetails, opponentTeam, player1Name, player2Name, Round, Surface, Notes, pointNumber, returnerName, shotHitBy, isInsideOut, isInsideIn, shotFhBh, shotDirection, side]
Index: []

[0 rows x 56 columns]


# Cleaning Shot DF

In [1274]:
# Make Scores Strings not Date Time
columns_to_convert = ['gameScore', 'setScore'] #if no tiebreakScore
# columns_to_convert = ['gameScore', 'setScore', 'tiebreakScore']
shot_data[columns_to_convert] = shot_data[columns_to_convert].astype(object)

In [1275]:
# Adds Point Number, by incrementing when isPointStart = 1

point_starts = (shot_data['isPointStart'] == 1) 
shot_data['pointNumber'] = point_starts.cumsum()

# print(shot_data['pointNumber'].tail(10))

In [1276]:
# Forward Fill GameScore, SetScore, tiebreakScore, serverName, player1Name, player2Name

columns_to_fill = ['gameScore','setScore', 'serverName', 'player1Name', 'player2Name'] 
# columns_to_fill = ['gameScore','setScore', 'tiebreakScore', 'serverName', 'player1Name', 'player2Name'] 
# Add player1Hand, player2Hand when it is in tagger

for column in columns_to_fill:
    shot_data[column].replace(['', 'na'], pd.NaT, inplace=True)
    shot_data[column] = shot_data[column].ffill()

# Print the updated DataFrame
print(shot_data)


    pointScore gameScore setScore  isPointStart  pointStartTime  isPointEnd  \
0          0-0       0-0      0-0           1.0         20671.0         NaN   
1          0-0       0-0      0-0           NaN             NaN         NaN   
2          0-0       0-0      0-0           NaN             NaN         NaN   
3          0-0       0-0      0-0           NaN             NaN         NaN   
4          0-0       0-0      0-0           NaN             NaN         1.0   
..         ...       ...      ...           ...             ...         ...   
697       0-40       5-3      1-0           NaN             NaN         NaN   
698       0-40       5-3      1-0           NaN             NaN         1.0   
699      15-40       5-3      1-0           1.0       6497093.0         NaN   
700      15-40       5-3      1-0           NaN             NaN         NaN   
701      15-40       5-3      1-0           NaN             NaN         1.0   

     pointEndTime  shotInRally     serverName  firs

In [1277]:
# Get Returner Name for Shot DF

# Extract player1Name and player2Name from the first row of shot_data
first_row_shot_df = shot_data.iloc[0]
player1_name = first_row_shot_df['player1Name']
player2_name = first_row_shot_df['player2Name']

# # Can Also Manually inpute Player 1 and 2 name
# player1_name = "Kimmi Hance"
# player2_name = "Malaika Rapolu"

def get_returner_name(server_name):
    return player2_name if server_name == player1_name else player1_name

# Add 'Returner Name' column to point_df using the function
shot_data['returnerName'] = shot_data['serverName'].apply(get_returner_name)

print(f"Player 1 = {player1_name}, Player 2 = {player2_name}")
# print(shot_data[['serverName','returnerName']])

Player 1 = Govind Nanda, Player 2 = Raphael Perot


In [1278]:
# Add Shot Hit By for each Shot

shot_data['shotHitBy'] = shot_data.apply(lambda row: row['serverName'] if row['shotInRally'] % 2 == 1 else row['returnerName'], axis=1)
# print(shot_data[['serverName', 'shotHitBy','pointNumber', 'shotInRally']].head(20))

In [1279]:

# Change this with whatever is player1Hand and player2Hand first Column Values
player1hand = "Right"
player2hand = "Right"

# Add columns for isInsideOut and isInsideIn, initially set to 0
shot_data['isInsideOut'] = 0
shot_data['isInsideIn'] = 0

# Iterate through rows
for index, row in shot_data.iterrows():
    shotHitBy = row['shotHitBy']
    player_hand = player1hand if shotHitBy == row['player1Name'] else player2hand
    
    if player_hand == "Right":
        if row['side'] == "Duece" and row['shotType'] == "Backhand" and row['shotDirection'] == "Crosscourt":
            shot_data.at[index, 'isInsideOut'] = 1
        elif row['side'] == "Ad" and row['shotType'] == "Forehand" and row['shotDirection'] == "Crosscourt":
            shot_data.at[index, 'isInsideOut'] = 1
        elif row['side'] == "Duece" and row['shotType'] == "Backhand" and row['shotDirection'] == "Down the Line":
            shot_data.at[index, 'isInsideIn'] = 1
        elif row['side'] == "Ad" and row['shotType'] == "Forehand" and row['shotDirection'] == "Down the Line":
            shot_data.at[index, 'isInsideIn'] = 1
    elif player_hand == "Left":
        if row['side'] == "Ad" and row['shotType'] == "Backhand" and row['shotDirection'] == "Crosscourt":
            shot_data.at[index, 'isInsideOut'] = 1
        elif row['side'] == "Duece" and row['shotType'] == "Forehand" and row['shotDirection'] == "Crosscourt":
            shot_data.at[index, 'isInsideOut'] = 1
        elif row['side'] == "Ad" and row['shotType'] == "Backhand" and row['shotDirection'] == "Down the Line":
            shot_data.at[index, 'isInsideIn'] = 1
        elif row['side'] == "Duece" and row['shotType'] == "Forehand" and row['shotDirection'] == "Down the Line":
            shot_data.at[index, 'isInsideIn'] = 1

# Ouput Improved Shot Csv HERE

In [1280]:
# # Ouput Improved Shot Csv HERE

# your_df_name = "NandaPerotFullMatch2.csv" # edit this 

# shot_data.to_csv(your_df_name, index=False)

# Below is for Point CSV

In [1281]:
# import pandas as pd

# # Assuming shot_data is your DataFrame
# # You can replace 'shot_data' with the actual name of your DataFrame
# filtered_data = shot_data[(shot_data['shotHitBy'] == 'Govind Nanda') & (shot_data['isVolley'] == True)]
# unique_point_numbers = filtered_data['pointNumber'].unique()

# print(f"The number of unique pointNumbers where shotHitBy Govind Nanda and isVolley is True: {len(unique_point_numbers)}")
# print(f"List of unique pointNumbers: {unique_point_numbers}")


In [1282]:
# import pandas as pd

# # Assuming shot_data is your DataFrame
# # You can replace 'shot_data' with the actual name of your DataFrame
# total_volleys = shot_data[shot_data['isVolley'] == True]['pointNumber'].nunique()

# print(f"The total number of volleys (considering unique pointNumbers): {total_volleys}")


# This Needs to be adjusted if tagger template columns change !

# Create Point DF

In [1283]:
# Creating point_df (with only 1 row for each pointNumber)
point_df = shot_data.drop_duplicates(subset='pointNumber')[['pointNumber']]

# Add Side

In [1284]:
# Group shot_data by 'pointNumber' and get the first 'side' value for each group
side_values = shot_data.groupby('pointNumber')['side'].first().reset_index()

point_df['side'] = side_values['side'].values

In [1285]:
# Adds Server and Returner Names

point_df['serverName'] = shot_data.groupby('pointNumber')['serverName'].first().values
point_df['returnerName'] = shot_data.groupby('pointNumber')['returnerName'].first().values
point_df['Name'] = shot_data.groupby('pointNumber')['pointScore'].first().values

In [1286]:
# print(point_df.tail(6))

In [1287]:
# Add Start and End times per point

for index, row in shot_data.iterrows():
    point_number = row['pointNumber']
    
    if row['isPointStart'] == 1:
        point_df.loc[point_df['pointNumber'] == point_number, 'Position'] = row['pointStartTime']
    if row['isPointEnd'] == 1:
        point_df.loc[point_df['pointNumber'] == point_number, 'pointEndPosition'] = row['pointEndTime']

# Add Duration
point_df['Duration'] = point_df['pointEndPosition'] - point_df['Position']

# Add Rally Column

In [1288]:
# Find the highest shotInRally for each pointNumber in shot_data
max_rally_per_point = shot_data.groupby('pointNumber')['shotInRally'].max().reset_index()
point_df['rallyCount'] = list(max_rally_per_point['shotInRally'])

# Add 'rallyCountFreq' column based on specified conditions
point_df['rallyCountFreq'] = point_df['rallyCount'].apply(lambda x: '1 - 4' if 1 <= x <= 4 else ('5 - 8' if 5 <= x <= 8 else ('9 - 12' if 9 <= x <= 12 else ('13 +' if x >= 13 else 'Error'))))


# Serve 

In [1289]:
# Add firstServeIn and secondServeIn

# Add firstServeIn and secondServeIn columns
point_df['firstServeIn'] = 0
point_df['secondServeIn'] = 0

for point_number in shot_data['pointNumber'].unique():
    # Check if firstServeIn is 1 for the given pointNumber in shot_data
    if any((shot_data['pointNumber'] == point_number) & (shot_data['firstServeIn'] == 1)):
        point_df.loc[point_df['pointNumber'] == point_number, 'firstServeIn'] = 1
    
    # Check if secondServeIn is 1 for the given pointNumber in shot_data
    if any((shot_data['pointNumber'] == point_number) & (shot_data['secondServeIn'] == 1)):
        point_df.loc[point_df['pointNumber'] == point_number, 'secondServeIn'] = 1

In [1290]:
# Add serveResult and serveInPlacement

point_df['serveResult'] = ''
point_df['serveInPlacement'] = ''

for point_number in shot_data['pointNumber'].unique():
    if any((shot_data['pointNumber'] == point_number) & (shot_data['firstServeIn'] == 1)):
        point_df.loc[point_df['pointNumber'] == point_number, 'serveResult'] = '1st Serve In'
        point_df.loc[point_df['pointNumber'] == point_number, 'serveInPlacement'] = shot_data['firstServeZone']
    elif any((shot_data['pointNumber'] == point_number) & (shot_data['secondServeIn'] == 1)):
        point_df.loc[point_df['pointNumber'] == point_number, 'serveResult'] = '2nd Serve In'
        point_df.loc[point_df['pointNumber'] == point_number, 'serveInPlacement'] = shot_data['secondServeZone']    
    else: 
        point_df.loc[point_df['pointNumber'] == point_number, 'serveResult'] = 'Double Fault'
        point_df.loc[point_df['pointNumber'] == point_number, 'serveInPlacement'] = np.nan
        
    point_df.loc[point_df['pointNumber'] == point_number, 'firstServeZone'] = shot_data['firstServeZone']
    point_df.loc[point_df['pointNumber'] == point_number, 'secondServeZone'] = shot_data['secondServeZone']
    
# Display the result
print(point_df['serveResult'].unique())


['1st Serve In' '2nd Serve In' 'Double Fault']


In [1291]:
serve_zones = shot_data.loc[shot_data['shotInRally'] == 1, ['pointNumber', 'firstServeZone', 'secondServeZone', 'firstServeIn', 'secondServeIn']].drop_duplicates()

# Define a function to handle serveInPlacement based on conditions
def get_serve_placement(row):
    if row['firstServeIn'] == 1:
        return row['firstServeZone']
    elif row['secondServeIn'] == 1:
        return row['secondServeZone']
    else:
        return 'Double Fault'

# Apply the function to create the serveInPlacement column
point_df['serveInPlacement'] = shot_data.apply(get_serve_placement, axis=1)
point_df['firstServeZone'] = shot_data.groupby('pointNumber')['firstServeZone'].first().values
point_df['secondServeZone'] = shot_data.groupby('pointNumber')['secondServeZone'].first().values

# Display the result
print(point_df.head(10))

    pointNumber   side     serverName   returnerName   Name  Position  \
0             1  Deuce  Raphael Perot   Govind Nanda    0-0   20671.0   
5             2     Ad  Raphael Perot   Govind Nanda   15-0   52726.0   
12            3  Deuce  Raphael Perot   Govind Nanda  15-15   95309.0   
14            4     Ad  Raphael Perot   Govind Nanda  30-15  120318.0   
15            5  Deuce  Raphael Perot   Govind Nanda  30-30  156269.0   
20            6     Ad  Raphael Perot   Govind Nanda  30-40  201932.0   
28            7  Deuce   Govind Nanda  Raphael Perot    0-0  271253.0   
35            8     Ad   Govind Nanda  Raphael Perot   0-15  311691.0   
37            9  Deuce   Govind Nanda  Raphael Perot  15-15  344517.0   
54           10     Ad   Govind Nanda  Raphael Perot  30-15  398115.0   

    pointEndPosition  Duration  rallyCount rallyCountFreq  firstServeIn  \
0                NaN       NaN           5          5 - 8             1   
5                NaN       NaN           7    

In [1292]:
double_fault_rows = point_df[point_df['serveResult'] == 'Double Fault']
print(double_fault_rows)

     pointNumber   side     serverName   returnerName   Name   Position  \
14             4     Ad  Raphael Perot   Govind Nanda  30-15   120318.0   
190           33  Deuce   Govind Nanda  Raphael Perot  40-15  1415438.0   
197           35  Deuce  Raphael Perot   Govind Nanda    0-0  1527426.0   
435           75  Deuce  Raphael Perot   Govind Nanda  15-15  4333699.0   

     pointEndPosition  Duration  rallyCount rallyCountFreq  firstServeIn  \
14           135340.0   15022.0           1          1 - 4             0   
190         1442327.0   26889.0           1          1 - 4             0   
197         1547779.0   20353.0           1          1 - 4             0   
435         4349691.0   15992.0           1          1 - 4             0   

     secondServeIn   serveResult serveInPlacement firstServeZone  \
14               0  Double Fault     Double Fault           Wide   
190              0  Double Fault     Double Fault           Wide   
197              0  Double Fault     Do

# Ace

In [1293]:
point_df['isAce'] = ((point_df['rallyCount'] == 1) & ((point_df['serveInPlacement'] != "Double Fault")))

# Display the resulting DataFrame
print(point_df[['pointNumber', 'rallyCount', 'isAce']].head(14))

    pointNumber  rallyCount  isAce
0             1           5  False
5             2           7  False
12            3           2  False
14            4           1  False
15            5           5  False
20            6           8  False
28            7           7  False
35            8           2  False
37            9          17  False
54           10           9  False
63           11           1   True
64           12           3  False
67           13           5  False
72           14           8  False


# Return

In [1294]:
# Assuming point_df is your DataFrame

# Set the initial values of 'returnDirection' and 'returnHand' columns to None
point_df['returnDirection'] = None
point_df['returnFhBh'] = None

# Iterate through pointNumber in shot_data
for point_number in shot_data['pointNumber'].unique():
    # Check if shotInRally == 2 exists for the given pointNumber
    if 2 in shot_data.loc[shot_data['pointNumber'] == point_number, 'shotInRally'].values:
        # Get the information from the corresponding row
        row_with_return_info = shot_data[(shot_data['pointNumber'] == point_number) & (shot_data['shotInRally'] == 2)].iloc[0]

        # Assign values to 'returnDirection' and 'returnHand' columns
        point_df.loc[point_df['pointNumber'] == point_number, 'returnDirection'] = row_with_return_info['shotDirection']
        point_df.loc[point_df['pointNumber'] == point_number, 'returnFhBh'] = row_with_return_info['shotFhBh']

# Display the modified DataFrame
print(point_df.head(10))


    pointNumber   side     serverName   returnerName   Name  Position  \
0             1  Deuce  Raphael Perot   Govind Nanda    0-0   20671.0   
5             2     Ad  Raphael Perot   Govind Nanda   15-0   52726.0   
12            3  Deuce  Raphael Perot   Govind Nanda  15-15   95309.0   
14            4     Ad  Raphael Perot   Govind Nanda  30-15  120318.0   
15            5  Deuce  Raphael Perot   Govind Nanda  30-30  156269.0   
20            6     Ad  Raphael Perot   Govind Nanda  30-40  201932.0   
28            7  Deuce   Govind Nanda  Raphael Perot    0-0  271253.0   
35            8     Ad   Govind Nanda  Raphael Perot   0-15  311691.0   
37            9  Deuce   Govind Nanda  Raphael Perot  15-15  344517.0   
54           10     Ad   Govind Nanda  Raphael Perot  30-15  398115.0   

    pointEndPosition  Duration  rallyCount rallyCountFreq  firstServeIn  \
0                NaN       NaN           5          5 - 8             1   
5                NaN       NaN           7    

# Add Error Column

In [1295]:
# Create an empty DataFrame to store the results
error_results = pd.DataFrame(columns=['errorType', 'pointNumber'])

# Iterate through entire shot_data
for index, row in shot_data.iterrows():
    pointNumber = row['pointNumber']
    point_error_value = None
    
    if row['isErrorWideR'] == 1:
        point_error_value = 'Wide Right'
    elif row['isErrorWideL'] == 1:
        point_error_value = 'Wide Left'
    elif 'isErrorNet' in row and row['isErrorNet'] == 1:
        point_error_value = 'Net'
    elif row['isErrorLong'] == 1:
        point_error_value = 'Long'
    

    # If an error is found, append the result to the error_results DataFrame
    if point_error_value is not None:
        error_results = pd.concat([error_results, pd.DataFrame({'pointNumber': [pointNumber], 'errorType': [point_error_value]})], ignore_index=True)


# Drop duplicates based on 'pointNumber'
error_results = error_results.drop_duplicates(subset=['pointNumber'])

In [1296]:
# Create a dictionary mapping 'pointNumber' to 'errorType' in error_results
error_type_mapping = dict(zip(error_results['pointNumber'], error_results['errorType']))

# Create 'errorType' column in point_df based on the mapping
point_df['errorType'] = point_df['pointNumber'].map(error_type_mapping)

point_df = point_df.replace({np.nan: None})

# Return Stuff

In [1297]:
def get_return_error(row):
    if row['rallyCount'] == 2:
        return row['errorType']
    else:
        return None

point_df.loc[point_df['pointNumber'] == point_number, 'serveInPlacement'] = shot_data['secondServeZone']  

# Apply the functions to create the new columns
point_df['returnError'] = point_df.apply(get_return_error, axis=1)


In [1298]:
# print(point_df["returnError"].unique())
# print(point_df[['rallyCount','returnDirection','returnFhBh']])

# Last Shot

In [1299]:

point_df['lastShotDirection'] = None
point_df['lastShotFhBh'] = None

# Iterate through unique pointNumbers in shot_data
for point_number in shot_data['pointNumber'].unique():
    # Check if isPointEnd == 1 exists for the given pointNumber
    if 1 in shot_data.loc[shot_data['pointNumber'] == point_number, 'isPointEnd'].values:
        # Get the information from the corresponding row
        row_with_return_info = shot_data[(shot_data['pointNumber'] == point_number) & (shot_data['isPointEnd'] == 1)].iloc[0]

        # Assign values to 'lastShotDirection' and 'lastShotFhBh' columns
        point_df.loc[point_df['pointNumber'] == point_number, 'lastShotDirection'] = row_with_return_info['shotDirection']
        point_df.loc[point_df['pointNumber'] == point_number, 'lastShotFhBh'] = row_with_return_info['shotFhBh']

In [1300]:
# # Display the modified DataFrame
# print(point_df[['rallyCount','lastShotDirection','lastShotFhBh']])

# Shot 3 Info

In [1301]:
# point_df['shot3Direction'] = None
# point_df['shot3FhBh'] = None
# point_df['shot3Error'] = None

# # Iterate through pointNumber in shot_data
# for point_number in shot_data['pointNumber'].unique():
#     # Check if shotInRally == 2 exists for the given pointNumber
#     if 3 in shot_data.loc[shot_data['pointNumber'] == point_number, 'shotInRally'].values:
#         # Get the information from the corresponding row
#         row_with_shot3_info = shot_data[(shot_data['pointNumber'] == point_number) & (shot_data['shotInRally'] == 3)].iloc[0]

#         # Assign values to 'returnDirection' and 'returnHand' columns
#         point_df.loc[shot_data['pointNumber'] == point_number, 'shot3Direction'] = row_with_shot3_info['shotDirection']
#         point_df.loc[shot_data['pointNumber'] == point_number, 'shot3FhBh'] = row_with_shot3_info['shotFhBh']

#         # Check if rallyCount is 3 for this pointNumber
#         if point_df.loc[point_df['pointNumber'] == point_number, 'rallyCount'].values[0] == 3:
#             # Assign value from 'errorType' column to 'shot3Error'
#             point_df.loc[point_df['pointNumber'] == point_number, 'shot3Error'] = point_df['errorType']

#             # # Check if isWinner is True for this row in shot_data
#             # if row_with_shot3_info['isWinner']:
#             #     point_df.loc[point_df['pointNumber'] == point_number, 'shot3Winner'] = True

# # Display the modified DataFrame
# print(point_df[['rallyCount','shot3FhBh','shot4Direction']])

In [1302]:
# import pandas as pd

# # Assuming shot_data and point_df are your DataFrames

# def get_serve_shot3_forehand_backhand(row):
#     if row['shotInRally'] == 3 and pd.notna(row['shotType']):
#         if 'Forehand' in row['shotType']:
#             return 'Forehand'
#         elif 'Backhand' in row['shotType']:
#             return 'Backhand'
#     return None

# def get_serve_shot3_placement(row):
#     if row['shotInRally'] == 3 and pd.notna(row['shotType']):
#         if 'Down the Line' in row['shotType']:
#             return 'Down the Line'
#         elif 'Crosscourt' in row['shotType']:
#             return 'Crosscourt'
#     return None

# def get_serve_shot3_error(row):
#     if row['rallyCount'] == 3:
#         return row['errorType']
#     else:
#         return None


# # Step 1: Add 'Return Forehand/Backhand' column to point_df
# point_df['Serve +1 Forehand/Backhand'] = point_df.apply(get_serve_shot3_forehand_backhand, axis=1)

# # Step 2: Add 'Return Placement' column to point_df
# point_df['Serve +1 Placement'] = point_df.apply(get_return_placement, axis=1)

# # Apply the modified function to create the new column 'Return Error Type' in shot_data
# point_df['Serve +1 Error Type'] = point_df.apply(get_return_error, axis=1)


# Shot 4 Info

In [1303]:
# point_df['shot4Direction'] = None
# point_df['shot4FhBh'] = None
# point_df['shot4Error'] = None
# shotInRally = 4

# # Iterate through pointNumber in shot_data
# for point_number in shot_data['pointNumber'].unique():
#     if shotInRally in shot_data.loc[shot_data['pointNumber'] == point_number, 'shotInRally'].values:
#         # Get the information from the corresponding row
#         row_with_shot3_info = shot_data[(shot_data['pointNumber'] == point_number) & (shot_data['shotInRally'] == shotInRally)].iloc[0]

#         # Assign values to 'returnDirection' and 'returnHand' columns
#         point_df.loc[shot_data['pointNumber'] == point_number, 'shot4Direction'] = row_with_shot3_info['shotDirection']
#         point_df.loc[shot_data['pointNumber'] == point_number, 'shot4FhBh'] = row_with_shot3_info['shotFhBh']

#         if point_df.loc[point_df['pointNumber'] == point_number, 'rallyCount'].values[0] == 3:
#             point_df.loc[point_df['pointNumber'] == point_number, 'shot4Error'] = point_df['errorType']

            
# print(point_df[['rallyCount','shot4FhBh','shot4Direction']])

In [1304]:
# print(point_df['Return +1 Error Type'].unique())

In [1305]:
# point_df.shape

In [1306]:
# serve_plus1_placement_counts = point_df['Serve +1 Placement'].value_counts()

# # Display the count of unique values in 'Serve +1 Placement'
# print(serve_plus1_placement_counts)


# last shot info

In [1307]:
# def get_last_shot_forehand_backhand(row):
#     if row['isPointEnd'] == 1 and pd.notna(row['shotType']):
#         if 'Forehand' in row['shotType']:
#             return 'Forehand'
#         elif 'Backhand' in row['shotType']:
#             return 'Backhand'
#     return None

# def get_last_shot_placement(row):
#     if row['isPointEnd'] == 1 and pd.notna(row['shotType']):
#         if 'Down the Line' in row['shotType']:
#             return 'Down the Line'
#         elif 'Crosscourt' in row['shotType']:
#             return 'Crosscourt'
#     return None

# def get_last_shot_error(row):
#     if row['isPointEnd'] == 1 and pd.notna(row['shotType']):
#         return row['errorType']
#     else:
#         return None

# # Step 1: Add 'Return Forehand/Backhand' column to point_df
# point_df['Last Shot Forehand/Backhand'] = point_df.apply(get_last_shot_forehand_backhand, axis=1)

# # Step 2: Add 'Return Placement' column to point_df
# point_df['Last Shot Placement'] = point_df.apply(get_last_shot_placement, axis=1)

#       # Apply the modified function to create the new column 'Return Error Type' in shot_data
# point_df['Last Shot Error Type'] = point_df.apply(get_last_shot_error, axis=1)

In [1308]:
# print(point_df['Last Shot Error Type'].unique())

# pointWonBy

In [1309]:
# Filter shot_data for rows where isPointEnd is 1
filtered_df = shot_data[shot_data['isPointEnd'] == 1]

# Record values for specified columns
pointWonBy_df2 = filtered_df[['player1Name', 'player2Name', 'serverName', 'isWinner', 'pointNumber', 'shotInRally']].copy()

# Determine returnerName based on serverName
pointWonBy_df2['returnerName'] = pointWonBy_df2.apply(lambda row: row['player2Name'] if row['serverName'] == row['player1Name'] else row['player1Name'], axis=1)


for index, row in pointWonBy_df2.iterrows():
    if (row['isWinner'] == 1 and row['shotInRally'] % 2 == 1) or (row['isWinner'] != 1 and row['shotInRally'] % 2 == 0):
        pointWonBy_df2.at[index, 'pointWonBy'] = row['serverName']
    elif (row['isWinner'] == 1 and row['shotInRally'] % 2 == 0) or (row['isWinner'] != 1 and row['shotInRally'] % 2 == 1):
        pointWonBy_df2.at[index, 'pointWonBy'] = row['returnerName']
    else:
        pointWonBy_df2.at[index, 'pointWonBy'] = None

point_df['pointWonBy'] = pointWonBy_df2['pointWonBy'].values

In [1310]:
# # Display the resulting DataFrame
# print(point_df)

# Break Point

In [1311]:
# List of values to check for in 'Name' column
break_point_values = ['0-40', '15-40', '30-40', '40-40 (Deuce)', '40-40 (Ad)']

# Create 'isBreakPoint' column in point_df
point_df['isBreakPoint'] = point_df['Name'].isin(break_point_values)

In [1312]:
# Display the resulting DataFrame
# print(point_df[['pointNumber', 'Name', 'isBreakPoint']])

# Add Side

In [1313]:
# # List of values to check for in 'Name' column for Deuce
# deuce_values = ['0-0' ,'15-15','40-15','30-0' , '0-30','15-40' ,'40-40 (Deuce)','30-30']

# # Create 'servingSide' column in point_df
# point_df['servingSide'] = np.where(point_df['Name'].isin(deuce_values), 'Deuce', 'Ad')

In [1314]:
# # Display the resulting DataFrame
# print(point_df[['pointNumber', 'Name', 'servingSide']])

# At Net (will be replaced by atNet Button later)

In [1315]:
import numpy as np

# Copy the DataFrame
at_net_df2 = shot_data.copy()

# Record values for specified columns
at_net_df2 = at_net_df2[['player1Name', 'player2Name', 'isVolley', 'serverName', 'shotHitBy', 'returnerName', 'pointNumber', 'shotInRally']].copy()

# Add columns for aggregation
at_net_df2['atNetPlayer1'] = np.where((at_net_df2['isVolley'] == 1) & (at_net_df2['shotHitBy'] == at_net_df2['player1Name']), 1, 0)
at_net_df2['atNetPlayer2'] = np.where((at_net_df2['isVolley'] == 1) & (at_net_df2['shotHitBy'] == at_net_df2['player2Name']), 1, 0)

# Aggregate atNetPlayer1 and atNetPlayer2 based on pointNumber
at_net_df2['atNetPlayer1_Agg'] = at_net_df2.groupby('pointNumber')['atNetPlayer1'].transform('max')
at_net_df2['atNetPlayer2_Agg'] = at_net_df2.groupby('pointNumber')['atNetPlayer2'].transform('max')

# Drop the duplicate rows created during aggregation
at_net_df2.drop_duplicates(subset=['pointNumber'], inplace=True)

point_df['atNetPlayer1'] = at_net_df2['atNetPlayer1_Agg'].values
point_df['atNetPlayer2'] = at_net_df2['atNetPlayer2_Agg'].values

In [1316]:
print(point_df.tail(10))

     pointNumber   side     serverName   returnerName   Name   Position  \
633          113     Ad   Govind Nanda  Raphael Perot   15-0  6042098.0   
637          114  Deuce   Govind Nanda  Raphael Perot   30-0  6067753.0   
646          115     Ad   Govind Nanda  Raphael Perot  30-15  6222710.0   
666          116  Deuce   Govind Nanda  Raphael Perot  40-15  6284050.0   
671          117     Ad   Govind Nanda  Raphael Perot  40-30  6322807.0   
673          118  Deuce  Raphael Perot   Govind Nanda    0-0  6351895.0   
676          119     Ad  Raphael Perot   Govind Nanda   0-15  6372657.0   
678          120  Deuce  Raphael Perot   Govind Nanda   0-30  6413797.0   
683          121     Ad  Raphael Perot   Govind Nanda   0-40  6444496.0   
699          122  Deuce  Raphael Perot   Govind Nanda  15-40  6497093.0   

    pointEndPosition Duration  rallyCount rallyCountFreq  ...  returnFhBh  \
633        6050306.0   8208.0           4          1 - 4  ...    Forehand   
637        6095938.0

# Replace Nan with None

In [1317]:
point_df = point_df.replace({np.nan: None})

# Set Score
- Add Set Score
- Add setNum

In [1318]:
# Group shot_data by 'pointNumber' and get the first value of 'setScore'
setScore_df = shot_data.groupby('pointNumber')['setScore'].first().reset_index()

point_df['setScore'] = setScore_df['setScore'].values

In [1319]:
# Extract numbers from 'setScore' and calculate the sum plus 1
point_df['setNum'] = point_df['setScore'].apply(lambda x: sum(int(char) for char in x if char.isdigit()) + 1)

In [1325]:
# Create an empty DataFrame to store gameScore and setScore
setScore = shot_data.groupby('pointNumber').first()['setScore'].reset_index()
gameScore = shot_data.groupby('pointNumber').first()['gameScore'].reset_index()
pointScore = shot_data.groupby('pointNumber').first()['pointScore'].reset_index()


point_df['setScore'] = setScore['setScore'].values
point_df['gameScore'] = gameScore['gameScore'].values
point_df['pointScore'] = pointScore['pointScore'].values

In [1326]:
# Add Game Number, Set Number, Game/Set/Point for each player

point_df[['player1SetScore', 'player2SetScore']] = point_df['setScore'].str.split('-', expand=True)
point_df[['player1GameScore', 'player2GameScore']] = point_df['gameScore'].str.split('-', expand=True)
point_df[['player1PointScore', 'player2PointScore']] = point_df['pointScore'].str.split('-', expand=True)

def calculate_game_number(score):
    return int(score.split('-')[0]) + int(score.split('-')[1])

# Apply the function to create the 'gameNumber' column
point_df['gameNumber'] = point_df['gameScore'].apply(calculate_game_number)

# EDA

# Duece Serve Placement and Win %

In [1341]:
# # Testing Serve In Placement
# import pandas as pd

# serverName = 'Govind Nanda'

# # Assuming shot_data is your DataFrame
# # You can replace 'shot_data' with the actual name of your DataFrame
# filtered_data = point_df[(point_df['serverName'] == serverName) & 
#                           (point_df['side'] == 'Deuce')]

# serve_in_placement_counts = filtered_data['serveInPlacement'].value_counts()

# print("Value counts for serveInPlacement:")
# print(serve_in_placement_counts)

# # Group by 'serveInPlacement' and count occurrences of pointWinner being 'Govind Nanda'
# server_wins_by_placement = filtered_data.groupby('serveInPlacement')['pointWonBy'].apply(lambda x: (x == serverName).sum()).reset_index()

# # Rename the columns for clarity
# server_wins_by_placement.columns = ['serveInPlacement', 'Wins']

# print(f"Wins for each Duece Serve Placement")
# print(server_wins_by_placement)


Value counts for serveInPlacement:
serveInPlacement
T               11
Wide             9
Body             6
Double Fault     1
Name: count, dtype: int64
Wins for each Duece Serve Placement
  serveInPlacement  Wins
0             Body     5
1     Double Fault     0
2                T     9
3             Wide     7


# Ad Serve Placement Win %

Value counts for serveInPlacement:
serveInPlacement
T       12
Wide    10
Body     6
Name: count, dtype: int64


In [1332]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Filter point_df based on 'serverName' and 'side'
filtered_data = point_df[(point_df['serverName'] == 'Govind Nanda') & (point_df['side'] == 'Ad')]

# Group by 'serveInPlacement' and count occurrences of pointWinner being 'Govind Nanda'
govind_nanda_wins_by_placement = filtered_data.groupby('serveInPlacement')['pointWonBy'].apply(lambda x: (x == 'Govind Nanda').sum()).reset_index()

# Rename the columns for clarity
govind_nanda_wins_by_placement.columns = ['serveInPlacement', 'govindNandaWins']

print("Number of times pointWinner is 'Govind Nanda' for each unique value of serveInPlacement:")
print(govind_nanda_wins_by_placement)


Number of times pointWinner is 'Govind Nanda' for each unique value of serveInPlacement:
  serveInPlacement  govindNandaWins
0             Body                3
1                T                9
2             Wide                5


In [1333]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Count occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In'
total_govind_nanda_serves = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                      (point_df['serveResult'] == '1st Serve In')].shape[0]

# Count occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is also 'Govind Nanda'
govind_nanda_wins_on_first_serve = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                            (point_df['serveResult'] == '1st Serve In') & 
                                            (point_df['pointWonBy'] == 'Govind Nanda')].shape[0]

# Print the counts
print(f"Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': {total_govind_nanda_serves}")
print(f"Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': {govind_nanda_wins_on_first_serve}")


Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': 37
Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': 26


In [1334]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Count occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In'
total_govind_nanda_serves = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                      (point_df['serveResult'] == '2nd Serve In')].shape[0]

# Count occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is also 'Govind Nanda'
govind_nanda_wins_on_first_serve = point_df[(point_df['serverName'] == 'Govind Nanda')].shape[0]

# Print the counts
print(f"Occurrences where serverName is 'Govind Nanda' and serveResult is '2nd Serve In': {total_govind_nanda_serves}")
print(f"Occurrences where serverName is 'Govind Nanda', serveResult is '2nd Serve In', and pointWinner is 'Govind Nanda': {govind_nanda_wins_on_first_serve}")


Occurrences where serverName is 'Govind Nanda' and serveResult is '2nd Serve In': 17
Occurrences where serverName is 'Govind Nanda', serveResult is '2nd Serve In', and pointWinner is 'Govind Nanda': 55


In [1335]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Count occurrences where serverName is 'Govind Nanda' and serveResult is 'Double Fault'
govind_nanda_double_faults = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                       (point_df['serveResult'] == 'Double Fault')].shape[0]

# Print the count
print(f"Occurrences where serverName is 'Govind Nanda' and serveResult is 'Double Fault': {govind_nanda_double_faults}")


Occurrences where serverName is 'Govind Nanda' and serveResult is 'Double Fault': 1


In [1336]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Filter rows where serverName is 'Govind Nanda' and serveResult is 'Double Fault'
govind_nanda_double_fault_rows = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                          (point_df['serveResult'] == 'Double Fault')]

# Display the selected rows
print("Rows where serverName is 'Govind Nanda' and serveResult is 'Double Fault':")
print(govind_nanda_double_fault_rows['pointWonBy'])


Rows where serverName is 'Govind Nanda' and serveResult is 'Double Fault':
190    Raphael Perot
Name: pointWonBy, dtype: object


In [1337]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Count occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In'
total_govind_nanda_serves = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                      (point_df['serveResult'] == '1st Serve In')].shape[0]

# Count occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is also 'Govind Nanda'
govind_nanda_wins_on_first_serve = point_df[(point_df['serverName'] == 'Govind Nanda')].shape[0]

# Print the counts
print(f"Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': {total_govind_nanda_serves}")
print(f"Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': {govind_nanda_wins_on_first_serve}")


Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': 37
Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': 55


In [1338]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Count occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In'
total_govind_nanda_serves = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                     (point_df['pointWonBy'] == 'Govind Nanda')].shape[0]

# Count occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is also 'Govind Nanda'
second_serves = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                      (point_df['serveResult'] == '2nd Serve In') | 
                                      (point_df['serveResult'] == 'Double Fault')].shape[0]
# Print the counts
print(f"Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': {total_govind_nanda_serves}")
print(f"Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': {second_serves}")


Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': 38
Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': 21


In [1339]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Count occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In'
points_won_serving = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                     (point_df['pointWonBy'] == 'Govind Nanda')].shape[0]

# Count occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is also 'Govind Nanda'
point_serving = point_df[(point_df['serverName'] == 'Govind Nanda')].shape[0]
# Print the counts
print(f"Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': {points_won_serving}")
print(f"Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': {point_serving}")


Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': 38
Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': 55


In [1197]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Count occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In'
points_won_serving = point_df[(point_df['returnerName'] == 'Govind Nanda') & 
                                     (point_df['pointWonBy'] == 'Govind Nanda')].shape[0]

# Count occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is also 'Govind Nanda'
point_serving = point_df[(point_df['returnerName'] == 'Govind Nanda')].shape[0]
# Print the counts
print(f"Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': {points_won_serving}")
print(f"Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': {point_serving}")


Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': 32
Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': 67


In [1040]:

govind_nanda_serves = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                      (point_df['serveResult'] == '2nd Serve In')].shape[0]

# Count occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is also 'Govind Nanda'
second_serves = point_df[(point_df['serverName'] == 'Govind Nanda') & 
                                      (point_df['serveResult'] == '2nd Serve In') | 
                                      (point_df['serveResult'] == 'Double Fault')].shape[0]
# Print the counts
print(f"Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': {total_govind_nanda_serves}")
print(f"Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': {second_serves}")


Occurrences where serverName is 'Govind Nanda' and serveResult is '1st Serve In': 38
Occurrences where serverName is 'Govind Nanda', serveResult is '1st Serve In', and pointWinner is 'Govind Nanda': 23


In [1041]:
point_df.columns

Index(['pointNumber', 'side', 'serverName', 'returnerName', 'Name', 'Position',
       'pointEndPosition', 'Duration', 'serveResult', 'serveInPlacement',
       'firstServeZone', 'secondServeZone', 'returnDirection', 'returnFhBh',
       'rallyCount', 'errorType', 'returnError', 'lastShotDirection',
       'lastShotFhBh', 'point_winner', 'isBreakPoint', 'servingSide',
       'atNetName', 'setScore', 'setScoreSum', 'rallyCountFreq', 'isAce',
       'gameScore', 'pointScore', 'player1Set', 'player2Set', 'player1Game',
       'player2Game', 'player1Point', 'player2Point', 'gameNumber'],
      dtype='object')

In [1049]:
import pandas as pd

# Assuming point_df is your DataFrame
# You can replace 'point_df' with the actual name of your DataFrame

# Count occurrences where atNetName is 'Govind Nanda'
govind_nanda_at_net_count = point_df[point_df['atNetName'] == 'Govind Nanda'].shape[0]

# Count occurrences where atNetName is 'Govind Nanda'
govind_nanda_wins_at_net_count = point_df[(point_df['atNetName'] == 'Govind Nanda') &
                                                          (point_df['pointWonBy'] == 'Govind Nanda')].shape[0]

# Display the count
print(f"Count of points where atNetName is 'Govind Nanda': {govind_nanda_at_net_count}")
print(f"Count of points where 'Govind Nanda' wins at net: {govind_nanda_wins_at_net_count}")



Count of points where atNetName is 'Govind Nanda': 18
Count of points where 'Govind Nanda' wins at net: 14


# PRINT HERE

In [1042]:
# Save point_df to CSV file
point_df.to_csv('point_df_1-22_v3.csv', index=False)
