# Swing Vision Transformation

This notebook converts SwingVision data (excel) into UCLA Tennis Consulting format (csv)

Run all cells ONCE; restart Kernel and Run All again if needed

If there are any errors, go back to the swingvision excel file to fix.

# ---------------

### Setup

In [135]:
# If this is your first time running notebooks, uncomment the pip install commands:
# %pip install pandas
# %pip install numpy

import pandas as pd
import numpy as np
import os 
import re

# Option to display max rows/columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

### Load in Data

If you run into any errors reading the Google Sheets Link, go to the "How to Clean 101" guide FAQ to fix.

If you are cleaning by yourself, you could opt to download the excel file locally and read in.

In [136]:
# Option 1 - Individual Cleaning
# Input file name here
# your_file_name = 'Emon_VanLobenSels_Roger_PascualFerra.xlsx'
# swing_data = pd.read_excel(your_file_name, sheet_name='Shots')
# swing_data.shape

# Option 2 - Enables Collaboration on Cleaning [RECOMMENDED]
# Copy Google Sheets Link and replace the edit?usp=sharing with export?format=csv
your_file_name = 'https://docs.google.com/spreadsheets/d/1RiF4AXyyjeGRuUZXupHwp0v8GkXmhRZuC5C0jQsjumk/export?format=xlsx'
swing_data = pd.read_excel(your_file_name, sheet_name='Shots')
swing_data.shape

(466, 26)

## Cleaning

#### Drop all "Feed" Rows

In [137]:
swing_data.query('Stroke == "Feed"')

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Unnamed: 6,Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
49,Arda Azkara,1,none,Feed,Flat,19.052607,12,13,3,1,deep,ad_alley,far,4.81295,23.757168,deep,deuce,near,4.059814,1.223002,0.494838,---,Out,False,00:36:57,407.200012
214,Arda Azkara,0,none,Feed,Flat,20.784662,37,38,7,1,short,ad,far,0.746238,14.549606,deep,deuce,near,0.960955,0.442007,0.735769,---,In,False,00:57:36,1646.400024
287,Arda Azkara,0,none,Feed,Flat,22.18947,60,61,12,2,short,ad,far,2.875797,17.845619,short,ad,far,2.007545,15.68469,1.388274,---,Net,False,01:12:40,2549.580078


In [138]:
rows_to_drop = swing_data.query('Stroke == "Feed"').index
swing_data = swing_data.drop(rows_to_drop)
swing_data = swing_data.reset_index(drop=True) # Important to reindex to avoid missing indicies
swing_data.shape

(463, 26)

#### Check Points with Unusual Serves

The "Type" column should identify first_serve and second_serve. 

There should only be 1 first_serve and 1 second_serve PER point (unless there is a let or some other issue -- in that case, delete the appropriate serve)

In [139]:
serve_counts = swing_data.groupby('Point')['Type'].value_counts().unstack(fill_value=0)
serve_counts['no_first_serve'] = serve_counts.get('first_serve', 0) == 0
no_first = serve_counts[serve_counts['no_first_serve']].index.tolist()
if len(no_first) > 0:
    raise ValueError(f"No First Serve Found: {no_first}")
else:
    print("Check Passed")

Check Passed


In [140]:
serve_counts['too_many_first_serves'] = serve_counts.get('first_serve', 0) > 1
too_many_first = serve_counts[serve_counts['too_many_first_serves']].index.tolist()
if len(too_many_first) > 0:
    raise ValueError(f"Multiple 1st Serves Found: {too_many_first}")
else:
    print("Check Passed")

Check Passed


In [141]:
serve_counts['too_many_second_serves'] = serve_counts.get('second_serve', 0) > 1
too_many_second = serve_counts[serve_counts['too_many_second_serves']].index.tolist()
if len(too_many_second) > 0:
    raise ValueError(f"Multiple 2nd Serves Found: {too_many_second}")
else:
    print("Check Passed")

Check Passed


#### Drop rows with "none" shots

In [142]:
none_data = swing_data.query('Type == "none"')
none_data.shape

(19, 26)

Check if there are points with MULTIPLE "none" shots. These are usually indicators of mistakes. 

If the shot was actually in play, make sure "Type" column is corrected to "in_play".

Make sure that each unique point has a unique Point Number!

In [143]:
points = []

def points_to_keep(none_data):
    unique_points = none_data['Point'].unique()
    for i in unique_points:
        current_point = none_data[none_data['Point'] == i]
        if len(current_point['Shot'].unique()) > 1:  # Check if more than one unique value
            points.append(int(i))  # Append the Point value itself
    return points

result = points_to_keep(none_data)

if len(points) > 0:
    raise ValueError(f'Point Numbers to Check: {points}')
else:
    print("Check Passed")

Check Passed


In [144]:
points = []

def points_to_drop(none_data):
    unique_points = none_data['Point'].unique()
    for i in unique_points:
        current_point = none_data[none_data['Point'] == i]
        if len(current_point['Shot'].unique()) == 1:  # Check if only one unique value
            points.extend(current_point.index.tolist())  # Append all indexes for this point
    return points

drop_index = points_to_drop(none_data)
print(drop_index)

[25, 53, 58, 119, 129, 139, 171, 177, 189, 202, 218, 262, 267, 286, 295, 323, 374, 434, 440]


In [145]:
swing_data = swing_data.drop(drop_index)
swing_data = swing_data.reset_index(drop=True) # Important to reindex to avoid missing indicies
swing_data.shape

(444, 26)

### Load in Points data

In [146]:
swing_data_points = pd.read_excel(your_file_name, sheet_name='Points')
swing_data_points.shape

(93, 15)

In [147]:
def create_point(server, player1score, player2score):
    if server == "host":
        return str(player1score) + "-" + str(player2score)
    else:
        return str(player2score) + "-" + str(player1score)
    
swing_data_points['pointScore'] = swing_data_points.apply(lambda x: create_point(x['Match Server'], x['Host Game Score'], x['Guest Game Score']), axis=1)

In [148]:
swing_data_points = swing_data_points.rename(columns={'Break Point' : 'isBreakPoint'})
swing_data_points['isBreakPoint'] = swing_data_points['isBreakPoint'].replace(False, '')
swing_data_points['isBreakPoint'] = swing_data_points['isBreakPoint'].replace(True, 1)

In [149]:
swing_data_points = swing_data_points[['Point', 'pointScore', 'isBreakPoint']]
swing_data_points.head()

Unnamed: 0,Point,pointScore,isBreakPoint
0,1,0-0,
1,2,15-0,
2,3,30-0,
3,4,40-0,
4,5,40-15,


In [150]:
swing_data = pd.merge(swing_data, swing_data_points, on='Point')

### Load in Games data

In [151]:
swing_data_games = pd.read_excel(your_file_name, sheet_name='Games')
swing_data_games.shape

(16, 9)

In [152]:
swing_data_games.head()

Unnamed: 0,Game,Set,Server,Host Set Score,Guest Set Score,Game Winner,Start Time,Video Time,Duration
0,1,1,host,0,0,host,00:30:13,2.97,89.519997
1,2,1,guest,1,0,guest,00:31:42,92.480003,238.600006
2,3,1,host,1,1,guest,00:35:41,331.079987,302.600006
3,4,1,guest,1,2,guest,00:40:44,633.679993,168.0
4,5,1,host,1,3,host,00:43:32,801.679993,390.279999


In [153]:
def create_game(player1game, player2game):
        return str(player1game) + "-" + str(player2game)

swing_data_games['gameScore'] = swing_data_games.apply(lambda x: create_game(x['Host Set Score'], x['Guest Set Score']),  axis=1)

In [154]:
swing_data_games = swing_data_games[['Game', 'gameScore']]

In [155]:
swing_data = pd.merge(swing_data, swing_data_games, on="Game")

### Load in Sets data

In [156]:
swing_data_sets = pd.read_excel(your_file_name, sheet_name='Sets')
swing_data_sets.shape

(2, 10)

In [157]:
host_set_score = 0
guest_set_score = 0

def create_set(set_winner):
        global host_set_score, guest_set_score  # Declare global variables
        if set_winner == "host":
                host_set_score += 1
        else:        
                guest_set_score += 1
        
        return str(host_set_score) + "-" + str(guest_set_score)

swing_data_sets['setScore'] = None
swing_data_sets.at[0, 'setScore'] = "0-0"
    
swing_data_sets.iloc[1:, swing_data_sets.columns.get_loc('setScore')] = swing_data_sets.iloc[1:].apply(lambda x: create_set(x['Set Winner']),  axis=1)


In [158]:
swing_data_sets = swing_data_sets[['Set', 'setScore']]

In [159]:
swing_data = pd.merge(swing_data, swing_data_sets, on="Set")

### Create Shot Data CSV

In [160]:
# Check existing columns
swing_data.columns

Index(['Player', 'Shot', 'Type', 'Stroke', 'Spin', 'Speed (MPH)', 'Unnamed: 6',
       'Point', 'Game', 'Set', 'Bounce Depth', 'Bounce Zone', 'Bounce Side',
       'Bounce (x)', 'Bounce (y)', 'Hit Depth', 'Hit Zone', 'Hit Side',
       'Hit (x)', 'Hit (y)', 'Hit (z)', 'Direction', 'Result', 'Favorited',
       'Start Time', 'Video Time', 'pointScore', 'isBreakPoint', 'gameScore',
       'setScore'],
      dtype='object')

In [161]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Unnamed: 6,Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time,pointScore,isBreakPoint,gameScore,setScore
0,Spencer Johnson,1,first_serve,Serve,Slice,81.565956,1,1,1,1,short,deuce,far,-0.140262,18.195896,deep,deuce,near,0.447903,0.34623,2.539594,down the T,In,False,00:30:11,0.97,0-0,,0-0,0-0
1,Arda Azkara,2,first_return,Forehand,Flat,27.105749,1,1,1,1,deep,ad,far,1.241675,20.501915,out,deuce,far,-0.776679,25.533381,1.123833,inside in,Net,False,00:30:12,1.73,0-0,,0-0,0-0
2,Spencer Johnson,1,first_serve,Serve,Slice,77.499817,2,2,1,1,short,ad,far,3.234928,16.858412,out,ad,near,-1.672043,-0.560185,2.712817,out wide,In,False,00:30:28,18.469999,15-0,,0-0,0-0
3,Arda Azkara,2,first_return,Forehand,Slice,30.874706,2,2,1,1,deep,ad,far,2.877553,18.732235,deep,ad_alley,far,5.129539,24.444994,1.264149,down the line,Net,False,00:30:29,19.370001,15-0,,0-0,0-0
4,Spencer Johnson,1,first_serve,Serve,Slice,108.353493,3,3,1,1,short,deuce,far,-2.785697,17.19445,deep,deuce,near,0.450073,0.073044,2.559511,out wide,In,False,00:30:44,34.330002,30-0,,0-0,0-0


In [162]:
# add in all desired column labels, with swingvision labels at end

columm_names = (['pointScore', 'gameScore', 'setScore',
                'isPointStart', 'pointStartTime', 'isPointEnd', 'pointEndTime','pointNumber',
                'isBreakPoint','shotInRally','side','serverName',
                'serverFarNear','firstServeIn','firstServeZone',
                'firstServeXCoord','firstServeYCoord',
                'secondServeIn','secondServeZone','secondServeXCoord',
                'secondServeYCoord','isAce','shotContactX',
                'shotContactY','shotDirection','shotFhBh',
                'isSlice','isVolley','isOverhead','isApproach','isDropshot', 'isLet',
                'isExcitingPoint','atNetPlayer1','atNetPlayer2','isLob',
                'shotLocationX','shotLocationY','isWinner','isErrorWideR', 'isErrorWideL',
                'isErrorNet','isErrorLong','clientTeam',
                'Date', 'Division', 'Event', 'lineupPosition','matchDetails',
                'matchVenue' , 'opponentTeam', 
                'player1Name', 'player2Name','player1Hand','player2Hand',
            'Round','Surface','Notes'])

shot_data = pd.DataFrame(columns=columm_names)
shot_data

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isLet,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes


##### Score Columns

In [163]:
shot_data['pointScore'] = swing_data['pointScore']
shot_data['gameScore'] = swing_data['gameScore']
shot_data['setScore'] = swing_data['setScore']

In [164]:
shot_data.head(10)

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isLet,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes
0,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,15-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,15-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,30-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,30-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,40-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,40-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,40-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,40-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


##### isPointStart and isPointEnd columns 

In [165]:
def assign_pointstart(x):
    if (x == 'first_serve') | (x == 'second_serve'):
        return 1
    
    return ''

shot_data['isPointStart'] = swing_data['Type'].apply(assign_pointstart)


index_list = []

for i in swing_data['Point'].unique().tolist():
    last_point_index = swing_data[swing_data['Point'] == i].index[-1]
    index_list.append(last_point_index)
    
shot_data.loc[index_list,'isPointEnd'] = 1
shot_data['isPointEnd'] = shot_data['isPointEnd'].fillna('')

##### pointStartTime and pointEndTime Columns

In [166]:
def convert_time(time):
    return int(time * 1000)

# def convert_time(time):
shot_data['pointStartTime'] = swing_data['Video Time'].apply(convert_time)

# Assigns last shot time to pointEndTime column
shot_data['pointEndTime'] = np.where(shot_data['isPointEnd'] == 1, shot_data['pointStartTime'], '')

##### pointNumber Column

In [167]:
shot_data['pointNumber'] = swing_data['Point']

##### isBreakPoint Column

In [168]:
shot_data['isBreakPoint'] = swing_data['isBreakPoint']

##### shotInRally column

In [169]:
shot_data.shotInRally = swing_data.Shot

##### side Column

In [170]:
def side(x, side, xcoord):
    if 'deuce' in x:
        return 'Deuce'
    elif 'ad' in x:
        return 'Ad'
    elif 'center_line' in x: # unique values include deuce, ad and center_line
        if (side == 'near') & (xcoord > 0):
            return 'Deuce'
        else:
            return 'Ad'
    else:
        return ''

shot_data['side'] = swing_data.apply(lambda x: side(x['Hit Zone'], x['Hit Side'], x['Bounce (x)']), axis = 1)

##### Players

In [171]:
# ucla roster 24-25 men and womens
ucla_roster_24_25 = ["Gianluca Ballotta", 
                   "Kaylan Bigun", 
                   "Cassius Chinlund",
                   "Andrei Crabel",
                   "Alexander Hoogmartens",
                   "Spencer Johnson",
                   "Rudy Quan",
                   "Giacomo Revelli",
                   "Aadarsh Tripathi",
                   "Emon van Loben Sels",
                   "Leo Von Bismark",
                   
                   "Olivia Center",
                   "Kate Fakih",
                   "Bianca Fernandez",
                   "Ahmani Guichard",
                   "Kimmi Hance",
                   "Mia Jovic",
                   "Anne-Christine Lutkemeyer",
                   "Elise Wagle"]

In [172]:
# list of names who are playing in match
players = swing_data['Player'].unique()

# checks which one is UCLA player
# Example: make comparison case-insensitive
is_ucla_player = [
    any(name.lower() in roster_name.lower() for roster_name in ucla_roster_24_25)
    for name in players
]

In [173]:
# assigns ucla player to player 1, and non ucla to player 2
shot_data.loc[0, "player1Name"] = players[is_ucla_player]
shot_data.loc[0, "player2Name"] = players[np.invert(is_ucla_player)]

##### serverName Column

In [174]:
def assign_server_name(stroke, server):
    if stroke != 'Serve':
        return ''
    
    if server.startswith(players[is_ucla_player][0]):
        return 'Player1'
    elif server.startswith(players[np.invert(is_ucla_player)][0]):
        return 'Player2'
    
shot_data['serverName'] = swing_data.apply(lambda x: assign_server_name(x['Stroke'], x['Player']), axis=1)
shot_data['serverName'].replace(['', 'na'], pd.NaT, inplace=True)
shot_data['serverName'] = shot_data['serverName'].ffill()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  shot_data['serverName'].replace(['', 'na'], pd.NaT, inplace=True)


##### serverFarNear Column

In [175]:
shot_data.serverFarNear = np.where((swing_data.Stroke == 'Serve'), np.where(swing_data['Hit Side'] == 'far', 'Far', 'Near'), '')
shot_data['serverFarNear'].replace(['', 'na'], pd.NaT, inplace=True)
shot_data['serverFarNear'] = shot_data['serverFarNear'].ffill()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  shot_data['serverFarNear'].replace(['', 'na'], pd.NaT, inplace=True)


##### firstServeIn and secondServeIn Columns

In [176]:
shot_data.firstServeIn = np.where((swing_data.Type == 'first_serve'),np.where((shot_data.isPointStart == 1) & (swing_data['Result'] == 'In'), 1, 0), np.nan)
shot_data.secondServeIn =np.where((swing_data.Type == 'second_serve') & (shot_data.isPointStart == 1), np.where(swing_data['Result'] == 'In', 1,0), np.nan)

##### SwingVision Coord Transformation
court coordinates

- swingvision - meters, near side center marks (0,0)
    - singles court x [-4.1148, 4.1148], y [0, 23.7744]
    - doubles court x [-5.485, 5.485]

- our coordinates - center of net (0,0)
    - singles court x [-157.5, 157.5], y [-455, 455]

- conversion:
    - shot_x = (157.5/4.1148) * swing_x
    - shot_y = (455/11.8872) * swing_y + 455
    - ratio = 38.2764654418

##### firstServeXCoord, firstYServeYCoord, secondServeXCoord, and secondServeyCoord Columns

In [177]:
def first_serve_x_coordinates(stroke, x):
    if stroke == 'first_serve':
        return x * 38.2764654418
    else:
        return np.nan

def first_serve_y_coordinates(stroke, y):
    if stroke == 'first_serve':
        return (y - 11.8872) * 38.2764654418
    else:
        return np.nan
    
shot_data['firstServeXCoord'] = swing_data.apply(lambda row: first_serve_x_coordinates(row['Type'], row['Bounce (x)']), axis=1)
shot_data['firstServeYCoord'] = swing_data.apply(lambda row: first_serve_y_coordinates(row['Type'], row['Bounce (y)']), axis=1)


def second_serve_x_coordinates(stroke, x):
    if stroke == 'second_serve':
        return x * 38.2764654418
    else:
        return np.nan

def second_serve_y_coordinates(stroke, y):
    if stroke == 'second_serve':
        return (y - 11.8872) * 38.2764654418
    else:
        return np.nan
    
shot_data['secondServeXCoord'] = swing_data.apply(lambda row: second_serve_x_coordinates(row['Type'], row['Bounce (x)']), axis=1)
shot_data['secondServeYCoord'] = swing_data.apply(lambda row: second_serve_y_coordinates(row['Type'], row['Bounce (y)']), axis=1)

##### firstServeZone and secondServeZone Columns
- serving zones: T, Body, Wide
- Wide: x in [-inf, -105] u [105, inf]
- Body: x in [-105, -52.5] u [52.5, 105]
- T: x in [-52.5, 52.5]

In [178]:
def label_zone(x_coord):
    if x_coord != '':
        x_coord = float(x_coord)
        if (x_coord < -105) | (x_coord > 105):
            return 'Wide'
        elif (-105 <= x_coord <= -52.5) | (52.5 <= x_coord <= 105):
            return 'Body'
        elif -52.5 < x_coord < 52.5:
            return 'T'
    return ''

# convert x coord to serve zone
shot_data.firstServeZone = shot_data.firstServeXCoord.apply(label_zone)
shot_data.secondServeZone = shot_data.secondServeXCoord.apply(label_zone)

##### isAce Column

In [179]:
shot_data['isAce'] = np.where((swing_data.Stroke == 'Serve') & 
                              (shot_data.isPointEnd == 1) & 
                              (shot_data.secondServeIn != 0), 1, np.nan)

##### shotContactX and shotContactY Columns

In [180]:
# Functions to transform the swingvision coordinates
def transf_x_coord_sv_to_shot(sv_col) :
    return sv_col * 38.2764654418
def transf_y_coord_sv_to_shot(sv_col) :
    return (sv_col - 11.8872) * 38.2764654418

# want to convert swingvision coordinates into our own
shot_data['shotContactX'] = transf_x_coord_sv_to_shot(swing_data['Hit (x)'])
shot_data['shotContactY'] = transf_y_coord_sv_to_shot(swing_data['Hit (y)'])

##### shotFhBh Column

In [181]:
def classify_shot(stroke):
    if stroke == 'FH Volley':
        return 'Forehand'
    elif stroke == 'BH Volley':
        return 'Backhand'
    elif stroke == 'Forehand':
        return 'Forehand'
    elif stroke == 'Backhand':
        return 'Backhand'
    elif stroke == 'Overhead':
        return 'Forehand'
    else:
        return ''

# Applying the function to the DataFrame
shot_data['shotFhBh'] = swing_data['Stroke'].apply(classify_shot)

##### isSlice, isTopspin, isFlat, isKick Columns

In [182]:
shot_data['isSlice'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Slice' else '')
shot_data['isTopspin'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Topspin' else '') # added these metrics
shot_data['isFlat'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Flat' else '') # added these metrics
shot_data['isKick'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Kick' else '') # added these metrics

##### isVolley Column

In [183]:
shot_data['isVolley'] = swing_data['Stroke'].apply(lambda x: 1 if x in ['FH Volley', 'BH Volley', 'Volley'] else '') # need to classify shotFhBh when doing isVolley

##### isOverhead Column

In [184]:
shot_data['isOverhead'] = swing_data['Stroke'].apply(lambda x: 1 if x == 'Overhead' else '')

##### shotLocationX and shotLocationY Columns

In [185]:
# Functions to transform the swingvision coordinates
def transf_x_loc(stroke, sv_col):
    if stroke != 'first_serve' and stroke != 'second_serve':
        return sv_col * 38.2764654418
    return np.nan
    
def transf_y_loc(stroke, sv_col):
    if stroke != 'first_serve' and stroke != 'second_serve':
        return (sv_col - 11.8872) * 38.2764654418
    return np.nan

# want to convert swingvision coordinates into our own
shot_data['shotLocationX'] = swing_data.apply(lambda x: transf_x_loc(x['Type'], x['Bounce (x)']), axis=1)
shot_data['shotLocationY'] = swing_data.apply(lambda x: transf_y_loc(x['Type'], x['Bounce (y)']), axis=1)

##### shotDirection column

In [186]:
# down the line --> switches btwn deuce and ad
# crosscourt --> remains on same side
shot_data['shotDirection'] = np.where((shot_data.shotContactX * shot_data.shotLocationX > 0) & (shot_data.shotInRally != 1), 
"Down the Line", 
    np.where((shot_data.shotInRally != 1), 'Crosscourt', ''))

##### isWinner Column

In [187]:
shot_data.isWinner = np.where((shot_data.isPointEnd == 1) & (shot_data.secondServeIn != '0') &
                              (swing_data.Result == 'In'), 1, np.nan)


##### isErrorWideR Column

In [188]:
def wide_right_function(side, x, y, end):
    if (side == 'far' and x < -157.5 and end == 1) or (side == 'near' and x > 157.5 and end == 1):
        return 1
    return np.nan

# Assign 'isErrorWideR' using values from both 'swing_data' and 'shot_data'
shot_data['isErrorWideR'] = shot_data.apply(lambda x: wide_right_function(swing_data.loc[x.name, 'Hit Side'], 
                                                                x['shotLocationX'], x['shotLocationY'], x['isPointEnd']), axis=1)

##### isErrorWideL Column

In [189]:
def wide_left_function(side, x, y, end):
    if (side == 'far' and x > 157.5 and end == 1) or (side == 'near' and x < -157.5 and end == 1):
        return 1
    return np.nan

# Assign 'isErrorWideR' using values from both 'swing_data' and 'shot_data'
shot_data['isErrorWideL'] = shot_data.apply(lambda x: wide_left_function(swing_data.loc[x.name, 'Hit Side'], 
                                                                x['shotLocationX'], x['shotLocationY'], x['isPointEnd']), axis=1)


##### isErrorNet Column

In [190]:
shot_data.isErrorNet = np.where((swing_data.Result == 'Net'), 1, np.nan)

##### isErrorLong Column

In [191]:
shot_data['isErrorLong'] = np.where((swing_data['Result'] == 'Out') & (shot_data['shotLocationY'].abs() > 455), 1, np.nan)

##### Group First Serve and Second Serve Columns

In [192]:
# All columns
default_cols = ['pointScore', 'gameScore', 'setScore', 'isPointStart', 'pointStartTime',
       'isPointEnd', 'pointEndTime', 'pointNumber', 'isBreakPoint',
       'shotInRally', 'side', 'serverName', 'serverFarNear', 'firstServeIn',
       'firstServeZone', 'firstServeXCoord', 'firstServeYCoord',
       'secondServeIn', 'secondServeZone', 'secondServeXCoord',
       'secondServeYCoord', 'isAce', 'shotContactX', 'shotContactY',
       'shotDirection', 'shotFhBh', 'isSlice', 'isVolley', 'isOverhead',
       'isApproach', 'isDropshot', 'isExcitingPoint', 'atNetPlayer1',
       'atNetPlayer2', 'isLob', 'shotLocationX', 'shotLocationY', 'isWinner',
       'isErrorWideR', 'isErrorWideL', 'isErrorNet', 'isErrorLong',
       'clientTeam', 'Date', 'Division', 'Event', 'lineupPosition',
       'matchDetails', 'matchVenue', 'opponentTeam', 'player1Name',
       'player2Name', 'player1Hand', 'player2Hand', 'Round', 'Surface',
       'Notes', 'isTopspin', 'isFlat', 'isKick']

# Assign all columns to have value be taken from the first serve row
agg_dict = {col: 'first' for col in default_cols}

# Reassign select columns to have value be taken form the second serve row
agg_dict.update({'isPointEnd': 'last', 
                 'pointEndTime': 'last', 
                 'secondServeIn' : 'last',
                 'secondServeZone' : 'last',
                 'secondServeXCoord' : 'last', 
                 'secondServeYCoord' : 'last',
                 'isAce' : 'last', 
                 'shotContactX' : 'last', 
                 'shotContactY' : 'last',
                 'isWinner' : 'last',
                 'isErrorWideR' : 'last',
                 'isErrorWideL' : 'last', 
                 'isErrorNet' : 'last',
                 'isErrorLong' : 'last'
                })

# Group by isPointStart and pointNumber
grouped_df = shot_data.groupby(['shotInRally', 'pointNumber'], as_index=False).agg(agg_dict)
shot_data = grouped_df.sort_values(by=['pointNumber', 'shotInRally'], ascending=[True, True]).reset_index(drop = True)

### To Dos:

- isApproach column
- isDropshot column
- isExcitingPoint column
- atNetPlayer1, atNetPlayer2 column
- isLob column



### Save as CSV

In [193]:
player1NameNoSpace = str(shot_data.iloc[0]['player1Name']).replace(" ", "")
player2NameNoSpace = str(shot_data.iloc[0]['player2Name']).replace(" ", "")

shot_data.to_csv(f'swingvision_{player1NameNoSpace}_{player2NameNoSpace}.csv', index=False)
print(f'swingvision_{player1NameNoSpace}_{player2NameNoSpace}.csv')

swingvision_SpencerJohnson_ArdaAzkara.csv


End