# Swing Vision Transformation
#### Converting SwingVision data into UCLA Tennis Consulting format

## Notebook Start

In [1]:
import pandas as pd
import numpy as np
import os 
import re

# Option to display max rows/columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

### Load in data

In [2]:
# Input file name here
your_file_name = 'aadarsh_allamerican.xlsx'
swing_data = pd.read_excel(your_file_name, sheet_name='Shots')
swing_data.shape

(982, 25)

In [3]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
0,Luca Staeheli,1,first_serve,Serve,Slice,118.40052,1,1,1,short,ad,far,0.37887,16.762121,out,deuce,near,1.135853,-0.554549,2.673766,down the T,Out,False,21:27:07,17.92
1,Aadarsh Tripathi,1,none,Backhand,Slice,53.254807,1,1,1,deep,deuce,far,-0.750807,20.385843,out,deuce,far,-0.694103,26.614202,1.223969,inside out,Net,False,21:27:08,18.969999
2,Luca Staeheli,1,second_serve,Serve,Slice,76.265121,1,1,1,short,deuce,far,-0.73195,16.227081,deep,deuce,near,0.803059,-0.379706,2.749103,down the T,In,False,21:27:17,27.41
3,Aadarsh Tripathi,2,second_return,Backhand,Topspin,54.827156,1,1,1,short,ad,near,-0.973862,7.295816,deep,deuce,far,-1.363457,24.678232,1.643152,inside in,In,False,21:27:18,28.32
4,Luca Staeheli,3,serve_plus_one,Backhand,Topspin,60.095497,1,1,1,deep,ad,far,2.04651,20.453445,deep,ad,near,-0.583427,0.621297,0.803284,cross court,In,False,21:27:19,29.57


### Clean swing vision data

In [4]:
swing_data.query('Stroke == "Feed"').head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
12,Aadarsh Tripathi,0,none,Feed,Slice,19.918634,2,1,1,short,ad,near,-0.430203,5.804375,deep,deuce,far,-0.957253,21.578362,2.052984,---,In,False,21:27:38,48.709999
24,Aadarsh Tripathi,0,none,Feed,Flat,23.382744,5,1,1,deep,ad,near,-0.044347,3.948372,deep,deuce,far,-3.760015,21.839136,0.972103,---,In,False,21:29:00,130.149994
35,Luca Staeheli,0,none,Feed,Flat,24.871349,7,2,1,short,deuce,far,-1.670223,13.518161,out,deuce,far,-2.310343,25.097612,1.392078,---,Net,False,21:30:59,249.460007
50,Luca Staeheli,0,none,Feed,Flat,19.052607,9,2,1,deep,deuce,near,0.584094,4.607936,short,ad,far,2.249055,18.12291,3.073324,---,In,False,21:32:19,329.100006
81,Luca Staeheli,0,none,Feed,Flat,24.438272,12,2,1,short,ad,far,0.324774,15.704046,deep,deuce,far,-0.530713,23.145393,1.785928,---,Net,False,21:34:35,465.709991


In [5]:
swing_data.query('Shot == 0').head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
12,Aadarsh Tripathi,0,none,Feed,Slice,19.918634,2,1,1,short,ad,near,-0.430203,5.804375,deep,deuce,far,-0.957253,21.578362,2.052984,---,In,False,21:27:38,48.709999
24,Aadarsh Tripathi,0,none,Feed,Flat,23.382744,5,1,1,deep,ad,near,-0.044347,3.948372,deep,deuce,far,-3.760015,21.839136,0.972103,---,In,False,21:29:00,130.149994
35,Luca Staeheli,0,none,Feed,Flat,24.871349,7,2,1,short,deuce,far,-1.670223,13.518161,out,deuce,far,-2.310343,25.097612,1.392078,---,Net,False,21:30:59,249.460007
50,Luca Staeheli,0,none,Feed,Flat,19.052607,9,2,1,deep,deuce,near,0.584094,4.607936,short,ad,far,2.249055,18.12291,3.073324,---,In,False,21:32:19,329.100006
81,Luca Staeheli,0,none,Feed,Flat,24.438272,12,2,1,short,ad,far,0.324774,15.704046,deep,deuce,far,-0.530713,23.145393,1.785928,---,Net,False,21:34:35,465.709991


In [6]:
swing_data.query('Type == "none"').head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
1,Aadarsh Tripathi,1,none,Backhand,Slice,53.254807,1,1,1,deep,deuce,far,-0.750807,20.385843,out,deuce,far,-0.694103,26.614202,1.223969,inside out,Net,False,21:27:08,18.969999
12,Aadarsh Tripathi,0,none,Feed,Slice,19.918634,2,1,1,short,ad,near,-0.430203,5.804375,deep,deuce,far,-0.957253,21.578362,2.052984,---,In,False,21:27:38,48.709999
24,Aadarsh Tripathi,0,none,Feed,Flat,23.382744,5,1,1,deep,ad,near,-0.044347,3.948372,deep,deuce,far,-3.760015,21.839136,0.972103,---,In,False,21:29:00,130.149994
35,Luca Staeheli,0,none,Feed,Flat,24.871349,7,2,1,short,deuce,far,-1.670223,13.518161,out,deuce,far,-2.310343,25.097612,1.392078,---,Net,False,21:30:59,249.460007
50,Luca Staeheli,0,none,Feed,Flat,19.052607,9,2,1,deep,deuce,near,0.584094,4.607936,short,ad,far,2.249055,18.12291,3.073324,---,In,False,21:32:19,329.100006


#### Remove all the rows where it is a feed

In [7]:
rows_to_drop = swing_data.query('Type == "none"').index
swing_data = swing_data.drop(rows_to_drop)
swing_data = swing_data.reset_index(drop=True) # Important to reindex to avoid missing indicies
swing_data.shape

(847, 25)

In [8]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
0,Luca Staeheli,1,first_serve,Serve,Slice,118.40052,1,1,1,short,ad,far,0.37887,16.762121,out,deuce,near,1.135853,-0.554549,2.673766,down the T,Out,False,21:27:07,17.92
1,Luca Staeheli,1,second_serve,Serve,Slice,76.265121,1,1,1,short,deuce,far,-0.73195,16.227081,deep,deuce,near,0.803059,-0.379706,2.749103,down the T,In,False,21:27:17,27.41
2,Aadarsh Tripathi,2,second_return,Backhand,Topspin,54.827156,1,1,1,short,ad,near,-0.973862,7.295816,deep,deuce,far,-1.363457,24.678232,1.643152,inside in,In,False,21:27:18,28.32
3,Luca Staeheli,3,serve_plus_one,Backhand,Topspin,60.095497,1,1,1,deep,ad,far,2.04651,20.453445,deep,ad,near,-0.583427,0.621297,0.803284,cross court,In,False,21:27:19,29.57
4,Aadarsh Tripathi,4,return_plus_one,Backhand,Topspin,57.061077,1,1,1,deep,deuce,near,0.761086,1.642498,out,ad,far,2.400083,25.378126,1.205338,down the line,In,False,21:27:20,30.76


### Load in Points data

In [9]:
swing_data_points = pd.read_excel(your_file_name, sheet_name='Points')
swing_data_points.shape

(147, 15)

In [10]:
def create_point(server, player1score, player2score):
    if server == "host":
        return str(player1score) + "-" + str(player2score)
    else:
        return str(player2score) + "-" + str(player1score)
    
swing_data_points['pointScore'] = swing_data_points.apply(lambda x: create_point(x['Match Server'], x['Host Game Score'], x['Guest Game Score']), axis=1)

In [11]:
swing_data_points = swing_data_points.rename(columns={'Break Point' : 'isBreakPoint'})
swing_data_points['isBreakPoint'] = swing_data_points['isBreakPoint'].replace(False, '')
swing_data_points['isBreakPoint'] = swing_data_points['isBreakPoint'].replace(True, 1)

In [12]:
swing_data_points = swing_data_points[['Point', 'pointScore', 'isBreakPoint']]
swing_data_points.head()

Unnamed: 0,Point,pointScore,isBreakPoint
0,1,0-0,
1,2,15-0,
2,3,15-15,
3,4,30-15,
4,5,40-15,


In [13]:
swing_data = pd.merge(swing_data, swing_data_points, on='Point')

### Load in Games data

In [14]:
swing_data_games = pd.read_excel(your_file_name, sheet_name='Games')
swing_data_games.shape

(27, 9)

In [15]:
swing_data_games.head()

Unnamed: 0,Game,Set,Server,Host Set Score,Guest Set Score,Game Winner,Start Time,Video Time,Duration
0,1,1,guest,0,0,guest,21:27:09,19.92,163.259995
1,2,1,host,0,1,guest,21:29:53,183.179993,319.920013
2,3,1,guest,0,2,guest,21:35:13,503.109985,158.669998
3,4,1,host,0,3,host,21:37:51,661.77002,223.649994
4,5,1,guest,1,3,guest,21:41:35,885.419983,220.889999


In [16]:
def create_game(player1game, player2game):
        return str(player1game) + "-" + str(player2game)

    
swing_data_games['gameScore'] = swing_data_games.apply(lambda x: create_game(x['Host Set Score'], x['Guest Set Score']),  axis=1)

In [17]:
swing_data_games

Unnamed: 0,Game,Set,Server,Host Set Score,Guest Set Score,Game Winner,Start Time,Video Time,Duration,gameScore
0,1,1,guest,0,0,guest,21:27:09,19.92,163.259995,0-0
1,2,1,host,0,1,guest,21:29:53,183.179993,319.920013,0-1
2,3,1,guest,0,2,guest,21:35:13,503.109985,158.669998,0-2
3,4,1,host,0,3,host,21:37:51,661.77002,223.649994,0-3
4,5,1,guest,1,3,guest,21:41:35,885.419983,220.889999,1-3
5,6,1,host,1,4,host,21:45:16,1106.310059,243.630005,1-4
6,7,1,guest,2,4,guest,21:49:19,1349.930054,469.589996,2-4
7,8,1,host,2,5,guest,21:57:09,1819.530029,271.0,2-5
8,9,2,guest,0,0,guest,22:01:40,2090.530029,326.980011,0-0
9,10,2,host,0,1,host,22:07:07,2417.51001,208.199997,0-1


In [18]:
swing_data_games = swing_data_games[['Game', 'gameScore']]

In [19]:
swing_data = pd.merge(swing_data, swing_data_games, on="Game")

In [20]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time,pointScore,isBreakPoint,gameScore
0,Luca Staeheli,1,first_serve,Serve,Slice,118.40052,1,1,1,short,ad,far,0.37887,16.762121,out,deuce,near,1.135853,-0.554549,2.673766,down the T,Out,False,21:27:07,17.92,0-0,,0-0
1,Luca Staeheli,1,second_serve,Serve,Slice,76.265121,1,1,1,short,deuce,far,-0.73195,16.227081,deep,deuce,near,0.803059,-0.379706,2.749103,down the T,In,False,21:27:17,27.41,0-0,,0-0
2,Aadarsh Tripathi,2,second_return,Backhand,Topspin,54.827156,1,1,1,short,ad,near,-0.973862,7.295816,deep,deuce,far,-1.363457,24.678232,1.643152,inside in,In,False,21:27:18,28.32,0-0,,0-0
3,Luca Staeheli,3,serve_plus_one,Backhand,Topspin,60.095497,1,1,1,deep,ad,far,2.04651,20.453445,deep,ad,near,-0.583427,0.621297,0.803284,cross court,In,False,21:27:19,29.57,0-0,,0-0
4,Aadarsh Tripathi,4,return_plus_one,Backhand,Topspin,57.061077,1,1,1,deep,deuce,near,0.761086,1.642498,out,ad,far,2.400083,25.378126,1.205338,down the line,In,False,21:27:20,30.76,0-0,,0-0


### Load in Sets data

In [21]:
swing_data_sets = pd.read_excel(your_file_name, sheet_name='Sets')
swing_data_sets.shape

(3, 10)

In [22]:
swing_data_sets

Unnamed: 0,Set,Host Score,Guest Score,Host Tiebreak Score,Guest Tiebreak Score,Set Winner,Super Tiebreak,Start Time,Video Time,Duration
0,1,2,6,0,0,guest,False,21:27:09,19.92,2070.600098
1,2,6,3,0,0,host,False,22:01:40,2090.530029,2334.280029
2,3,4,6,0,0,guest,False,22:40:34,4424.810059,2722.959961


In [23]:
host_set_score = 0
guest_set_score = 0

def create_set(set_winner):
        global host_set_score, guest_set_score  # Declare global variables
        if set_winner == "host":
                host_set_score += 1
        else:        
                guest_set_score += 1
        
        return str(host_set_score) + "-" + str(guest_set_score)

swing_data_sets['setScore'] = None
swing_data_sets.at[0, 'setScore'] = "0-0"
    
swing_data_sets.iloc[1:, swing_data_sets.columns.get_loc('setScore')] = swing_data_sets.iloc[1:].apply(lambda x: create_set(x['Set Winner']),  axis=1)


In [24]:
swing_data_sets = swing_data_sets[['Set', 'setScore']]


In [25]:
swing_data = pd.merge(swing_data, swing_data_sets, on="Set")
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time,pointScore,isBreakPoint,gameScore,setScore
0,Luca Staeheli,1,first_serve,Serve,Slice,118.40052,1,1,1,short,ad,far,0.37887,16.762121,out,deuce,near,1.135853,-0.554549,2.673766,down the T,Out,False,21:27:07,17.92,0-0,,0-0,0-0
1,Luca Staeheli,1,second_serve,Serve,Slice,76.265121,1,1,1,short,deuce,far,-0.73195,16.227081,deep,deuce,near,0.803059,-0.379706,2.749103,down the T,In,False,21:27:17,27.41,0-0,,0-0,0-0
2,Aadarsh Tripathi,2,second_return,Backhand,Topspin,54.827156,1,1,1,short,ad,near,-0.973862,7.295816,deep,deuce,far,-1.363457,24.678232,1.643152,inside in,In,False,21:27:18,28.32,0-0,,0-0,0-0
3,Luca Staeheli,3,serve_plus_one,Backhand,Topspin,60.095497,1,1,1,deep,ad,far,2.04651,20.453445,deep,ad,near,-0.583427,0.621297,0.803284,cross court,In,False,21:27:19,29.57,0-0,,0-0,0-0
4,Aadarsh Tripathi,4,return_plus_one,Backhand,Topspin,57.061077,1,1,1,deep,deuce,near,0.761086,1.642498,out,ad,far,2.400083,25.378126,1.205338,down the line,In,False,21:27:20,30.76,0-0,,0-0,0-0


### Create shot data csv

In [26]:
# Check existing columns
swing_data.columns

Index(['Player', 'Shot', 'Type', 'Stroke', 'Spin', 'Speed (MPH)', 'Point',
       'Game', 'Set', 'Bounce Depth', 'Bounce Zone', 'Bounce Side',
       'Bounce (x)', 'Bounce (y)', 'Hit Depth', 'Hit Zone', 'Hit Side',
       'Hit (x)', 'Hit (y)', 'Hit (z)', 'Direction', 'Result', 'Favorited',
       'Start Time', 'Video Time', 'pointScore', 'isBreakPoint', 'gameScore',
       'setScore'],
      dtype='object')

In [27]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time,pointScore,isBreakPoint,gameScore,setScore
0,Luca Staeheli,1,first_serve,Serve,Slice,118.40052,1,1,1,short,ad,far,0.37887,16.762121,out,deuce,near,1.135853,-0.554549,2.673766,down the T,Out,False,21:27:07,17.92,0-0,,0-0,0-0
1,Luca Staeheli,1,second_serve,Serve,Slice,76.265121,1,1,1,short,deuce,far,-0.73195,16.227081,deep,deuce,near,0.803059,-0.379706,2.749103,down the T,In,False,21:27:17,27.41,0-0,,0-0,0-0
2,Aadarsh Tripathi,2,second_return,Backhand,Topspin,54.827156,1,1,1,short,ad,near,-0.973862,7.295816,deep,deuce,far,-1.363457,24.678232,1.643152,inside in,In,False,21:27:18,28.32,0-0,,0-0,0-0
3,Luca Staeheli,3,serve_plus_one,Backhand,Topspin,60.095497,1,1,1,deep,ad,far,2.04651,20.453445,deep,ad,near,-0.583427,0.621297,0.803284,cross court,In,False,21:27:19,29.57,0-0,,0-0,0-0
4,Aadarsh Tripathi,4,return_plus_one,Backhand,Topspin,57.061077,1,1,1,deep,deuce,near,0.761086,1.642498,out,ad,far,2.400083,25.378126,1.205338,down the line,In,False,21:27:20,30.76,0-0,,0-0,0-0


In [28]:
# add in all desired column labels, with swingvision labels at end

columm_names = (['pointScore', 'gameScore', 'setScore', 'isPointStart', 
                                         'pointStartTime', 'isPointEnd', 'pointEndTime','pointNumber',
                                         'isBreakPoint','shotInRally','side','serverName',
                                         'serverFarNear','firstServeIn','firstServeZone',
                                         'firstServeXCoord','firstServeYCoord',
                                         'secondServeIn','secondServeZone','secondServeXCoord',
                                         'secondServeYCoord','isAce','shotContactX',
                                         'shotContactY','shotDirection','shotFhBh',
                                         'isSlice','isVolley','isOverhead','isApproach','isDropshot',
                                         'isExcitingPoint','atNetPlayer1','atNetPlayer2','isLob',
                                         'shotLocationX','shotLocationY','isWinner','isErrorWideR', 'isErrorWideL',
                                         'isErrorNet','isErrorLong','clientTeam',
                                         'Date', 'Division', 'Event', 'lineupPosition','matchDetails',
                                         'matchVenue' , 'opponentTeam', 
                                         'player1Name', 'player2Name','player1Hand','player2Hand',
                                        'Round','Surface','Notes'])

shot_data = pd.DataFrame(columns=columm_names)
shot_data

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes


### Score Columns

In [29]:
shot_data['pointScore'] = swing_data['pointScore']
shot_data['gameScore'] = swing_data['gameScore']
shot_data['setScore'] = swing_data['setScore']

In [30]:
shot_data.head(10)

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes
0,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### isPointStart and isPointEnd columns 

In [31]:
def assign_pointstart(x):
    if (x == 'first_serve') | (x == 'second_serve'):
        return 1
    
    return ''

shot_data['isPointStart'] = swing_data['Type'].apply(assign_pointstart)


index_list = []

for i in swing_data['Point'].unique().tolist():
    last_point_index = swing_data[swing_data['Point'] == i].index[-1]
    index_list.append(last_point_index)
    
shot_data.loc[index_list,'isPointEnd'] = 1
shot_data['isPointEnd'] = shot_data['isPointEnd'].fillna('')

### pointStartTime and pointEndTime Columns

In [32]:
# Grab the initial time
match_start_time = re.split(":", swing_data["Start Time"][0])

for i in range(len(swing_data)): # converting into milliseconds
    time = re.split(":", swing_data["Start Time"][i])
    shot_data.loc[i,'pointStartTime'] = ((int(time[0]) - int(match_start_time[0])) *  3600000
                                       + (int(time[1]) - int(match_start_time[1])) *  60000
                                       + (int(time[2]) - int(match_start_time[2])) *  1000)
    
# Assigns last shot time to pointEndTime column
shot_data['pointEndTime'] = np.where(shot_data['isPointEnd'] == 1, shot_data['pointStartTime'], '')

### pointNumber Column

In [33]:
shot_data['pointNumber'] = swing_data['Point']

### isBreakPoint Column

In [34]:
shot_data['isBreakPoint'] = swing_data['isBreakPoint']

### shotInRally column

In [35]:
shot_data.shotInRally = swing_data.Shot

### side Column

In [36]:
def side(x, side, xcoord):
    if 'deuce' in x:
        return 'Deuce'
    elif 'ad' in x:
        return 'Ad'
    elif 'center_line' in x: # unique values include deuce, ad and center_line
        if (side == 'near') & (xcoord > 0):
            return 'Deuce'
        else:
            return 'Ad'
    else:
        return ''

shot_data['side'] = swing_data.apply(lambda x: side(x['Hit Zone'], x['Hit Side'], x['Bounce (x)']), axis = 1)

### Players

In [37]:
# ucla roster 23-24 men and womens
ucla_roster_23 = ["Gianluca Ballotta", 
                   "Jeffrey Fradkin", 
                   "Alexander Hoogmartens",
                   "Spencer Johnson",
                   "Stefan Leustian",
                   "Timothy Li",
                   "Govind Nanda",
                   "Jorge Plans Gonzalez",
                   "Giacomo Revelli",
                   "Aadarsh Tripathi",
                   "Emon van Loben Sels",
                   "Azuma Visaya",
                   "Rudy Quan",
                   "Leo Von Bismark",
                   
                   "Tian Fangran",
                   "Bianca Fernandez",
                   "Ahmani Guichard",
                   "Kimmi Hance",
                   "Mia Jovic",
                   "Anne-Christine Lutkemeyer",
                   "Vanessa Ong",
                   "Sasha Vagramov",
                   "Elise Wagle"]

In [38]:
# list of names who are playing in match
players = swing_data['Player'].unique()

# checks which one is UCLA player
is_ucla_player = [any([name in roster_name for roster_name in ucla_roster_23]) for name in players]

In [39]:
# assigns ucla player to player 1, and non ucla to player 2
shot_data.loc[0, "player1Name"] = players[is_ucla_player]
shot_data.loc[0, "player2Name"] = players[np.invert(is_ucla_player)]

### serverName Column

In [40]:
def assign_server_name(stroke, server):
    if stroke != 'Serve':
        return ''
    
    if server.startswith(players[is_ucla_player][0]):
        return 'Player1'
    elif server.startswith(players[np.invert(is_ucla_player)][0]):
        return 'Player2'
    
shot_data['serverName'] = swing_data.apply(lambda x: assign_server_name(x['Stroke'], x['Player']), axis=1)
shot_data['serverName'].replace(['', 'na'], pd.NaT, inplace=True)
shot_data['serverName'] = shot_data['serverName'].ffill()

### serverFarNear Column

In [41]:
shot_data.serverFarNear = np.where((swing_data.Stroke == 'Serve'), np.where(swing_data['Hit Side'] == 'far', 'Far', 'Near'), '')
shot_data['serverFarNear'].replace(['', 'na'], pd.NaT, inplace=True)
shot_data['serverFarNear'] = shot_data['serverFarNear'].ffill()

### firstServeIn and secondServeIn Columns

In [42]:
shot_data.firstServeIn = np.where((swing_data.Type == 'first_serve'),np.where((shot_data.isPointStart == 1) & (swing_data['Result'] == 'In'), 1, 0), np.nan)
shot_data.secondServeIn =np.where((swing_data.Type == 'second_serve') & (shot_data.isPointStart == 1), np.where(swing_data['Result'] == 'In', 1,0), np.nan)

### SwingVision Coord Transformation
court coordinates
swing vision - meters, near side center marks (0,0)
singles court x [-4.1148, 4.1148], y [0, 23.7744]
doubles court x [-5.485, 5.485]

our coordinates - center of net (0,0)
singles court x [-157.5, 157.5], y [-455, 455]

shot_x = (157.5/4.1148) * swing_x
shot_y = (455/11.8872) * swing_y + 455
ratio = 38.2764654418

### firstServeXCoord, firstYServeYCoord, secondServeXCoord, and secondServeyCoord Columns

In [43]:
def first_serve_x_coordinates(stroke, x):
    if stroke == 'first_serve':
        return x * 38.2764654418
    else:
        return np.nan

def first_serve_y_coordinates(stroke, y):
    if stroke == 'first_serve':
        return (y - 11.8872) * 38.2764654418
    else:
        return np.nan
    
shot_data['firstServeXCoord'] = swing_data.apply(lambda row: first_serve_x_coordinates(row['Type'], row['Bounce (x)']), axis=1)
shot_data['firstServeYCoord'] = swing_data.apply(lambda row: first_serve_y_coordinates(row['Type'], row['Bounce (y)']), axis=1)


def second_serve_x_coordinates(stroke, x):
    if stroke == 'second_serve':
        return x * 38.2764654418
    else:
        return np.nan

def second_serve_y_coordinates(stroke, y):
    if stroke == 'second_serve':
        return (y - 11.8872) * 38.2764654418
    else:
        return np.nan
    
shot_data['secondServeXCoord'] = swing_data.apply(lambda row: second_serve_x_coordinates(row['Type'], row['Bounce (x)']), axis=1)
shot_data['secondServeYCoord'] = swing_data.apply(lambda row: second_serve_y_coordinates(row['Type'], row['Bounce (y)']), axis=1)

### firstServeZone and secondServeZone Columns
- serving zones: T, Body, Wide
- Wide: x in [-inf, -105] u [105, inf]
- Body: x in [-105, -52.5] u [52.5, 105]
- T: x in [-52.5, 52.5]

In [44]:
def label_zone(x_coord):
    if x_coord != '':
        x_coord = float(x_coord)
        if (x_coord < -105) | (x_coord > 105):
            return 'Wide'
        elif (-105 <= x_coord <= -52.5) | (52.5 <= x_coord <= 105):
            return 'Body'
        elif -52.5 < x_coord < 52.5:
            return 'T'
    return ''

# convert x coord to serve zone
shot_data.firstServeZone = shot_data.firstServeXCoord.apply(label_zone)
shot_data.secondServeZone = shot_data.secondServeXCoord.apply(label_zone)

In [45]:
shot_data.head()

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes
0,0-0,0-0,0-0,1.0,0,,,1,,1,Deuce,Player2,Near,0.0,T,14.501804,186.594745,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Aadarsh Tripathi,Luca Staeheli,,,,,
1,0-0,0-0,0-0,1.0,10000,,,1,,1,Deuce,Player2,Near,,,,,1.0,T,-28.016459,166.115305,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0-0,0-0,0-0,,11000,,,1,,2,Deuce,Player2,Near,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0-0,0-0,0-0,,12000,,,1,,3,Ad,Player2,Near,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0-0,0-0,0-0,,13000,,,1,,4,Ad,Player2,Near,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### isAce Column
- WARNING: Not accurate
- FIX: counts double faults as aces

In [46]:
shot_data['isAce'] = np.where((swing_data.Stroke == 'Serve') & 
                              (shot_data.isPointEnd == 1) & 
                              (shot_data.secondServeIn != 0), 1, np.nan)

### shotContactX and shotContactY Columns

In [47]:
# Functions to transform the swingvision coordinates
def transf_x_coord_sv_to_shot(sv_col) :
    return sv_col * 38.2764654418
def transf_y_coord_sv_to_shot(sv_col) :
    return (sv_col - 11.8872) * 38.2764654418

# want to convert swingvision coordinates into our own
shot_data['shotContactX'] = transf_x_coord_sv_to_shot(swing_data['Hit (x)'])
shot_data['shotContactY'] = transf_y_coord_sv_to_shot(swing_data['Hit (y)'])

### shotFhBh Column

In [48]:
def classify_shot(stroke):
    if stroke == 'FH Volley':
        return 'Forehand'
    elif stroke == 'BH Volley':
        return 'Backhand'
    elif stroke == 'Forehand':
        return 'Forehand'
    elif stroke == 'Backhand':
        return 'Backhand'
    elif stroke == 'Overhead':
        return 'Forehand'
    else:
        return ''

# Applying the function to the DataFrame
shot_data['shotFhBh'] = swing_data['Stroke'].apply(classify_shot)

### isSlice, isTopspin, isFlat, isKick Columns

In [49]:
shot_data['isSlice'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Slice' else '')
shot_data['isTopspin'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Topspin' else '') # added these metrics
shot_data['isFlat'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Flat' else '') # added these metrics
shot_data['isKick'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Kick' else '') # added these metrics

### isVolley Column
- Volleys are inaccurate

In [50]:
shot_data['isVolley'] = swing_data['Stroke'].apply(lambda x: 1 if x in ['FH Volley', 'BH Volley'] else '') # need to classify shotFhBh when doing isVolley

### isOverhead Column
- Overheads are inaccurate

In [51]:
shot_data['isOverhead'] = swing_data['Stroke'].apply(lambda x: 1 if x == 'Overhead' else '')

### isApproach Column

In [52]:
# maybe run model to predict 

# features to consider:
# player is inside the court

# Workflow:
# watch all points and tag all points that have _____
# subset df with points (testing x and y)


### isDropshot Column

In [53]:
# maybe run model to predict
# features to consider: 
# shotlocationY if close to the net
# shotContactY is close to the net
# speed of the ball (in swingvision data)

### isExcitingPoint

In [54]:
# maybe run model to predict
# features to consider:
# rally length is long (maybe take _% percintile of rallies)
# point ends in a winner
# either player moves a lot
# amount of volleys, overheads
# breakpoint factor

### atNetPlayer1 and atNetPlayer2 Columns

In [55]:
# aggregated in STP

### isLob Column

In [56]:
# maybe run model to predict
# features to consider:
# opponent is at the net
# speed of the ball (in swingvision data)

### shotLocationX and shotLocationY Columns

In [57]:
# Functions to transform the swingvision coordinates
def transf_x_loc(stroke, sv_col):
    if stroke != 'first_serve' and stroke != 'second_serve':
        return sv_col * 38.2764654418
    return np.nan
    
def transf_y_loc(stroke, sv_col):
    if stroke != 'first_serve' and stroke != 'second_serve':
        return (sv_col - 11.8872) * 38.2764654418
    return np.nan

# want to convert swingvision coordinates into our own
shot_data['shotLocationX'] = swing_data.apply(lambda x: transf_x_loc(x['Type'], x['Bounce (x)']), axis=1)
shot_data['shotLocationY'] = swing_data.apply(lambda x: transf_y_loc(x['Type'], x['Bounce (y)']), axis=1)

### shotDirection column

In [58]:
# down the line --> switches btwn deuce and ad
# crosscourt --> remains on same side
shot_data['shotDirection'] = np.where((shot_data.shotContactX * shot_data.shotLocationX > 0) & (shot_data.shotInRally != 1), 
"Down the Line", 
    np.where((shot_data.shotInRally != 1), 'Crosscourt', ''))

### isWinner Column

In [59]:
shot_data.isWinner = np.where((shot_data.isPointEnd == 1) & (shot_data.secondServeIn != '0') &
                              (swing_data.Result == 'In'), 1, np.nan)


### isErrorWideR Column

In [60]:
def wide_right_function(side, x, y, end):
    if (side == 'far' and x < -157.5 and end == 1) or (side == 'near' and x > 157.5 and end == 1):
        return 1
    return np.nan

# Assign 'isErrorWideR' using values from both 'swing_data' and 'shot_data'
shot_data['isErrorWideR'] = shot_data.apply(lambda x: wide_right_function(swing_data.loc[x.name, 'Hit Side'], 
                                                                x['shotLocationX'], x['shotLocationY'], x['isPointEnd']), axis=1)


### isErrorWideL Column

In [61]:
def wide_left_function(side, x, y, end):
    if (side == 'far' and x > 157.5 and end == 1) or (side == 'near' and x < -157.5 and end == 1):
        return 1
    return np.nan

# Assign 'isErrorWideR' using values from both 'swing_data' and 'shot_data'
shot_data['isErrorWideL'] = shot_data.apply(lambda x: wide_left_function(swing_data.loc[x.name, 'Hit Side'], 
                                                                x['shotLocationX'], x['shotLocationY'], x['isPointEnd']), axis=1)


### isErrorNet Column

In [62]:
shot_data.isErrorNet = np.where((swing_data.Result == 'Net'), 1, np.nan)

### isErrorLong Column

In [63]:
shot_data['isErrorLong'] = np.where((swing_data['Result'] == 'Out') & (shot_data['shotLocationY'].abs() > 455), 1, np.nan)

### Group First Serve and Second Serve Columns

In [64]:
# All columns
default_cols = ['pointScore', 'gameScore', 'setScore', 'isPointStart', 'pointStartTime',
       'isPointEnd', 'pointEndTime', 'pointNumber', 'isBreakPoint',
       'shotInRally', 'side', 'serverName', 'serverFarNear', 'firstServeIn',
       'firstServeZone', 'firstServeXCoord', 'firstServeYCoord',
       'secondServeIn', 'secondServeZone', 'secondServeXCoord',
       'secondServeYCoord', 'isAce', 'shotContactX', 'shotContactY',
       'shotDirection', 'shotFhBh', 'isSlice', 'isVolley', 'isOverhead',
       'isApproach', 'isDropshot', 'isExcitingPoint', 'atNetPlayer1',
       'atNetPlayer2', 'isLob', 'shotLocationX', 'shotLocationY', 'isWinner',
       'isErrorWideR', 'isErrorWideL', 'isErrorNet', 'isErrorLong',
       'clientTeam', 'Date', 'Division', 'Event', 'lineupPosition',
       'matchDetails', 'matchVenue', 'opponentTeam', 'player1Name',
       'player2Name', 'player1Hand', 'player2Hand', 'Round', 'Surface',
       'Notes', 'isTopspin', 'isFlat', 'isKick']

# Assign all columns to have value be taken from the first serve row
agg_dict = {col: 'first' for col in default_cols}

# Reassign select columns to have value be taken form the second serve row
agg_dict.update({'isPointEnd': 'last', 
                 'pointEndTime': 'last', 
                 'secondServeIn' : 'last',
                 'secondServeZone' : 'last',
                 'secondServeXCoord' : 'last', 
                 'secondServeYCoord' : 'last',
                 'isAce' : 'last', 
                 'shotContactX' : 'last', 
                 'shotContactY' : 'last',
                 'isWinner' : 'last',
                 'isErrorWideR' : 'last',
                 'isErrorWideL' : 'last', 
                 'isErrorNet' : 'last',
                 'isErrorLong' : 'last'
                })

# Group by isPointStart and pointNumber
grouped_df = shot_data.groupby(['shotInRally', 'pointNumber'], as_index=False).agg(agg_dict)
shot_data = grouped_df.sort_values(by=['pointNumber', 'shotInRally'], ascending=[True, True]).reset_index(drop = True)

### Save as CSV

In [65]:
player1NameNoSpace = str(shot_data.iloc[0]['player1Name']).replace(" ", "")
player2NameNoSpace = str(shot_data.iloc[0]['player2Name']).replace(" ", "")

shot_data.to_csv(f'swingvision_{player1NameNoSpace}_{player2NameNoSpace}.csv', index=False)

## Temporarily input csv

In [66]:
file = 'Cleaned_w_EliseWagle_TexasTech.csv'
example_data = pd.read_csv(file)

FileNotFoundError: [Errno 2] No such file or directory: 'Cleaned_w_EliseWagle_TexasTech.csv'

## Errors in Swingvision data

#### Chcek all the rows where isPointEnd != 1 and there is  isWinner, isErrorWideL, isErrorWideR, isErrorNet, isErrorLong

In [None]:
point_error = shot_data[(shot_data['isPointEnd'] != 1) & (shot_data['isPointStart'] != 1) &
          ((shot_data['isWinner'] == 1) | 
          (shot_data['isErrorNet'] == 1) | 
          (shot_data['isErrorLong'] == 1) |
          (shot_data['isErrorWideL'] == 1) |
          (shot_data['isErrorWideR'] == 1))]

point_error_numbers = point_error['pointNumber'].to_list()

if len(point_error) > 0:
    display(point_error)
    raise ValueError('Manually check points', point_error_numbers)

In [None]:
shot_data[shot_data['pointNumber'] == 75]

#### Check all the rows where there is isPointEnd == 1 but there is no isWinner, isErrorWideL, isErrorWideR, isErrorNet, isErrorLong
- Cj reccomendation: have this error check autmatically fill in how the point ends based on coordinate data

In [None]:
point_error = shot_data[(shot_data['isPointEnd'] == 1) &
                          (shot_data['isWinner'] != 1) &
                          (shot_data['isErrorWideL'] != 1) &
                          (shot_data['isErrorWideR'] != 1) &
                          (shot_data['isErrorNet'] != 1) & 
                          (shot_data['isErrorLong'] != 1) &
                          (shot_data['firstServeIn'] != 0) & 
                          (shot_data['secondServeIn'] != 0)]

point_error_numbers = point_error['pointNumber'].to_list()

if point_error.empty:
    print('Check Passed ✓')
else:

    display(point_error)
    raise ValueError('Manually check points', point_error_numbers)

In [None]:
# should return Check passed

#### Volleys

In [None]:
shot_data.query('isVolley == 1')

#### Overheads

In [None]:
shot_data.query('isOverhead == 1')

### Points

In [None]:
# # ad scoring?

# ad_scoring = False

In [None]:
# # want to record the score every time a point ends
# # points: server - returner
# # games: ucla (player1) - opp
# # sets: ucla (player1) - opp
# points = np.zeros(2)
# games = np.zeros(2)
# sets = np.zeros(2)
# pt_values = [0, 15, 30, 40]



# shot_data.loc[0,"pointScore"] = f"{pt_values[int(points[0])]} - {pt_values[int(points[1])]}"
# shot_data.loc[0,"gameScore"] = f"{games[0]} - {games[1]}"
# shot_data.loc[0,"setScore"] = f"{sets[0]} - {sets[1]}"

# shot_data["isBreakPoint"] = ''

# error_cols = [x for x in shot_data.columns if "isError" in x]

# for i in range(0, len(shot_data.pointScore) - 1):
#     if shot_data.loc[i+1, "isPointStart"] == 1: # means we gotta update pts
#         # determine point score by checking last shot
#         if shot_data.loc[i, "isWinner"] == "1":
#             # check if player 1 or 2 won pt
#             pt_winner_player_num = (np.where(shot_data.loc[i, "Player"] == shot_data.loc[0,"player1Name"], '1', '2'))
#         elif shot_data.loc[i, 'secondServeIn'] == "0": # double fault
#             pt_winner_player_num = (np.where(shot_data.loc[i, "Player"] == shot_data.loc[0,"player1Name"], '2', '1'))
#         elif any(shot_data.loc[i,error_cols] == "1"):
#             # winner is the player who did NOT hit that shot
#             pt_winner_player_num = (np.where(shot_data.loc[i, "Player"] == shot_data.loc[0,"player1Name"], '2', '1'))
#         else:
#             print("no pt recorded at row ", i)

#         if shot_data.loc[i, "serverName"] is not None:
#             didServerWinPt = shot_data.loc[i, "serverName"][-1] == pt_winner_player_num
#         else:
#             print(f"Server name is None at row {i}. Skipping this point.")
#             continue  # Skip this point if server name is None
        
#         if didServerWinPt:
#             points[0] += 1
#         else:
#             points[1] += 1


#         if ad_scoring: # checks if need to win by 2
#             if any(points > 3) and abs(points[0] - points[1]) >= 2:
#                 game_winner = np.argmax(points)  # Find who won the game
#                 games[game_winner] += 1
#                 points = np.zeros(2)  # Reset point values
#             if any(games > 5) and abs(games[0] - games[1]) >= 2:
#                 set_winner = np.argmax(games)  # Find who won the set
#                 sets[set_winner] += 1
#                 games = np.zeros(2)  # Reset game values   
#         else:
#             if points[1] == 3: # if the returner has 40 pts and can win the game
#                 shot_data.loc[i + 1, 'isBreakPoint'] = '1'
#             if any(points > 3):
#                 game_winner = np.argmax(points)  # Find who won the game
#                 games[game_winner] += 1
#                 points = np.zeros(2)  # Reset point values
#             if any(games > 5):
#                 set_winner = np.argmax(games)  # Find who won the set
#                 sets[set_winner] += 1
#                 games = np.zeros(2)  # Reset game values   

#     # Update the scores in the shot_data DataFrame
#     shot_data.loc[i+1,"pointScore"] = f"{pt_values[int(points[0])]} - {pt_values[int(points[1])]}"
#     shot_data.loc[i+1,"gameScore"] = f"{int(games[0])} - {int(games[1])}"
#     shot_data.loc[i+1,"setScore"] = f"{int(sets[0])} - {int(sets[1])}"


# # Additional comments for further updates:
# # - Tiebreak scenarios are not yet handled and need to be accounted for in future versions.