# Swing Vision Transformation
#### Converting SwingVision data into UCLA Tennis Consulting format
#### Run all cells ONCE; restart Kernel and Run All again if needed

#### TODO
- add firstServeLocation and isLet Columns
- Classification Models for isDropshot, isLob, isApproach - Leo's team

## Notebook Start

In [7]:
import pandas as pd
import numpy as np
import os 
import re

# Option to display max rows/columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

### Load in data

In [8]:
# Input file name here
your_file_name = 'GianlucaBallotta_MaxWestphal_Columbia.xlsx'
swing_data = pd.read_excel(your_file_name, sheet_name='Shots')
swing_data.shape

(545, 25)

In [9]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
0,Gianluca Ballotta,1,first_serve,Serve,Slice,98.378799,1,1,1,short,deuce,far,-3.095711,18.234331,deep,deuce,near,0.69874,-0.083398,2.605832,out wide,In,False,03:56:43,4.22
1,Max Westphal,2,first_return,Backhand,Topspin,33.853065,1,1,1,short,deuce_alley,far,-4.698338,17.903118,out,deuce_alley,far,-5.395302,25.62948,0.802931,cross court,Net,False,03:56:44,4.99
2,Max Westphal,0,none,Feed,Flat,18.186579,2,1,1,short,deuce,near,0.734488,6.336324,short,ad,far,0.806848,14.83873,2.732491,---,In,False,03:56:52,12.67
3,Gianluca Ballotta,1,first_serve,Serve,Slice,93.113174,2,1,1,short,ad,far,1.08691,16.193851,deep,ad,near,-1.533032,0.108099,2.562802,down the T,In,False,03:57:06,26.92
4,Max Westphal,2,first_return,Backhand,Topspin,41.675724,2,1,1,short,ad_alley,near,-4.890435,7.557028,out,ad,far,2.795515,26.246744,1.041161,inside in,Out,False,03:57:07,27.75


### Clean swing vision data

In [11]:
swing_data.query('Stroke == "Feed"').shape

(48, 25)

In [12]:
swing_data.query('Shot == 0').shape

(40, 25)

In [13]:
swing_data.query('Type == "none"').shape

(68, 25)

#### Remove all the rows where it is a feed

In [14]:
swing_data.query('Type == "none"')

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
2,Max Westphal,0,none,Feed,Flat,18.186579,2,1,1,short,deuce,near,0.734488,6.336324,short,ad,far,0.806848,14.83873,2.732491,---,In,False,03:56:52,12.67
30,Gianluca Ballotta,0,none,Feed,Flat,18.186579,8,2,1,short,deuce,near,0.090735,7.948148,short,ad,far,1.279749,15.706609,2.473414,---,In,False,04:00:45,245.669998
31,Max Westphal,1,none,Feed,Flat,21.650688,8,2,1,deep,deuce,near,2.294115,3.986853,deep,deuce,near,2.63314,0.432438,0.897675,---,Net,False,04:00:52,252.669998
33,Gianluca Ballotta,3,none,Backhand,Flat,34.404015,8,2,1,deep,ad_out,near,-6.406847,5.039276,out,deuce,far,-3.642152,25.722755,0.629421,inside in,Out,False,04:01:11,271.619995
39,Gianluca Ballotta,0,none,Feed,Flat,22.516716,10,2,1,short,deuce,near,1.484568,7.114281,deep,ad_alley,far,4.516692,23.640087,0.762688,---,In,False,04:01:54,314.920013
41,Max Westphal,0,none,Feed,Flat,25.114799,11,2,1,short,ad,far,0.748516,16.888189,deep,ad,near,-3.790935,3.560779,1.211715,---,In,False,04:02:27,348.369995
42,Max Westphal,1,none,Backhand,Topspin,46.298294,11,2,1,deep,deuce,far,-2.611703,19.183266,deep,ad,near,-1.349187,0.964761,1.134319,inside in,In,False,04:02:38,358.470001
48,Max Westphal,1,none,Forehand,Slice,35.31847,13,3,1,deep,deuce,far,-1.247325,20.873165,deep,ad,near,-3.805094,0.860601,1.054601,down the line,In,False,04:03:15,396.220001
49,Gianluca Ballotta,2,none,Backhand,Topspin,75.164177,13,3,1,deep,ad,near,-3.354789,3.74126,deep,deuce,far,-2.743649,23.750149,1.350938,inside in,In,False,04:03:17,398.279999
50,Max Westphal,3,none,Forehand,Topspin,26.694437,13,3,1,short,ad,near,-0.677218,10.786504,out,ad_alley,near,-4.31667,-1.322562,0.867638,down the line,Net,False,04:03:19,399.470001


In [15]:
rows_to_drop = swing_data.query('Type == "none"').index
swing_data = swing_data.drop(rows_to_drop)
swing_data = swing_data.reset_index(drop=True) # Important to reindex to avoid missing indicies
swing_data.shape

(477, 25)

In [16]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time
0,Gianluca Ballotta,1,first_serve,Serve,Slice,98.378799,1,1,1,short,deuce,far,-3.095711,18.234331,deep,deuce,near,0.69874,-0.083398,2.605832,out wide,In,False,03:56:43,4.22
1,Max Westphal,2,first_return,Backhand,Topspin,33.853065,1,1,1,short,deuce_alley,far,-4.698338,17.903118,out,deuce_alley,far,-5.395302,25.62948,0.802931,cross court,Net,False,03:56:44,4.99
2,Gianluca Ballotta,1,first_serve,Serve,Slice,93.113174,2,1,1,short,ad,far,1.08691,16.193851,deep,ad,near,-1.533032,0.108099,2.562802,down the T,In,False,03:57:06,26.92
3,Max Westphal,2,first_return,Backhand,Topspin,41.675724,2,1,1,short,ad_alley,near,-4.890435,7.557028,out,ad,far,2.795515,26.246744,1.041161,inside in,Out,False,03:57:07,27.75
4,Gianluca Ballotta,3,serve_plus_one,Backhand,Topspin,33.080677,2,1,1,deep,deuce,far,-1.746811,19.188076,short,ad,near,-1.13587,10.734202,3.925683,down the line,In,False,03:57:10,30.870001


### Load in Points data

In [17]:
swing_data_points = pd.read_excel(your_file_name, sheet_name='Points')
swing_data_points.shape

(101, 15)

In [18]:
def create_point(server, player1score, player2score):
    if server == "host":
        return str(player1score) + "-" + str(player2score)
    else:
        return str(player2score) + "-" + str(player1score)
    
swing_data_points['pointScore'] = swing_data_points.apply(lambda x: create_point(x['Match Server'], x['Host Game Score'], x['Guest Game Score']), axis=1)

In [19]:
swing_data_points = swing_data_points.rename(columns={'Break Point' : 'isBreakPoint'})
swing_data_points['isBreakPoint'] = swing_data_points['isBreakPoint'].replace(False, '')
swing_data_points['isBreakPoint'] = swing_data_points['isBreakPoint'].replace(True, 1)

In [20]:
swing_data_points = swing_data_points[['Point', 'pointScore', 'isBreakPoint']]
swing_data_points.head()

Unnamed: 0,Point,pointScore,isBreakPoint
0,1,0-0,
1,2,15-0,
2,3,30-0,
3,4,40-0,
4,5,40-15,


In [21]:
swing_data = pd.merge(swing_data, swing_data_points, on='Point')

### Load in Games data

In [22]:
swing_data_games = pd.read_excel(your_file_name, sheet_name='Games')
swing_data_games.shape

(18, 9)

In [23]:
swing_data_games.head()

Unnamed: 0,Game,Set,Server,Host Set Score,Guest Set Score,Game Winner,Start Time,Video Time,Duration
0,1,1,host,0,0,host,03:56:45,6.22,113.129997
1,2,1,guest,1,0,guest,03:58:38,119.349998,255.160004
2,3,1,host,1,1,guest,04:02:54,374.519989,294.600006
3,4,1,guest,1,2,host,04:07:48,669.109985,157.600006
4,5,1,host,2,2,host,04:10:26,826.710022,233.949997


In [24]:
def create_game(player1game, player2game):
        return str(player1game) + "-" + str(player2game)

    
swing_data_games['gameScore'] = swing_data_games.apply(lambda x: create_game(x['Host Set Score'], x['Guest Set Score']),  axis=1)

In [25]:
swing_data_games

Unnamed: 0,Game,Set,Server,Host Set Score,Guest Set Score,Game Winner,Start Time,Video Time,Duration,gameScore
0,1,1,host,0,0,host,03:56:45,6.22,113.129997,0-0
1,2,1,guest,1,0,guest,03:58:38,119.349998,255.160004,1-0
2,3,1,host,1,1,guest,04:02:54,374.519989,294.600006,1-1
3,4,1,guest,1,2,host,04:07:48,669.109985,157.600006,1-2
4,5,1,host,2,2,host,04:10:26,826.710022,233.949997,2-2
5,6,1,guest,3,2,guest,04:14:20,1060.660034,324.579987,3-2
6,7,1,host,3,3,host,04:19:44,1385.23999,291.410004,3-3
7,8,1,guest,4,3,guest,04:24:36,1676.650024,197.199997,4-3
8,9,1,host,4,4,guest,04:27:53,1873.849976,234.800003,4-4
9,10,1,guest,4,5,guest,04:31:48,2108.639893,212.800003,4-5


In [26]:
swing_data_games = swing_data_games[['Game', 'gameScore']]

In [27]:
swing_data = pd.merge(swing_data, swing_data_games, on="Game")

In [28]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time,pointScore,isBreakPoint,gameScore
0,Gianluca Ballotta,1,first_serve,Serve,Slice,98.378799,1,1,1,short,deuce,far,-3.095711,18.234331,deep,deuce,near,0.69874,-0.083398,2.605832,out wide,In,False,03:56:43,4.22,0-0,,0-0
1,Max Westphal,2,first_return,Backhand,Topspin,33.853065,1,1,1,short,deuce_alley,far,-4.698338,17.903118,out,deuce_alley,far,-5.395302,25.62948,0.802931,cross court,Net,False,03:56:44,4.99,0-0,,0-0
2,Gianluca Ballotta,1,first_serve,Serve,Slice,93.113174,2,1,1,short,ad,far,1.08691,16.193851,deep,ad,near,-1.533032,0.108099,2.562802,down the T,In,False,03:57:06,26.92,15-0,,0-0
3,Max Westphal,2,first_return,Backhand,Topspin,41.675724,2,1,1,short,ad_alley,near,-4.890435,7.557028,out,ad,far,2.795515,26.246744,1.041161,inside in,Out,False,03:57:07,27.75,15-0,,0-0
4,Gianluca Ballotta,3,serve_plus_one,Backhand,Topspin,33.080677,2,1,1,deep,deuce,far,-1.746811,19.188076,short,ad,near,-1.13587,10.734202,3.925683,down the line,In,False,03:57:10,30.870001,15-0,,0-0


### Load in Sets data

In [29]:
swing_data_sets = pd.read_excel(your_file_name, sheet_name='Sets')
swing_data_sets.shape

(2, 10)

In [30]:
swing_data_sets

Unnamed: 0,Set,Host Score,Guest Score,Host Tiebreak Score,Guest Tiebreak Score,Set Winner,Super Tiebreak,Start Time,Video Time,Duration
0,1,4,6,0,0,guest,False,03:56:45,6.22,2315.219971
1,2,2,6,0,0,guest,False,04:35:21,2321.439941,1949.640015


In [31]:
host_set_score = 0
guest_set_score = 0

def create_set(set_winner):
        global host_set_score, guest_set_score  # Declare global variables
        if set_winner == "host":
                host_set_score += 1
        else:        
                guest_set_score += 1
        
        return str(host_set_score) + "-" + str(guest_set_score)

swing_data_sets['setScore'] = None
swing_data_sets.at[0, 'setScore'] = "0-0"
    
swing_data_sets.iloc[1:, swing_data_sets.columns.get_loc('setScore')] = swing_data_sets.iloc[1:].apply(lambda x: create_set(x['Set Winner']),  axis=1)


In [32]:
swing_data_sets = swing_data_sets[['Set', 'setScore']]


In [33]:
swing_data = pd.merge(swing_data, swing_data_sets, on="Set")
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time,pointScore,isBreakPoint,gameScore,setScore
0,Gianluca Ballotta,1,first_serve,Serve,Slice,98.378799,1,1,1,short,deuce,far,-3.095711,18.234331,deep,deuce,near,0.69874,-0.083398,2.605832,out wide,In,False,03:56:43,4.22,0-0,,0-0,0-0
1,Max Westphal,2,first_return,Backhand,Topspin,33.853065,1,1,1,short,deuce_alley,far,-4.698338,17.903118,out,deuce_alley,far,-5.395302,25.62948,0.802931,cross court,Net,False,03:56:44,4.99,0-0,,0-0,0-0
2,Gianluca Ballotta,1,first_serve,Serve,Slice,93.113174,2,1,1,short,ad,far,1.08691,16.193851,deep,ad,near,-1.533032,0.108099,2.562802,down the T,In,False,03:57:06,26.92,15-0,,0-0,0-0
3,Max Westphal,2,first_return,Backhand,Topspin,41.675724,2,1,1,short,ad_alley,near,-4.890435,7.557028,out,ad,far,2.795515,26.246744,1.041161,inside in,Out,False,03:57:07,27.75,15-0,,0-0,0-0
4,Gianluca Ballotta,3,serve_plus_one,Backhand,Topspin,33.080677,2,1,1,deep,deuce,far,-1.746811,19.188076,short,ad,near,-1.13587,10.734202,3.925683,down the line,In,False,03:57:10,30.870001,15-0,,0-0,0-0


### Create shot data csv

In [34]:
# Check existing columns
swing_data.columns

Index(['Player', 'Shot', 'Type', 'Stroke', 'Spin', 'Speed (MPH)', 'Point',
       'Game', 'Set', 'Bounce Depth', 'Bounce Zone', 'Bounce Side',
       'Bounce (x)', 'Bounce (y)', 'Hit Depth', 'Hit Zone', 'Hit Side',
       'Hit (x)', 'Hit (y)', 'Hit (z)', 'Direction', 'Result', 'Favorited',
       'Start Time', 'Video Time', 'pointScore', 'isBreakPoint', 'gameScore',
       'setScore'],
      dtype='object')

In [35]:
swing_data.head()

Unnamed: 0,Player,Shot,Type,Stroke,Spin,Speed (MPH),Point,Game,Set,Bounce Depth,Bounce Zone,Bounce Side,Bounce (x),Bounce (y),Hit Depth,Hit Zone,Hit Side,Hit (x),Hit (y),Hit (z),Direction,Result,Favorited,Start Time,Video Time,pointScore,isBreakPoint,gameScore,setScore
0,Gianluca Ballotta,1,first_serve,Serve,Slice,98.378799,1,1,1,short,deuce,far,-3.095711,18.234331,deep,deuce,near,0.69874,-0.083398,2.605832,out wide,In,False,03:56:43,4.22,0-0,,0-0,0-0
1,Max Westphal,2,first_return,Backhand,Topspin,33.853065,1,1,1,short,deuce_alley,far,-4.698338,17.903118,out,deuce_alley,far,-5.395302,25.62948,0.802931,cross court,Net,False,03:56:44,4.99,0-0,,0-0,0-0
2,Gianluca Ballotta,1,first_serve,Serve,Slice,93.113174,2,1,1,short,ad,far,1.08691,16.193851,deep,ad,near,-1.533032,0.108099,2.562802,down the T,In,False,03:57:06,26.92,15-0,,0-0,0-0
3,Max Westphal,2,first_return,Backhand,Topspin,41.675724,2,1,1,short,ad_alley,near,-4.890435,7.557028,out,ad,far,2.795515,26.246744,1.041161,inside in,Out,False,03:57:07,27.75,15-0,,0-0,0-0
4,Gianluca Ballotta,3,serve_plus_one,Backhand,Topspin,33.080677,2,1,1,deep,deuce,far,-1.746811,19.188076,short,ad,near,-1.13587,10.734202,3.925683,down the line,In,False,03:57:10,30.870001,15-0,,0-0,0-0


In [36]:
# add in all desired column labels, with swingvision labels at end

columm_names = (['pointScore', 'gameScore', 'setScore',
                'isPointStart', 'pointStartTime', 'isPointEnd', 'pointEndTime','pointNumber',
                'isBreakPoint','shotInRally','side','serverName',
                'serverFarNear','firstServeIn','firstServeZone',
                'firstServeXCoord','firstServeYCoord',
                'secondServeIn','secondServeZone','secondServeXCoord',
                'secondServeYCoord','isAce','shotContactX',
                'shotContactY','shotDirection','shotFhBh',
                'isSlice','isVolley','isOverhead','isApproach','isDropshot', 'isLet',
                'isExcitingPoint','atNetPlayer1','atNetPlayer2','isLob',
                'shotLocationX','shotLocationY','isWinner','isErrorWideR', 'isErrorWideL',
                'isErrorNet','isErrorLong','clientTeam',
                'Date', 'Division', 'Event', 'lineupPosition','matchDetails',
                'matchVenue' , 'opponentTeam', 
                'player1Name', 'player2Name','player1Hand','player2Hand',
            'Round','Surface','Notes'])

shot_data = pd.DataFrame(columns=columm_names)
shot_data

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isLet,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes


### Score Columns

In [37]:
shot_data['pointScore'] = swing_data['pointScore']
shot_data['gameScore'] = swing_data['gameScore']
shot_data['setScore'] = swing_data['setScore']

In [38]:
shot_data.head(10)

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isLet,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes
0,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,15-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,15-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,15-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,30-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,30-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,30-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,30-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,40-0,0-0,0-0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### isPointStart and isPointEnd columns 

In [39]:
def assign_pointstart(x):
    if (x == 'first_serve') | (x == 'second_serve'):
        return 1
    
    return ''

shot_data['isPointStart'] = swing_data['Type'].apply(assign_pointstart)


index_list = []

for i in swing_data['Point'].unique().tolist():
    last_point_index = swing_data[swing_data['Point'] == i].index[-1]
    index_list.append(last_point_index)
    
shot_data.loc[index_list,'isPointEnd'] = 1
shot_data['isPointEnd'] = shot_data['isPointEnd'].fillna('')

### pointStartTime and pointEndTime Columns

In [40]:
def convert_time(time):
    return int(time * 1000)

# def convert_time(time):
shot_data['pointStartTime'] = swing_data['Video Time'].apply(convert_time)

# Assigns last shot time to pointEndTime column
shot_data['pointEndTime'] = np.where(shot_data['isPointEnd'] == 1, shot_data['pointStartTime'], '')

### pointNumber Column

In [41]:
shot_data['pointNumber'] = swing_data['Point']

### isBreakPoint Column

In [42]:
shot_data['isBreakPoint'] = swing_data['isBreakPoint']

### shotInRally column

In [43]:
shot_data.shotInRally = swing_data.Shot

### side Column

In [44]:
def side(x, side, xcoord):
    if 'deuce' in x:
        return 'Deuce'
    elif 'ad' in x:
        return 'Ad'
    elif 'center_line' in x: # unique values include deuce, ad and center_line
        if (side == 'near') & (xcoord > 0):
            return 'Deuce'
        else:
            return 'Ad'
    else:
        return ''

shot_data['side'] = swing_data.apply(lambda x: side(x['Hit Zone'], x['Hit Side'], x['Bounce (x)']), axis = 1)

### Players

In [45]:
# ucla roster 23-24 men and womens
ucla_roster_23 = ["Gianluca Ballotta", 
                   "Jeffrey Fradkin", 
                   "Alexander Hoogmartens",
                   "Spencer Johnson",
                   "Stefan Leustian",
                   "Timothy Li",
                   "Govind Nanda",
                   "Jorge Plans Gonzalez",
                   "Giacomo Revelli",
                   "Aadarsh Tripathi",
                   "Emon van Loben Sels",
                   "Azuma Visaya",
                   "Rudy Quan",
                   "Leo Von Bismark",
                   
                   "Tian Fangran",
                   "Bianca Fernandez",
                   "Ahmani Guichard",
                   "Kimmi Hance",
                   "Mia Jovic",
                   "Anne-Christine Lutkemeyer",
                   "Vanessa Ong",
                   "Sasha Vagramov",
                   "Elise Wagle"]

In [46]:
# list of names who are playing in match
players = swing_data['Player'].unique()

# checks which one is UCLA player
is_ucla_player = [any([name in roster_name for roster_name in ucla_roster_23]) for name in players]

In [47]:
# assigns ucla player to player 1, and non ucla to player 2
shot_data.loc[0, "player1Name"] = players[is_ucla_player]
shot_data.loc[0, "player2Name"] = players[np.invert(is_ucla_player)]

### serverName Column

In [48]:
def assign_server_name(stroke, server):
    if stroke != 'Serve':
        return ''
    
    if server.startswith(players[is_ucla_player][0]):
        return 'Player1'
    elif server.startswith(players[np.invert(is_ucla_player)][0]):
        return 'Player2'
    
shot_data['serverName'] = swing_data.apply(lambda x: assign_server_name(x['Stroke'], x['Player']), axis=1)
shot_data['serverName'].replace(['', 'na'], pd.NaT, inplace=True)
shot_data['serverName'] = shot_data['serverName'].ffill()

### serverFarNear Column

In [49]:
shot_data.serverFarNear = np.where((swing_data.Stroke == 'Serve'), np.where(swing_data['Hit Side'] == 'far', 'Far', 'Near'), '')
shot_data['serverFarNear'].replace(['', 'na'], pd.NaT, inplace=True)
shot_data['serverFarNear'] = shot_data['serverFarNear'].ffill()

### firstServeIn and secondServeIn Columns

In [50]:
shot_data.firstServeIn = np.where((swing_data.Type == 'first_serve'),np.where((shot_data.isPointStart == 1) & (swing_data['Result'] == 'In'), 1, 0), np.nan)
shot_data.secondServeIn =np.where((swing_data.Type == 'second_serve') & (shot_data.isPointStart == 1), np.where(swing_data['Result'] == 'In', 1,0), np.nan)

### SwingVision Coord Transformation
court coordinates
swing vision - meters, near side center marks (0,0)
singles court x [-4.1148, 4.1148], y [0, 23.7744]
doubles court x [-5.485, 5.485]

our coordinates - center of net (0,0)
singles court x [-157.5, 157.5], y [-455, 455]

shot_x = (157.5/4.1148) * swing_x
shot_y = (455/11.8872) * swing_y + 455
ratio = 38.2764654418

### firstServeXCoord, firstYServeYCoord, secondServeXCoord, and secondServeyCoord Columns

In [51]:
def first_serve_x_coordinates(stroke, x):
    if stroke == 'first_serve':
        return x * 38.2764654418
    else:
        return np.nan

def first_serve_y_coordinates(stroke, y):
    if stroke == 'first_serve':
        return (y - 11.8872) * 38.2764654418
    else:
        return np.nan
    
shot_data['firstServeXCoord'] = swing_data.apply(lambda row: first_serve_x_coordinates(row['Type'], row['Bounce (x)']), axis=1)
shot_data['firstServeYCoord'] = swing_data.apply(lambda row: first_serve_y_coordinates(row['Type'], row['Bounce (y)']), axis=1)


def second_serve_x_coordinates(stroke, x):
    if stroke == 'second_serve':
        return x * 38.2764654418
    else:
        return np.nan

def second_serve_y_coordinates(stroke, y):
    if stroke == 'second_serve':
        return (y - 11.8872) * 38.2764654418
    else:
        return np.nan
    
shot_data['secondServeXCoord'] = swing_data.apply(lambda row: second_serve_x_coordinates(row['Type'], row['Bounce (x)']), axis=1)
shot_data['secondServeYCoord'] = swing_data.apply(lambda row: second_serve_y_coordinates(row['Type'], row['Bounce (y)']), axis=1)

### firstServeZone and secondServeZone Columns
- serving zones: T, Body, Wide
- Wide: x in [-inf, -105] u [105, inf]
- Body: x in [-105, -52.5] u [52.5, 105]
- T: x in [-52.5, 52.5]

In [52]:
def label_zone(x_coord):
    if x_coord != '':
        x_coord = float(x_coord)
        if (x_coord < -105) | (x_coord > 105):
            return 'Wide'
        elif (-105 <= x_coord <= -52.5) | (52.5 <= x_coord <= 105):
            return 'Body'
        elif -52.5 < x_coord < 52.5:
            return 'T'
    return ''

# convert x coord to serve zone
shot_data.firstServeZone = shot_data.firstServeXCoord.apply(label_zone)
shot_data.secondServeZone = shot_data.secondServeXCoord.apply(label_zone)

In [53]:
shot_data.head()

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isLet,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes
0,0-0,0-0,0-0,1.0,4220,,,1,,1,Deuce,Player1,Near,1.0,Wide,-118.492875,242.94574,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Gianluca Ballotta,Max Westphal,,,,,
1,0-0,0-0,0-0,,4990,1.0,4990.0,1,,2,Deuce,Player1,Near,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,15-0,0-0,0-0,1.0,26920,,,2,,1,Ad,Player1,Near,1.0,T,41.603073,164.843378,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,15-0,0-0,0-0,,27750,,,2,,2,Ad,Player1,Near,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,15-0,0-0,0-0,,30870,1.0,30870.0,2,,3,Ad,Player1,Near,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### isAce Column

In [54]:
shot_data['isAce'] = np.where((swing_data.Stroke == 'Serve') & 
                              (shot_data.isPointEnd == 1) & 
                              (shot_data.secondServeIn != 0), 1, np.nan)

### shotContactX and shotContactY Columns

In [55]:
# Functions to transform the swingvision coordinates
def transf_x_coord_sv_to_shot(sv_col) :
    return sv_col * 38.2764654418
def transf_y_coord_sv_to_shot(sv_col) :
    return (sv_col - 11.8872) * 38.2764654418

# want to convert swingvision coordinates into our own
shot_data['shotContactX'] = transf_x_coord_sv_to_shot(swing_data['Hit (x)'])
shot_data['shotContactY'] = transf_y_coord_sv_to_shot(swing_data['Hit (y)'])

### shotFhBh Column

In [56]:
def classify_shot(stroke):
    if stroke == 'FH Volley':
        return 'Forehand'
    elif stroke == 'BH Volley':
        return 'Backhand'
    elif stroke == 'Forehand':
        return 'Forehand'
    elif stroke == 'Backhand':
        return 'Backhand'
    elif stroke == 'Overhead':
        return 'Forehand'
    else:
        return ''

# Applying the function to the DataFrame
shot_data['shotFhBh'] = swing_data['Stroke'].apply(classify_shot)

### isSlice, isTopspin, isFlat, isKick Columns

In [57]:
shot_data['isSlice'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Slice' else '')
shot_data['isTopspin'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Topspin' else '') # added these metrics
shot_data['isFlat'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Flat' else '') # added these metrics
shot_data['isKick'] = swing_data['Spin'].apply(lambda x: '1' if x == 'Kick' else '') # added these metrics

### isVolley Column

In [58]:
shot_data['isVolley'] = swing_data['Stroke'].apply(lambda x: 1 if x in ['FH Volley', 'BH Volley', 'Volley'] else '') # need to classify shotFhBh when doing isVolley

### isOverhead Column

In [59]:
shot_data['isOverhead'] = swing_data['Stroke'].apply(lambda x: 1 if x == 'Overhead' else '')

### isApproach Column

In [60]:
# maybe run model to predict 

# features to consider:
# player is inside the court

# Workflow:
# watch all points and tag all points that have _____
# subset df with points (testing x and y)


### isDropshot Column

In [61]:
# maybe run model to predict
# features to consider: 
# shotlocationY if close to the net
# shotContactY is close to the net
# speed of the ball (in swingvision data)

### isLet Column

In [62]:
# maybe run model to predict OR get from swingvision data
# features to consider: 


### isExcitingPoint

In [63]:
# maybe run model to predict
# features to consider:
# rally length is long (maybe take _% percintile of rallies)
# point ends in a winner
# either player moves a lot
# amount of volleys, overheads
# breakpoint factor

### atNetPlayer1 and atNetPlayer2 Columns

In [64]:
# aggregated in STP

### isLob Column

In [65]:
# maybe run model to predict
# features to consider:
# opponent is at the net
# speed of the ball (in swingvision data)

### shotLocationX and shotLocationY Columns

In [66]:
# Functions to transform the swingvision coordinates
def transf_x_loc(stroke, sv_col):
    if stroke != 'first_serve' and stroke != 'second_serve':
        return sv_col * 38.2764654418
    return np.nan
    
def transf_y_loc(stroke, sv_col):
    if stroke != 'first_serve' and stroke != 'second_serve':
        return (sv_col - 11.8872) * 38.2764654418
    return np.nan

# want to convert swingvision coordinates into our own
shot_data['shotLocationX'] = swing_data.apply(lambda x: transf_x_loc(x['Type'], x['Bounce (x)']), axis=1)
shot_data['shotLocationY'] = swing_data.apply(lambda x: transf_y_loc(x['Type'], x['Bounce (y)']), axis=1)

### shotDirection column

In [67]:
# down the line --> switches btwn deuce and ad
# crosscourt --> remains on same side
shot_data['shotDirection'] = np.where((shot_data.shotContactX * shot_data.shotLocationX > 0) & (shot_data.shotInRally != 1), 
"Down the Line", 
    np.where((shot_data.shotInRally != 1), 'Crosscourt', ''))

### isWinner Column

In [68]:
shot_data.isWinner = np.where((shot_data.isPointEnd == 1) & (shot_data.secondServeIn != '0') &
                              (swing_data.Result == 'In'), 1, np.nan)


### isErrorWideR Column

In [69]:
def wide_right_function(side, x, y, end):
    if (side == 'far' and x < -157.5 and end == 1) or (side == 'near' and x > 157.5 and end == 1):
        return 1
    return np.nan

# Assign 'isErrorWideR' using values from both 'swing_data' and 'shot_data'
shot_data['isErrorWideR'] = shot_data.apply(lambda x: wide_right_function(swing_data.loc[x.name, 'Hit Side'], 
                                                                x['shotLocationX'], x['shotLocationY'], x['isPointEnd']), axis=1)


### isErrorWideL Column

In [70]:
def wide_left_function(side, x, y, end):
    if (side == 'far' and x > 157.5 and end == 1) or (side == 'near' and x < -157.5 and end == 1):
        return 1
    return np.nan

# Assign 'isErrorWideR' using values from both 'swing_data' and 'shot_data'
shot_data['isErrorWideL'] = shot_data.apply(lambda x: wide_left_function(swing_data.loc[x.name, 'Hit Side'], 
                                                                x['shotLocationX'], x['shotLocationY'], x['isPointEnd']), axis=1)


### isErrorNet Column

In [71]:
shot_data.isErrorNet = np.where((swing_data.Result == 'Net'), 1, np.nan)

### isErrorLong Column

In [72]:
shot_data['isErrorLong'] = np.where((swing_data['Result'] == 'Out') & (shot_data['shotLocationY'].abs() > 455), 1, np.nan)

### Group First Serve and Second Serve Columns

In [73]:
# All columns
default_cols = ['pointScore', 'gameScore', 'setScore', 'isPointStart', 'pointStartTime',
       'isPointEnd', 'pointEndTime', 'pointNumber', 'isBreakPoint',
       'shotInRally', 'side', 'serverName', 'serverFarNear', 'firstServeIn',
       'firstServeZone', 'firstServeXCoord', 'firstServeYCoord',
       'secondServeIn', 'secondServeZone', 'secondServeXCoord',
       'secondServeYCoord', 'isAce', 'shotContactX', 'shotContactY',
       'shotDirection', 'shotFhBh', 'isSlice', 'isVolley', 'isOverhead',
       'isApproach', 'isDropshot', 'isExcitingPoint', 'atNetPlayer1',
       'atNetPlayer2', 'isLob', 'shotLocationX', 'shotLocationY', 'isWinner',
       'isErrorWideR', 'isErrorWideL', 'isErrorNet', 'isErrorLong',
       'clientTeam', 'Date', 'Division', 'Event', 'lineupPosition',
       'matchDetails', 'matchVenue', 'opponentTeam', 'player1Name',
       'player2Name', 'player1Hand', 'player2Hand', 'Round', 'Surface',
       'Notes', 'isTopspin', 'isFlat', 'isKick']

# Assign all columns to have value be taken from the first serve row
agg_dict = {col: 'first' for col in default_cols}

# Reassign select columns to have value be taken form the second serve row
agg_dict.update({'isPointEnd': 'last', 
                 'pointEndTime': 'last', 
                 'secondServeIn' : 'last',
                 'secondServeZone' : 'last',
                 'secondServeXCoord' : 'last', 
                 'secondServeYCoord' : 'last',
                 'isAce' : 'last', 
                 'shotContactX' : 'last', 
                 'shotContactY' : 'last',
                 'isWinner' : 'last',
                 'isErrorWideR' : 'last',
                 'isErrorWideL' : 'last', 
                 'isErrorNet' : 'last',
                 'isErrorLong' : 'last'
                })

# Group by isPointStart and pointNumber
grouped_df = shot_data.groupby(['shotInRally', 'pointNumber'], as_index=False).agg(agg_dict)
shot_data = grouped_df.sort_values(by=['pointNumber', 'shotInRally'], ascending=[True, True]).reset_index(drop = True)

### Save as CSV

In [74]:
player1NameNoSpace = str(shot_data.iloc[0]['player1Name']).replace(" ", "")
player2NameNoSpace = str(shot_data.iloc[0]['player2Name']).replace(" ", "")

shot_data.to_csv(f'swingvision_{player1NameNoSpace}_{player2NameNoSpace}.csv', index=False)
print(f'swingvision_{player1NameNoSpace}_{player2NameNoSpace}.csv')

swingvision_GianlucaBallotta_MaxWestphal.csv


### Notebook End

## Errors in Swingvision Data Exploration

#### Chcek all the rows where isPointEnd != 1 and there is  isWinner, isErrorWideL, isErrorWideR, isErrorNet, isErrorLong

In [75]:
point_error = shot_data[(shot_data['isPointEnd'] != 1) & (shot_data['isPointStart'] != 1) &
          ((shot_data['isWinner'] == 1) | 
          (shot_data['isErrorNet'] == 1) | 
          (shot_data['isErrorLong'] == 1) |
          (shot_data['isErrorWideL'] == 1) |
          (shot_data['isErrorWideR'] == 1))]

point_error_numbers = point_error['pointNumber'].to_list()

if len(point_error) > 0:
    display(point_error)
    raise ValueError('Manually check points', point_error_numbers)

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes,isTopspin,isFlat,isKick
71,0-40,1-2,0-0,,820179,,,22,1.0,2,Ad,Player2,Far,,,,,,,,,,-179.484853,-503.735165,Down the Line,Backhand,,,,,,,,,,-47.275454,-208.067842,,,,1.0,,,,,,,,,,,,,,,,,,1.0,
100,30-15,2-2,0-0,,985369,,,26,,3,Deuce,Player1,Near,,,,,,,,,,8.295237,-517.557715,Crosscourt,Backhand,,,,,,,,,,-98.599142,506.599623,,,,,1.0,,,,,,,,,,,,,,,,1.0,,
117,0-15,3-2,0-0,,1215510,,,31,,2,Ad,Player2,Near,,,,,,,,,,222.492252,492.152056,Down the Line,Backhand,,,,,,,,,,187.943914,236.949273,,,,1.0,,,,,,,,,,,,,,,,,,1.0,
175,40-15,3-3,0-0,,1620050,,,41,,4,Deuce,Player1,Far,,,,,,,,,,140.848589,-539.831173,Down the Line,Forehand,,,,,,,,,,65.017476,457.309372,,,,,1.0,,,,,,,,,,,,,,,,1.0,,
290,0-15,0-2,0-1,,2899429,,,68,,2,Ad,Player1,Near,,,,,,,,,,196.366803,499.997315,Crosscourt,Forehand,,,,,,,,,,-142.504506,-463.785253,,,,,1.0,,,,,,,,,,,,,,,,1.0,,
392,40-30,1-4,0-1,,3819719,,,90,,5,Ad,Player2,Far,,,,,,,,,,57.875776,305.829648,Crosscourt,,,1.0,,,,,,,,-6.110493,256.987385,,,,1.0,,,,,,,,,,,,,,,,,,1.0,
425,40-15,1-5,0-1,,4046399,,,96,,9,Deuce,Player1,Near,,,,,,,,,,47.024092,-438.431802,Down the Line,Backhand,,,,,,,,,,102.066071,456.441683,,,,,1.0,,,,,,,,,,,,,,,,,1.0,


ValueError: ('Manually check points', [22, 26, 31, 41, 68, 90, 96])

In [76]:
shot_data[shot_data['pointNumber'] == 2]

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes,isTopspin,isFlat,isKick
3,15-0,0-0,0-0,1,35349,1,69970,2,,1,Ad,Player1,Near,0.0,Body,99.205173,-42.625667,0.0,T,3.78294,-182.774524,,-13.142607,-442.920292,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,1,


#### Check all the rows where there is isPointEnd == 1 but there is no isWinner, isErrorWideL, isErrorWideR, isErrorNet, isErrorLong
- Cj reccomendation: have this error check autmatically fill in how the point ends based on coordinate data

In [69]:
point_error = shot_data[(shot_data['isPointEnd'] == 1) &
                          (shot_data['isWinner'] != 1) &
                          (shot_data['isErrorWideL'] != 1) &
                          (shot_data['isErrorWideR'] != 1) &
                          (shot_data['isErrorNet'] != 1) & 
                          (shot_data['isErrorLong'] != 1) &
                          (shot_data['firstServeIn'] != 0) & 
                          (shot_data['secondServeIn'] != 0)]

point_error_numbers = point_error['pointNumber'].to_list()

if point_error.empty:
    print('Check Passed ✓')
else:

    display(point_error)
    raise ValueError('Manually check points', point_error_numbers)

Check Passed ✓


#### Volleys


In [91]:
shot_data.query('isVolley == 1')

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes,isTopspin,isFlat,isKick
13,15-40,0-0,0-0,,166270,,,5,1.0,5,Deuce,Player1,Near,,,,,,,,,,30.445254,-41.593313,Down the Line,,,1,,,,,,,,81.13715,321.410504,,,,,,,,,,,,,,,,,,,,,,1,
99,15-0,1-3,0-0,,870559,1.0,870559.0,21,,13,Deuce,Player1,Near,,,,,,,,,,109.099792,-120.824256,Down the Line,,,1,,,,,,,,255.08738,810.177045,,1.0,,,1.0,,,,,,,,,,,,,,,,,1,
105,15-15,1-3,0-0,,926859,1.0,926859.0,22,,6,Ad,Player1,Near,,,,,,,,,,80.570352,198.068843,Crosscourt,,,1,,,,,,,,-43.965382,3.457207,,,,1.0,,,,,,,,,,,,,,,,,,1,
164,0-0,2-5,0-0,,1631349,,,35,,4,Ad,Player2,Far,,,,,,,,,,-46.296036,-131.563369,Down the Line,,,1,,,,,,,,-136.689813,363.498502,,,,,,,,,,,,,,,,,,,,,,1,
166,0-0,2-5,0-0,,1634449,1.0,1634449.0,35,,6,Ad,Player2,Far,,,,,,,,,,-48.706037,-69.452991,Down the Line,,,1,,,,,,,,-90.117995,209.273436,1.0,,,,,,,,,,,,,,,,,,,,,1,
196,0-15,0-0,0-1,,2076939,1.0,2076939.0,41,,5,Ad,Player1,Near,,,,,,,,,,-160.115737,-50.026919,Down the Line,,,1,,,,,,,,-48.237648,215.668515,1.0,,,,,,,,,,,,,,,,,,,,,1,
212,0-0,0-1,0-1,,2317840,1.0,2317840.0,46,,4,Deuce,Player2,Near,,,,,,,,,,-59.008913,256.007355,Down the Line,,,1,,,,,,,,-94.731152,315.709263,,,,1.0,,,,,,,,,,,,,,,,,,1,
217,15-0,0-1,0-1,,2355540,,,47,,5,Deuce,Player2,Near,,,,,,,,,,29.210608,-131.969712,Crosscourt,,,1,,,,,,,,-89.226919,245.341656,,,,,,,,,,,,,,,,,,,,,,1,
219,15-0,0-1,0-1,,2356939,,,47,,7,Deuce,Player2,Near,,,,,,,,,,24.936505,-155.016355,Down the Line,,,1,,,,,,,,157.292771,572.256584,,,,,1.0,,,,,,,,,,,,,,,,,1,
307,30-30,1-3,0-1,,3171030,,,67,,3,Ad,Player1,Near,,,,,,,,,,-76.386964,-137.598496,Crosscourt,,,1,,,,,,,,127.752258,225.651668,,,,,,,,,,,,,,,,,,,,,,1,


In [93]:
shot_data[355:500]

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes,isTopspin,isFlat,isKick
355,0-0,2-4,0-1,1.0,3623590,,,77,,1,Deuce,Player1,Far,1.0,Wide,108.740491,-201.160089,,,,,,-32.517771,472.761581,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,
356,0-0,2-4,0-1,,3624419,1.0,3624419.0,77,,2,Deuce,Player1,Far,,,,,,,,,,163.385007,-508.552105,Down the Line,Forehand,,,,,,,,,,146.224404,241.133005,1.0,,,,,,,,,,,,,,,,,,,,1.0,,
357,15-0,2-4,0-1,1.0,3650889,,,78,,1,Ad,Player1,Far,0.0,Body,-61.966918,1.771243,1.0,T,-46.523857,-187.289616,,6.223677,431.001958,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,1.0,
358,15-0,2-4,0-1,,3670639,,,78,,2,Ad,Player1,Far,,,,,,,,,,-49.961505,-357.821528,Crosscourt,Forehand,,,,,,,,,,152.61799,400.289419,,,,,,,,,,,,,,,,,,,,,,1.0,
359,15-0,2-4,0-1,,3672020,,,78,,3,Ad,Player1,Far,,,,,,,,,,163.00473,512.629353,Crosscourt,Backhand,,,,,,,,,,-25.76986,-265.882759,,,,,,,,,,,,,,,,,,,,,1.0,,
360,15-0,2-4,0-1,,3673169,,,78,,4,Ad,Player1,Far,,,,,,,,,,-50.974606,-468.526176,Down the Line,Forehand,,,,,,,,,,-147.412735,240.04404,,,,,,,,,,,,,,,,,,,,,1.0,,
361,15-0,2-4,0-1,,3674750,,,78,,5,Deuce,Player1,Far,,,,,,,,,,-197.972922,555.386538,Down the Line,Forehand,,,,,,,,,,-76.803068,-76.985455,,,,,,,,,,,,,,,,,,,,,,1.0,
362,15-0,2-4,0-1,,3676419,,,78,,6,Deuce,Player1,Far,,,,,,,,,,28.517613,-163.356797,Down the Line,,,1.0,,,,,,,,0.360909,398.554998,,,,,,,,,,,,,,,,,,,,,,1.0,
363,15-0,2-4,0-1,,3677389,1.0,3677389.0,78,,7,Ad,Player1,Far,,,,,,,,,,90.760351,465.265174,Crosscourt,Backhand,,,,,,,,,,-205.742011,-496.280556,,1.0,,,1.0,,,,,,,,,,,,,,,,,1.0,
364,15-15,2-4,0-1,1.0,3717419,,,79,,1,Deuce,Player1,Far,1.0,Wide,156.376241,-166.451219,,,,,,-33.577685,424.522135,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,


#### Overheads

In [72]:
shot_data.query('isOverhead == 1')

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes,isTopspin,isFlat,isKick
221,15-0,0-1,0-1,,2360239,1,2360239,47,,9,Deuce,Player2,Near,,,,,,,,,,118.944193,-252.597649,Crosscourt,Forehand,,,1,,,,,,,-160.746265,454.59982,,,1.0,,,,,,,,,,,,,,,,,,,1,


#### Aces
- WARNING: Not accurate
- FIX: counts double faults as aces

In [73]:
shot_data.query('isAce == 1')

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes,isTopspin,isFlat,isKick
79,30-0,1-2,0-0,1,775929,1,775929,18,,1,Deuce,Player2,Far,1.0,T,1.201996,-227.85134,,,,,1.0,-48.704735,429.271479,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,1,


#### Double Faults

In [88]:
shot_data.query('firstServeIn == 0').query('secondServeIn == 0')

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes,isTopspin,isFlat,isKick
3,15-0,0-0,0-0,1,35349,1.0,69970.0,2,,1,Ad,Player1,Near,0.0,Body,99.205173,-42.625667,0.0,T,3.78294,-182.774524,,-13.142607,-442.920292,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,1.0,
34,40-15,0-1,0-0,1,370380,,,10,,1,Deuce,Player2,Near,0.0,T,11.307059,-25.63884,0.0,Body,-52.542295,263.234181,,49.729741,-446.557169,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,1.0,
106,30-15,1-3,0-0,1,955270,1.0,964309.0,23,,1,Ad,Player1,Near,0.0,Body,97.435154,-42.945888,0.0,T,33.42856,-44.775312,,-28.20543,-471.140573,,,1.0,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,
265,40-40,0-2,0-1,1,2710100,1.0,2719899.0,57,1.0,1,Ad,Player1,Far,0.0,Wide,-191.688807,-220.564955,0.0,T,47.150252,-248.594198,,32.941262,456.42576,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,


In [85]:
shot_data[shot_data['pointNumber'] == 10]

Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,pointNumber,isBreakPoint,shotInRally,side,serverName,serverFarNear,firstServeIn,firstServeZone,firstServeXCoord,firstServeYCoord,secondServeIn,secondServeZone,secondServeXCoord,secondServeYCoord,isAce,shotContactX,shotContactY,shotDirection,shotFhBh,isSlice,isVolley,isOverhead,isApproach,isDropshot,isExcitingPoint,atNetPlayer1,atNetPlayer2,isLob,shotLocationX,shotLocationY,isWinner,isErrorWideR,isErrorWideL,isErrorNet,isErrorLong,clientTeam,Date,Division,Event,lineupPosition,matchDetails,matchVenue,opponentTeam,player1Name,player2Name,player1Hand,player2Hand,Round,Surface,Notes,isTopspin,isFlat,isKick
34,40-15,0-1,0-0,1.0,370380,,,10,,1,Deuce,Player2,Near,0.0,T,11.307059,-25.63884,0.0,Body,-52.542295,263.234181,,49.729741,-446.557169,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,1.0,
35,40-15,0-1,0-0,,383809,,,10,,2,Deuce,Player2,Near,,,,,,,,,,-73.238955,498.070669,Crosscourt,Backhand,,,,,,,,,,25.331097,-360.356655,,,,,,,,,,,,,,,,,,,,,,1.0,
36,40-15,0-1,0-0,,384850,,,10,,3,Deuce,Player2,Near,,,,,,,,,,15.186647,-469.531966,Crosscourt,Forehand,,,,,,,,,,-78.02623,406.023502,,,,,,,,,,,,,,,,,,,,,1.0,,
37,40-15,0-1,0-0,,386179,,,10,,4,Deuce,Player2,Near,,,,,,,,,,-73.286341,512.967984,Crosscourt,Forehand,,,,,,,,,,87.54398,-327.437019,,,,,,,,,,,,,,,,,,,,,1.0,,
38,40-15,0-1,0-0,,387649,,,10,,5,Deuce,Player2,Near,,,,,,,,,,115.323239,-518.601132,Crosscourt,Forehand,,,,,,,,,,-142.419341,345.483607,,,,,,,,,,,,,,,,,,,,,1.0,,
39,40-15,0-1,0-0,,388850,,,10,,6,Deuce,Player2,Near,,,,,,,,,,-176.819434,536.631299,Crosscourt,Forehand,,,,,,,,,,112.653456,-386.184542,,,,,,,,,,,,,,,,,,,,,,1.0,
40,40-15,0-1,0-0,,390079,,,10,,7,Deuce,Player2,Near,,,,,,,,,,99.680184,-492.940322,Down the Line,Forehand,,,,,,,,,,45.869904,311.200372,,,,,,,,,,,,,,,,,,,,,1.0,,
41,40-15,0-1,0-0,,391660,1.0,391660.0,10,,8,Ad,Player2,Near,,,,,,,,,,44.211806,479.083858,Crosscourt,Backhand,,,,,,,,,,-96.317635,-75.251187,1.0,,,,,,,,,,,,,,,,,,,,1.0,,


### Check all points where double fault occurs (firstServeIn == 0 & secondServeIn == 0) but len(shotInRally) > 1
- Check double fault but the point continues

### Check all the points where everytime the server changes, the first pointScore should be "0-0". If not output error
- Govind Nanda vs Cooper Williams (Harvard) row 380

### Points

In [None]:
# # ad scoring?

# ad_scoring = False

In [None]:
# # want to record the score every time a point ends
# # points: server - returner
# # games: ucla (player1) - opp
# # sets: ucla (player1) - opp
# points = np.zeros(2)
# games = np.zeros(2)
# sets = np.zeros(2)
# pt_values = [0, 15, 30, 40]



# shot_data.loc[0,"pointScore"] = f"{pt_values[int(points[0])]} - {pt_values[int(points[1])]}"
# shot_data.loc[0,"gameScore"] = f"{games[0]} - {games[1]}"
# shot_data.loc[0,"setScore"] = f"{sets[0]} - {sets[1]}"

# shot_data["isBreakPoint"] = ''

# error_cols = [x for x in shot_data.columns if "isError" in x]

# for i in range(0, len(shot_data.pointScore) - 1):
#     if shot_data.loc[i+1, "isPointStart"] == 1: # means we gotta update pts
#         # determine point score by checking last shot
#         if shot_data.loc[i, "isWinner"] == "1":
#             # check if player 1 or 2 won pt
#             pt_winner_player_num = (np.where(shot_data.loc[i, "Player"] == shot_data.loc[0,"player1Name"], '1', '2'))
#         elif shot_data.loc[i, 'secondServeIn'] == "0": # double fault
#             pt_winner_player_num = (np.where(shot_data.loc[i, "Player"] == shot_data.loc[0,"player1Name"], '2', '1'))
#         elif any(shot_data.loc[i,error_cols] == "1"):
#             # winner is the player who did NOT hit that shot
#             pt_winner_player_num = (np.where(shot_data.loc[i, "Player"] == shot_data.loc[0,"player1Name"], '2', '1'))
#         else:
#             print("no pt recorded at row ", i)

#         if shot_data.loc[i, "serverName"] is not None:
#             didServerWinPt = shot_data.loc[i, "serverName"][-1] == pt_winner_player_num
#         else:
#             print(f"Server name is None at row {i}. Skipping this point.")
#             continue  # Skip this point if server name is None
        
#         if didServerWinPt:
#             points[0] += 1
#         else:
#             points[1] += 1


#         if ad_scoring: # checks if need to win by 2
#             if any(points > 3) and abs(points[0] - points[1]) >= 2:
#                 game_winner = np.argmax(points)  # Find who won the game
#                 games[game_winner] += 1
#                 points = np.zeros(2)  # Reset point values
#             if any(games > 5) and abs(games[0] - games[1]) >= 2:
#                 set_winner = np.argmax(games)  # Find who won the set
#                 sets[set_winner] += 1
#                 games = np.zeros(2)  # Reset game values   
#         else:
#             if points[1] == 3: # if the returner has 40 pts and can win the game
#                 shot_data.loc[i + 1, 'isBreakPoint'] = '1'
#             if any(points > 3):
#                 game_winner = np.argmax(points)  # Find who won the game
#                 games[game_winner] += 1
#                 points = np.zeros(2)  # Reset point values
#             if any(games > 5):
#                 set_winner = np.argmax(games)  # Find who won the set
#                 sets[set_winner] += 1
#                 games = np.zeros(2)  # Reset game values   

#     # Update the scores in the shot_data DataFrame
#     shot_data.loc[i+1,"pointScore"] = f"{pt_values[int(points[0])]} - {pt_values[int(points[1])]}"
#     shot_data.loc[i+1,"gameScore"] = f"{int(games[0])} - {int(games[1])}"
#     shot_data.loc[i+1,"setScore"] = f"{int(sets[0])} - {int(sets[1])}"


# # Additional comments for further updates:
# # - Tiebreak scenarios are not yet handled and need to be accounted for in future versions.