In [2]:
import pandas as pd
import numpy as np

# Load the shot log, map its column names onto the names used by the rest of
# the notebook, and add the derived columns in a single chain.
column_aliases = {
    'pointNumber': 'rallyid',
    'shotContactX': 'hitter_x',
    'shotContactY': 'hitter_y',
    'shotInRally': 'strokeid',
    'shotHitBy': 'hitter'
}

events = (
    pd.read_csv("../Shot_Visuals_EliseWagle_MotkaKomac.csv")
    .rename(columns=column_aliases)
    .assign(
        # 'server' mirrors the hitter of every shot; the serves table below
        # only reads it from the first shot of each rally.
        server=lambda shots: shots['hitter'],
        # Receiver coordinates are filled with a constant 0 placeholder.
        receiver_x=0,
        receiver_y=0,
        receiver=lambda shots: shots['returnerName'],
    )
)

# Set default serve coordinates
# Every first stroke of a rally (strokeid == 1) gets a fixed contact position
# derived from the server's end ('Near'/'Far') and the service side
# ('Deuce'/'Ad'). All other strokes must keep their recorded coordinates.
is_serve = events['strokeid'] == 1
server_near = events['serverFarNear'] == 'Near'
server_far = events['serverFarNear'] == 'Far'
deuce_side = events['side'] == 'Deuce'
ad_side = events['side'] == 'Ad'

# Near/Deuce and Far/Ad serves are placed at x = +50, Near/Ad and Far/Deuce at
# x = -50.
serve_at_positive_x = is_serve & ((server_near & deuce_side) | (server_far & ad_side))
serve_at_negative_x = is_serve & ((server_near & ad_side) | (server_far & deuce_side))

# The fallback is the existing hitter_x. Falling back to hitter_y here would
# overwrite the x coordinate of every non-serve shot with its y coordinate.
events['hitter_x'] = np.where(
    serve_at_positive_x, 50,
    np.where(serve_at_negative_x, -50, events['hitter_x'])
)

# Serves from the 'Near' end are placed at y = -475; any other serve at y = 475.
events['hitter_y'] = np.where(
    is_serve,
    np.where(server_near, -475, 475),
    events['hitter_y']
)

# Combined scoreboard text: "<gameScore>, <pointScore>"
events['score'] = events['gameScore'].str.cat(events['pointScore'], sep=', ')

# Keep only the shots flagged as ending a point
ends_point = events['isPointEnd'].eq(1.0)
points = events.loc[ends_point]

# Filter and mutate serves
# Build one row per rally without a Python-level groupby.apply: take the first
# and last recorded shot of every rally and combine their columns vectorised.
# This keeps numeric/boolean dtypes, works on an empty frame, and no longer
# applies a function over the grouping column.
rally_shots = events.dropna(subset=['rallyid'])  # groupby also ignored NaN keys
first_shot = (
    rally_shots.drop_duplicates('rallyid', keep='first')
    .set_index('rallyid')
    .sort_index()
)
last_shot = (
    rally_shots.drop_duplicates('rallyid', keep='last')
    .set_index('rallyid')
    .sort_index()
)

first_serve_in = first_shot['firstServeIn'] == 1.0
second_serve_in = first_shot['secondServeIn'] == 1.0

serves = pd.DataFrame({
    'server': first_shot['server'],
    # Use the first-serve coordinates when the first serve was in, otherwise
    # the second-serve coordinates.
    'x': first_shot['firstServeXCoord'].where(first_serve_in, first_shot['secondServeXCoord']),
    'y': first_shot['firstServeYCoord'].where(first_serve_in, first_shot['secondServeYCoord']),
    'serveIn': first_serve_in | second_serve_in,
    'side': first_shot['side'],
    'serveInPlacement': first_shot['serveInPlacement'],
    # The winner is read from the last shot of the rally.
    'pointWonByUCLA': last_shot['pointWonBy'] == 'Elise Wagle',
    'isAce': first_shot['isAce']
}).rename_axis('rallyid').reset_index()

# Keep only rallies where one of the two serves landed in
serves = serves[serves['serveIn']]
events

  serves = events.groupby('rallyid').apply(lambda df: pd.Series({


Unnamed: 0,pointScore,gameScore,setScore,isPointStart,pointStartTime,isPointEnd,pointEndTime,rallyid,isBreakPoint,strokeid,...,isDoubleFault,pointWonBy,lastShotError,serveResult,serveInPlacement,server,receiver_x,receiver_y,receiver,score
0,0-0,0-0,0-0,1.0,6749,,,1,,1,...,,,0,1st Serve In,Body,Motka Komac,0,0,Elise Wagle,"0-0, 0-0"
1,0-0,0-0,0-0,,10039,,,1,,2,...,,,0,,,Elise Wagle,0,0,Elise Wagle,"0-0, 0-0"
2,0-0,0-0,0-0,,11629,,,1,,3,...,,,0,,,Motka Komac,0,0,Elise Wagle,"0-0, 0-0"
3,0-0,0-0,0-0,,13157,,,1,,4,...,,,0,,,Elise Wagle,0,0,Elise Wagle,"0-0, 0-0"
4,0-0,0-0,0-0,,14863,,,1,,5,...,,,0,,,Motka Komac,0,0,Elise Wagle,"0-0, 0-0"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
680,30-30,5-3,1-0,1.0,6113969,,,129,,1,...,,,0,1st Serve In,T,Elise Wagle,0,0,Motka Komac,"5-3, 30-30"
681,30-30,5-3,1-0,,6145675,1.0,,129,,2,...,,Elise Wagle,1,,,Motka Komac,0,0,Motka Komac,"5-3, 30-30"
682,40-30,5-3,1-0,1.0,6148559,,,130,,1,...,,,0,2nd Serve In,Wide,Elise Wagle,0,0,Motka Komac,"5-3, 40-30"
683,40-30,5-3,1-0,,6192885,,,130,,2,...,,,0,,,Motka Komac,0,0,Motka Komac,"5-3, 40-30"


In [4]:
# Cleaning for coordinate data

# Serves hit by "Elise Wagle" that landed in (copied so `serves` is untouched)
is_ucla_serve_in = serves['server'].eq('Elise Wagle') & serves['serveIn']
serves_ucla = serves.loc[is_ucla_serve_in].copy()

# Serves with a positive y are mirrored through the origin so that every serve
# ends up with y <= 0. The mask is taken once, before either column changes.
has_positive_y = serves_ucla['y'] > 0
serves_ucla['x'] = np.where(has_positive_y, -serves_ucla['x'], serves_ucla['x'])
serves_ucla['y'] = np.where(has_positive_y, -serves_ucla['y'], serves_ucla['y'])

# Serve outcome: an ace takes precedence, otherwise won/lost by point winner
won_or_lost = np.where(serves_ucla['pointWonByUCLA'], 'Won', 'Lost')
serves_ucla['serveOutcome'] = np.where(serves_ucla['isAce'] == 1.0, 'Ace', won_or_lost)

In [5]:
# Cleaning for Win % and Freq.
# One row per (side, placement): number of serves and number of points won,
# plus the win proportion derived from them.
distribution = (
    serves_ucla
    .groupby(['side', 'serveInPlacement'])
    .agg(
        count=('rallyid', 'size'),
        serves_won=('pointWonByUCLA', 'sum')
    )
    .reset_index()
    .assign(proportion=lambda grouped: grouped['serves_won'] / grouped['count'])
)

# Extremes of the win proportion, used later to highlight best/worst cells
min_proportion = distribution['proportion'].min()
max_proportion = distribution['proportion'].max()

# Label table: a copy of the distribution plus display-ready text columns
labels = distribution.assign(
    proportion_label=lambda grouped: (grouped['proportion'] * 100).round(1).astype(str) + "%",
    count_label=lambda grouped: grouped['count'],
)

# Add the x positions based on side and serveInPlacement
# Lookup of (side, placement) -> label x coordinate. The values are presumably
# the horizontal centres of the six service-box zones -- confirm against the
# plotting code. Combinations not listed get NaN.
label_x_by_zone = {
    ('Ad', 'Wide'): -131.25,
    ('Ad', 'Body'): -78.75,
    ('Ad', 'T'): -26.25,
    ('Deuce', 'T'): 26.25,
    ('Deuce', 'Body'): 78.75,
    ('Deuce', 'Wide'): 131.25,
}
labels['x'] = np.array(
    [
        label_x_by_zone.get(zone, np.nan)
        for zone in zip(labels['side'], labels['serveInPlacement'])
    ],
    dtype=float,
)

# Determine text color and max/min status
# Both columns use the same precedence (max is checked before min), so a row
# can never be coloured as the minimum while being tagged as the maximum. That
# mismatch occurred whenever min_proportion == max_proportion, e.g. with a
# single group or when every group has the same win rate.
is_max = labels['proportion'] == max_proportion
is_min = labels['proportion'] == min_proportion

labels['text_color'] = np.where(
    is_max, "darkgreen",
    np.where(is_min, "darkred", "black")
)

labels['max_min'] = np.where(
    is_max, "max",
    np.where(is_min, "min", "no")
)

# Resulting DataFrame 'labels' now has the required columns
print(labels)

    side serveInPlacement  count  serves_won  proportion proportion_label  \
0     Ad             Body     13           7    0.538462            53.8%   
1     Ad                T      5           3    0.600000            60.0%   
2     Ad             Wide     13          11    0.846154            84.6%   
3  Deuce             Body     16          11    0.687500            68.8%   
4  Deuce                T      7           5    0.714286            71.4%   
5  Deuce             Wide      6           4    0.666667            66.7%   

   count_label       x text_color max_min  
0           13  -78.75    darkred     min  
1            5  -26.25      black      no  
2           13 -131.25  darkgreen     max  
3           16   78.75      black      no  
4            7   26.25      black      no  
5            6  131.25      black      no  


In [6]:
# Export data frames as JSON (one list of row records per file)
serves_ucla_json = serves_ucla.to_json(orient='records')
labels_json = labels.to_json(orient='records')

# Write each serialized frame to its own file in the working directory
json_exports = (
    ('serves_ucla.json', serves_ucla_json),
    ('labels.json', labels_json),
)
for out_name, payload in json_exports:
    with open(out_name, 'w') as handle:
        handle.write(payload)