In [1]:
#import common libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import scipy
import os
import csv


In [2]:
#read the csv data, you can change the file name according to yours
raw_data = pd.read_csv(r'C:\Users\user\Desktop\dataset\train.csv', delimiter=',')

In [3]:
#encode categorical features into numbers, for lose_reason, if no result yet, set to 0
cleanup_nums1 = {"type": {"short service": 0, "long service": 1, "clear": 2, "push/rush": 3, "smash": 4, "defensive shot": 5,
                          "drive": 6, "net shot": 7, "lob": 8, "drop": 9}}
cleanup_nums2 = {"lose_reason": {"out": 1, "touched the net": 2, "not pass over the net": 3, "opponent's ball landed": 4, "misjudged": 5}}
data = raw_data.replace(cleanup_nums1)
data = data.replace(cleanup_nums2)
data['lose_reason'].fillna(0, inplace=True)

In [4]:
#these are the features that may be important for prediction, to see what method was used for feature selection, check the description on Inter-Stroke Features Selection
selected = ['match_id', 'set', 'rally_id', 'ball_round', 'rally_length', 
            'landing_height', 'landing_area', 
            'player_location_y','landing_x', 'landing_y','type']

In [6]:
#some of the column may need further scaling
selected_data = data[selected]
selected_data.iloc[:,5:-1]

Unnamed: 0,landing_height,landing_area,player_location_y,landing_x,landing_y
0,2.0,7,308.95,0.773171,0.519687
1,2.0,7,594.37,0.086585,-0.237240
2,1.0,3,378.28,1.402439,1.638490
3,1.0,4,809.08,1.358537,-1.434167
4,2.0,1,268.22,1.487805,0.359844
...,...,...,...,...,...
30167,2.0,7,659.34,0.051220,-0.436198
30168,2.0,7,349.63,0.145122,0.290938
30169,2.0,7,583.50,-0.097561,-0.277240
30170,1.0,9,401.02,-0.080488,1.515417


In [7]:
train = selected_data
from sklearn.preprocessing import MinMaxScaler

# Select the columns that need scaling
columns_to_scale = selected_data.columns[5:-1]

# Create an instance of MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))

# Fit and transform the selected columns using the scaler
train[columns_to_scale] = scaler.fit_transform(train[columns_to_scale])
train.iloc[:,5:-1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train[columns_to_scale] = scaler.fit_transform(train[columns_to_scale])


Unnamed: 0,landing_height,landing_area,player_location_y,landing_x,landing_y
0,1.0,0.333333,-0.476638,0.214696,0.267701
1,1.0,0.333333,0.346836,0.018631,-0.118990
2,-1.0,-0.555556,-0.276612,0.394393,0.839262
3,-1.0,-0.333333,0.966302,0.381856,-0.730463
4,1.0,-1.000000,-0.594149,0.418771,0.186042
...,...,...,...,...,...
30167,1.0,0.333333,0.534283,0.008532,-0.220632
30168,1.0,0.333333,-0.359271,0.035347,0.150839
30169,1.0,0.333333,0.315474,-0.033954,-0.139425
30170,-1.0,0.777778,-0.211004,-0.029079,0.776388


In [9]:
#group the data by the same rally for we need to make sure the sequence our model observe is on the same rally
grouped_data = train.groupby(['set', 'match_id', 'rally_id'])

In [21]:
#generate past information(past 4 strokes) and future information(we need to predict future stroke)
sequence_length = 1
future_length = 1
sequences = []
futures = []
for _, group in grouped_data:
    rally_data = group[[ 'landing_height', 'landing_area', 
            'player_location_y','landing_y', 'type']]  # Select the relevant features for the input
    series_shift= rally_data.shift()
    for col in series_shift.columns:
        series_shift.rename(columns={col: 'prev_' + col}, inplace=True)
    merged = pd.merge(rally_data, series_shift, left_index=True, right_index=True)
    merged = merged.dropna(axis=0)
    merged = merged[['prev_landing_height', 'prev_landing_area', 
            'prev_player_location_y','prev_landing_y', 'prev_type','landing_height', 'landing_area', 
            'player_location_y','landing_y', 'type']]
    num_samples = len(merged)
    for i in range(num_samples):
        sequence = merged.iloc[i]
        sequences.append(sequence) 
        

In [35]:
train = np.array(sequences)


In [36]:
train.shape

(27899, 10)

In [40]:
X = train[:, :-1]
y = train[:, -1]


In [39]:
y.shape

(27899,)

In [41]:
#split the datasets 70% training, 20% testing, 10% validating

from sklearn.model_selection import train_test_split

# Shuffle the data
indices = np.arange(X.shape[0])
np.random.shuffle(indices)
X_shuffled = X[indices]
y_shuffled = y[indices]

# Split the shuffled data into train, test, and validation sets
X_train, X_test, y_train, y_test = train_test_split(X_shuffled, y_shuffled, test_size=0.3, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.33, random_state=42)

# Verify the shapes of the data splits
print("Train data shape:", X_train.shape)
print("Test data shape:", X_test.shape)
print("Validation data shape:", X_val.shape)
print("Train target shape:", y_train.shape)
print("Test target shape:", y_test.shape)
print("Validation target shape:", y_val.shape)

Train data shape: (19529, 9)
Test data shape: (5607, 9)
Validation data shape: (2763, 9)
Train target shape: (19529,)
Test target shape: (5607,)
Validation target shape: (2763,)


In [95]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential()
model.add(Dense(128, activation='elu', input_shape=(9,)))
model.add(Dropout(0.1))
model.add(Dense(64, activation='elu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='elu'))
model.add(Dropout(0.1))
model.add(Dense(10, activation='softmax'))

model.summary()


Model: "sequential_24"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_80 (Dense)            (None, 128)               1280      
                                                                 
 dropout_23 (Dropout)        (None, 128)               0         
                                                                 
 dense_81 (Dense)            (None, 64)                8256      
                                                                 
 dropout_24 (Dropout)        (None, 64)                0         
                                                                 
 dense_82 (Dense)            (None, 32)                2080      
                                                                 
 dropout_25 (Dropout)        (None, 32)                0         
                                                                 
 dense_83 (Dense)            (None, 10)              

In [96]:
optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-3)
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])


# Training the model
history = model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_val, y_val))

# Evaluating the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [97]:
model.save('model_type_1.h5')