# Import

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor


## Load and Process Badminton Data

This code snippet performs the following tasks:

1. Loads data from an Excel file named 'badmintondata.xlsx' using the `read_excel` function.
2. Identifies consecutive rows of all zeros in the loaded data using a boolean mask.
3. Creates groups based on the mask values, where each group represents a sequence of non-zero rows followed by zero rows.
4. Filters out consecutive rows of all zeros from the original dataframe using the inverted mask.

The resulting grouped dataframe can be used for further analysis or processing.


In [16]:
# Load the data from Excel
df = pd.read_csv('badmintondata.csv')
columns = ['HUMAN PLAYER POSITION (X) metres',	'HUMAN PLAYER POSITION (Y) metres',	'INITITAL VELOCITY OF SHUTTELCOCK(m/s)',	'INITIAL SHUTTELCOCK FIRING ANGLE (DEGREE)',	'SHUTTELCOCK SLANT ANGLE TO SIDELINE(DEGREE)',	'SHUTTLECOCK POSITIION IN AIR(X) metres',	'SHUTTLECOCK POSITIION IN AIR(Y) metres',	'SHUTTLECOCK POSITIION IN AIR(Z) metres']
# Identify consecutive rows of all zeros
mask = (df == 0).all(axis=1)  # Create a boolean mask indicating rows where all values are zeros
groups = mask.cumsum()  # Create groups by cumulatively summing the mask values (1 for non-zero rows, increasing by 1 for zero rows)

# Filter out consecutive rows of all zeros
filtered_df = df[~mask]  # Filter the original dataframe to exclude consecutive rows of all zeros

filtered_df

Unnamed: 0,HUMAN PLAYER POSITION (X) metres,HUMAN PLAYER POSITION (Y) metres,INITITAL VELOCITY OF SHUTTELCOCK(m/s),INITIAL SHUTTELCOCK FIRING ANGLE (DEGREE),SHUTTELCOCK SLANT ANGLE TO SIDELINE(DEGREE),SHUTTLECOCK POSITIION IN AIR(X) metres,SHUTTLECOCK POSITIION IN AIR(Y) metres,SHUTTLECOCK POSITIION IN AIR(Z) metres
0,4,1,10,40,0,4.075543,1.000000,1.662912
1,4,1,10,40,0,4.152007,1.000000,1.724866
2,4,1,10,40,0,4.228906,1.000000,1.788139
3,4,1,10,40,0,4.302100,1.000000,1.845245
4,4,1,10,40,0,4.376877,1.000000,1.904128
...,...,...,...,...,...,...,...,...
72634,4,4,70,70,15,9.217823,5.398111,2.591053
72635,4,4,70,70,15,9.275177,5.413480,2.041410
72636,4,4,70,70,15,9.326363,5.427195,1.463594
72637,4,4,70,70,15,9.370879,5.439123,0.862842


## Add Time Field to Grouped Data

This code snippet extends the previous code and performs the following tasks:

1. Groups the filtered dataframe based on the groups created earlier.
2. Defines a function named `add_time_field` that adds a new column named 'Time' to each group.
3. The 'Time' column is calculated as the index position within each group multiplied by 10, representing time in milliseconds.
4. Applies the `add_time_field` function to each group in the grouped dataframe using the `apply` method.

The resulting `grouped_df` will now include the 'Time' column added to each group, representing the time in milliseconds for each row within the group.

This step is useful for adding a time-based dimension to the grouped data for further analysis or visualization.


In [17]:
# Group the data before each group of zeros
grouped_df = filtered_df.groupby(groups, group_keys=False)

# Function to add a Time field to a group
def add_time_field(group):
    group['Time'] = np.arange(len(group)) * 10  # Multiply by 10 to get time in ms
    return group

grouped_df = grouped_df.apply(add_time_field)
grouped_df

Unnamed: 0,HUMAN PLAYER POSITION (X) metres,HUMAN PLAYER POSITION (Y) metres,INITITAL VELOCITY OF SHUTTELCOCK(m/s),INITIAL SHUTTELCOCK FIRING ANGLE (DEGREE),SHUTTELCOCK SLANT ANGLE TO SIDELINE(DEGREE),SHUTTLECOCK POSITIION IN AIR(X) metres,SHUTTLECOCK POSITIION IN AIR(Y) metres,SHUTTLECOCK POSITIION IN AIR(Z) metres,Time
0,4,1,10,40,0,4.075543,1.000000,1.662912,0
1,4,1,10,40,0,4.152007,1.000000,1.724866,10
2,4,1,10,40,0,4.228906,1.000000,1.788139,20
3,4,1,10,40,0,4.302100,1.000000,1.845245,30
4,4,1,10,40,0,4.376877,1.000000,1.904128,40
...,...,...,...,...,...,...,...,...,...
72634,4,4,70,70,15,9.217823,5.398111,2.591053,340
72635,4,4,70,70,15,9.275177,5.413480,2.041410,350
72636,4,4,70,70,15,9.326363,5.427195,1.463594,360
72637,4,4,70,70,15,9.370879,5.439123,0.862842,370


The code snippet provided creates a figure with three subplots, each representing the position of a shuttlecock in a different axis (X, Y, Z) over time. The code accomplishes the following:

1. It sets up the figure and axes using the `subplots` function, creating a layout of 3 rows and 1 column.
2. For each subplot, it plots the corresponding shuttlecock position data against time using scatter plots.
3. It sets the labels for the x-axis and y-axis of each subplot.
4. It sets titles for each subplot indicating the shuttlecock position axis.
5. It adjusts the layout to prevent overlapping of plot elements using the `tight_layout` function.
6. Finally, it displays the plot using the `show` function.

The scatter plots visualize the position of the shuttlecock in each axis (X, Y, Z) over time, with time represented on the x-axis in milliseconds and position represented on the y-axis in meters. Each scatter plot uses markers in the shape of "x" and has a small size (s=2) and moderate transparency (alpha=0.8) to avoid overcrowding the plot.

This code can be used to analyze and visualize the trajectory or movement patterns of a shuttlecock in three-dimensional space over a given period of time.


In [45]:

from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import linear_model

def addRidgeAndLasso(nameOfColumn , predictingColumn ):
    columns = ['Model', 'Train error', 'Test error', 'Sum of Absolute Weights']

    # Clone so that data change will not affect others
    whole_data_df = filtered_df.copy().drop(predictingColumn, axis=1)
    predicting_df = filtered_df[predictingColumn]

    X_train, X_test, Y_train, Y_test = train_test_split(whole_data_df, predicting_df, test_size=0.3, random_state=12)

    # Begin Ridge
    ridge = linear_model.Ridge(alpha=0.5)
    ridge.fit(X_train, Y_train)
    Y_pred_train_ridge = ridge.predict(X_train)
    Y_pred_test_ridge = ridge.predict(X_test)
    # Format Ridge model
    ridge_model = "%.2f X + %.2f X2 + %.2f X3 + %.2f X4 + %.2f X5 + %.2f X6 + %.2f X7 + %.2f" % (ridge.coef_[0], 
                ridge.coef_[1], ridge.coef_[2], 
                ridge.coef_[3], ridge.coef_[4], ridge.coef_[5], ridge.coef_[6], ridge.intercept_)
    
    ridge_result = [ ridge_model, np.sqrt(mean_squared_error(Y_train, Y_pred_train_ridge)), 
                np.sqrt(mean_squared_error(Y_test, Y_pred_test_ridge)),
                np.absolute(ridge.coef_).sum() + np.absolute(ridge.intercept_)]
    
    ridge_results = pd.DataFrame([ridge_result], columns=columns, index=['Ridge'+nameOfColumn])

    pd.concat([ridge_results])

    # Begin Lasso

    lasso = linear_model.Lasso(alpha=0.01)
    lasso.fit(X_train, Y_train)
    Y_pred_train_lasso = lasso.predict(X_train)
    Y_pred_test_lasso = lasso.predict(X_test)

    # Format Lasso model
    lasso_model = "%.2f X + %.2f X2 + %.2f X3 + %.2f X4 + %.2f X5 + %.2f X6 + %.2f X7 + %.2f" % (lasso.coef_[0], 
                lasso.coef_[1], lasso.coef_[2], 
                lasso.coef_[3], lasso.coef_[4], ridge.coef_[5], ridge.coef_[6], lasso.intercept_)
    
    lasso_result = [ lasso_model, np.sqrt(mean_squared_error(Y_train, Y_pred_train_lasso)), 
                np.sqrt(mean_squared_error(Y_test, Y_pred_test_lasso)),
                np.absolute(lasso.coef_).sum() + np.absolute(lasso.intercept_)]
    
    lasso_results = pd.DataFrame([lasso_result], columns=columns, index=['Lasso'+nameOfColumn])

    # Combine and print result
    return pd.concat([ ridge_results, lasso_results])
        
resultX = addRidgeAndLasso( " regularization for X",'SHUTTLECOCK POSITIION IN AIR(X) metres')
resultY = addRidgeAndLasso( " regularization for Y",'SHUTTLECOCK POSITIION IN AIR(Y) metres')
resultZ = addRidgeAndLasso( " regularization for Z",'SHUTTLECOCK POSITIION IN AIR(Z) metres')
print(resultX)
print(resultY)
print(resultZ)

                                                                                                   Model  \
Ridge regularization for X  0.00 X + -2.49 X2 + 0.00 X3 + -0.01 X4 + -0.15 X5 + 2.50 X6 + 0.02 X7 + 7.70   
Lasso regularization for X  0.00 X + -2.39 X2 + 0.00 X3 + -0.01 X4 + -0.14 X5 + 2.50 X6 + 0.02 X7 + 7.69   

                            Train error  Test error  Sum of Absolute Weights  
Ridge regularization for X     0.893315    0.899689                12.864220  
Lasso regularization for X     0.894424    0.901223                12.658021  
                                                                                                  Model  \
Ridge regularization for Y  0.00 X + 1.00 X2 + -0.00 X3 + 0.00 X4 + 0.06 X5 + 0.26 X6 + 0.00 X7 + -1.91   
Lasso regularization for Y  0.00 X + 0.99 X2 + -0.00 X3 + 0.00 X4 + 0.06 X5 + 0.26 X6 + 0.00 X7 + -1.84   

                            Train error  Test error  Sum of Absolute Weights  
Ridge regularization for Y     0.2867