In [22]:
import sys
sys.path.append('../')
sys.path.append('../../')

from Datasets.BaseballDataset import BaseballDataset
from BaselineModel.BaselineModel import BaselineModel
from TransformerModel.TransformerModelRedisual import *

import torch
import torch.nn as nn
import torch.optim as optim
import math
import torch.nn.functional as F
from torch.utils.data import DataLoader
import json
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler

In [23]:
data_config_path = "../../data/configv3.json"
full_data_path = "../../data/full_cleaned_94.csv"
full_data = pd.read_csv(full_data_path)

In [24]:
sequence_length = 400


In [25]:
scaler_path = "../../data/full_scalers_94.pkl"
with open(scaler_path, "rb") as file:
    scalers = pickle.load(file)

In [26]:
m_path = "../fixed_large_400/h12_e12_h96_d0_lp0.3_lr1e-05_ep40/transformer_model.pth"
c_path = "../fixed_large_400/h12_e12_h96_d0_lp0.3_lr1e-05_ep40//model_config.json"

transformer_model = TransformerHelper(m_path,c_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [27]:
#transformer model trained on 2017 data, so baseline model will use that as well
from datetime import datetime, timedelta

start_date = pd.to_datetime("2017-01-01")
end_date = pd.to_datetime("2018-01-01")
full_data['game_date'] = pd.to_datetime(full_data['game_date'])
total_days = (end_date - start_date).days
split_date = pd.to_datetime(start_date) + timedelta(days=int(total_days * 0.6))

# Split data into train and validation sets
train = full_data[(full_data['game_date'] > start_date) & (full_data['game_date'] < split_date)].reset_index(drop=True)
valid = full_data[(full_data['game_date'] > "2024-01-01")].reset_index(drop=True)



In [28]:
train_dataset = BaseballDataset(train,data_config_path,sequence_length)
test_dataset = BaseballDataset(valid,data_config_path,sequence_length)

In [8]:
#train baseline model, uses logistic regression for categorical preds and linear regression for continuous preds
baseline_model = BaselineModel(train_dataset, scaler_path, max_iters=100, pred_mode=False, pred_mean=False)
baseline_model.train(batch_size=2000)

Creating matrices
Processing batch 0
Concatenating and appending data
Processing batch 10
Concatenating and appending data
Processing batch 20
Concatenating and appending data
Processing batch 30
Concatenating and appending data
Processing batch 40
Concatenating and appending data
Processing batch 50
Concatenating and appending data
Processing batch 60
Concatenating and appending data
Processing batch 70
Concatenating and appending data
Processing batch 80
Concatenating and appending data
Processing batch 90
Concatenating and appending data
Processing batch 100
Concatenating and appending data
Processing batch 110
Concatenating and appending data
Processing batch 120
Concatenating and appending data
Processing batch 130
Concatenating and appending data
Processing batch 140
Final concatenation
Finished processing all batches
Starting cont train
Starting cat train


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# make preds with transformer model
trans_preds, true = transformer_model.make_preds(test_dataset, scaler_path, device, 2000, scale=True)


#make preds with baseline model
loader = DataLoader(test_dataset, batch_size=2000, shuffle=False, num_workers=0)
base_preds = []
for seq, _, _ in loader:

    base_preds.append(baseline_model.predict(seq, scale=True))

baseline_preds = pd.concat(base_preds, ignore_index=True)

In [19]:



#both models with share the same true values


# First 4 columns are continuous preds/labels
trans_cont_preds = trans_preds.iloc[:, 0:4]
base_cont_preds = baseline_preds.iloc[:,0:4]
cont_true = true.iloc[:, 0:4]

# Next 10 columns are probabilities for the events categorical feature
trans_events_preds = trans_preds.iloc[:, 4:14]
base_events_preds = baseline_preds.iloc[:, 4:14]
events_true = true.iloc[:, 4:14]


# Last 10 columns are probabilities for the hit_location categorical feature
trans_loc_preds = trans_preds.iloc[:, 14:]
base_loc_preds = baseline_preds.iloc[:,14:]
loc_true = true.iloc[:, 14:]


trans_cont_error = np.mean(np.abs(cont_true - trans_cont_preds), axis=1)
base_cont_error = np.mean(np.abs(cont_true - base_cont_preds), axis=1)



#calculate top k precision
top_k = 4

# Get top-k predictions for each sample, returning the numeric index instead of column names
trans_events_top_k_preds = trans_events_preds.apply(lambda x: x.nlargest(top_k).index.map(lambda name: trans_events_preds.columns.get_loc(name)), axis=1)
trans_loc_top_k_preds = trans_loc_preds.apply(lambda x: x.nlargest(top_k).index.map(lambda name: trans_loc_preds.columns.get_loc(name)), axis=1)

base_events_top_k_preds = base_events_preds.apply(lambda x: x.nlargest(top_k).index.map(lambda name: base_events_preds.columns.get_loc(name)), axis=1)
base_loc_top_k_preds = base_loc_preds.apply(lambda x: x.nlargest(top_k).index.map(lambda name: base_loc_preds.columns.get_loc(name)), axis=1)

# Now the predictions are stored as the numeric indices corresponding to the classes

# Compute precision for each class
trans_events_class_precisions = []
trans_loc_class_precisions = []

base_events_class_precisions = []
base_loc_class_precisions = []

# For each class in events and hit_location, calculate top-k precision
for class_idx in range(10):
    # For events precision
    true_class_mask = events_true.iloc[:, class_idx] == 1  # Find where this class is the true class
    true_class_indices = events_true.index[true_class_mask]
    
    # Check if this class is in the top-k predictions when it's the true class
    trans_event_precision = np.mean([1 if class_idx in trans_events_top_k_preds.iloc[i] else 0 for i in true_class_indices])
    trans_events_class_precisions.append(trans_event_precision)

    base_event_precision = np.mean([1 if class_idx in base_events_top_k_preds.iloc[i] else 0 for i in true_class_indices])
    base_events_class_precisions.append(base_event_precision)

    # For hit location precision
    true_class_mask = loc_true.iloc[:, class_idx] == 1  # Find where this class is the true class
    true_class_indices = loc_true.index[true_class_mask]

    # Check if this class is in the top-k predictions when it's the true class
    trans_loc_precision = np.mean([1 if class_idx in trans_loc_top_k_preds.iloc[i] else 0 for i in true_class_indices])
    trans_loc_class_precisions.append(trans_loc_precision)

    base_loc_precision = np.mean([1 if class_idx in base_loc_top_k_preds.iloc[i] else 0 for i in true_class_indices])
    base_loc_class_precisions.append(base_loc_precision)


# Retrieve the class names for 'events' and 'hit_location'
event_class_names = test_dataset.categorical_label_names[0]
loc_class_names = test_dataset.categorical_label_names[1]

# Identify the column indices for classes to ignore
ignored_event_classes = ['events_S', 'events_B']
ignored_loc_classes = ['hit_location_0.0']

# Get the indices of the ignored classes
ignored_event_indices = [event_class_names.index(cls) for cls in ignored_event_classes]
ignored_loc_indices = [loc_class_names.index(cls) for cls in ignored_loc_classes]

# Determine the true class indices for 'events' and 'hit_location'
true_event_classes = events_true.idxmax(axis=1).map(
    lambda name: events_true.columns.get_loc(name)
).values
true_loc_classes = loc_true.idxmax(axis=1).map(
    lambda name: loc_true.columns.get_loc(name)
).values

# Create masks to exclude ignored classes
# For 'events': Exclude samples where true class is in ignored_event_indices
events_mask = ~np.isin(true_event_classes, ignored_event_indices)

# For 'hit_location': Exclude samples where true class is in ignored_loc_indices
loc_mask = ~np.isin(true_loc_classes, ignored_loc_indices)

# Apply the masks to filter out ignored examples
# Filtered indices for 'events'
filtered_event_indices = np.where(events_mask)[0]

# Filtered indices for 'hit_location'
filtered_loc_indices = np.where(loc_mask)[0]

# Convert top-k predictions to lists for easier processing
trans_events_top_k_list = trans_events_top_k_preds.tolist()
base_events_top_k_list = base_events_top_k_preds.tolist()

trans_loc_top_k_list = trans_loc_top_k_preds.tolist()
base_loc_top_k_list = base_loc_top_k_preds.tolist()

# Calculate Top-K Accuracy for 'events' excluding ignored classes
trans_events_top_k_correct = [
    true_event_classes[i] in trans_events_top_k_list[i]
    for i in filtered_event_indices
]
base_events_top_k_correct = [
    true_event_classes[i] in base_events_top_k_list[i]
    for i in filtered_event_indices
]

# Calculate Top-K Accuracy for 'hit_location' excluding ignored classes
trans_loc_top_k_correct = [
    true_loc_classes[i] in trans_loc_top_k_list[i]
    for i in filtered_loc_indices
]
base_loc_top_k_correct = [
    true_loc_classes[i] in base_loc_top_k_list[i]
    for i in filtered_loc_indices
]

# Calculate the mean accuracy
trans_events_top_k_accuracy = np.mean(trans_events_top_k_correct) if len(trans_events_top_k_correct) > 0 else np.nan
base_events_top_k_accuracy = np.mean(base_events_top_k_correct) if len(base_events_top_k_correct) > 0 else np.nan

trans_loc_top_k_accuracy = np.mean(trans_loc_top_k_correct) if len(trans_loc_top_k_correct) > 0 else np.nan
base_loc_top_k_accuracy = np.mean(base_loc_top_k_correct) if len(base_loc_top_k_correct) > 0 else np.nan

# (Optional) Calculate Combined Top-K Accuracy Across All Categorical Features
# Ensure that both 'events' and 'hit_location' are not in ignored classes for combined accuracy



Starting Batch: 10
Starting Batch: 20
Starting Batch: 30
Starting Batch: 40
Starting Batch: 50
Starting Batch: 60
Starting Batch: 70
Starting Batch: 80
Starting Batch: 90
Starting Batch: 100


In [29]:
# 1. Make Predictions with Transformer Model
# ===========================================
trans_preds, true = transformer_model.make_preds(
    test_dataset, scaler_path, device, 2000, scale=True
)

# 2. Generate Baseline Predictions Based on Class Distributions
# ===============================================================

# Compute class distributions from the training dataset
train_preds, train_true = transformer_model.make_preds(
    train_dataset, scaler_path, device, 2000, scale=True
)

# Split true labels into categorical features
train_events_true = train_true.iloc[:, 4:14]
train_loc_true = train_true.iloc[:, 14:]

# Compute class counts
events_class_counts = train_events_true.sum(axis=0)
loc_class_counts = train_loc_true.sum(axis=0)

# Compute class distributions (probabilities)
events_class_distribution = events_class_counts / events_class_counts.sum()
loc_class_distribution = loc_class_counts / loc_class_counts.sum()

# Convert distributions to numpy arrays for sampling
events_class_probs = events_class_distribution.values
loc_class_probs = loc_class_distribution.values

# Retrieve class names
event_class_names = test_dataset.categorical_label_names[0]
loc_class_names = test_dataset.categorical_label_names[1]

def sample_top_k_classes(class_names, class_probs, top_k=4, num_samples=2000):
    """
    Samples top-k classes without replacement based on the provided class probabilities.

    Args:
        class_names (list): List of class names.
        class_probs (numpy.ndarray): Array of class probabilities.
        top_k (int): Number of classes to sample.
        num_samples (int): Number of samples to generate.

    Returns:
        list of lists: Each sublist contains the indices of the sampled classes for a sample.
    """
    sampled_classes = np.array([
        np.random.choice(len(class_names), size=top_k, replace=False, p=class_probs)
        for _ in range(num_samples)
    ])
    return sampled_classes.tolist()

# Determine the number of test samples
num_test_samples = len(test_dataset)

# Sample top-k classes for 'events' and 'hit_location'
baseline_events_top_k = sample_top_k_classes(
    event_class_names, events_class_probs, top_k=4, num_samples=num_test_samples
)
baseline_loc_top_k = sample_top_k_classes(
    loc_class_names, loc_class_probs, top_k=4, num_samples=num_test_samples
)

# Convert sampled classes to probabilities
# Initialize probability DataFrames with zeros
baseline_events_probs = pd.DataFrame(
    0, index=np.arange(num_test_samples), columns=event_class_names
)
baseline_loc_probs = pd.DataFrame(
    0, index=np.arange(num_test_samples), columns=loc_class_names
)

# Assign equal probability to sampled classes
for i, sampled in enumerate(baseline_events_top_k):
    baseline_events_probs.iloc[i, sampled] = 1.0 / 4  # Equal probability

for i, sampled in enumerate(baseline_loc_top_k):
    baseline_loc_probs.iloc[i, sampled] = 1.0 / 4  # Equal probability

# Handle continuous predictions for baseline
# Option 1: Set to mean of training continuous labels
# Compute mean from training data
trans_cont_mean = train_true.iloc[:, 0:4].mean().values  # Replace with actual mean if different
baseline_cont_preds = pd.DataFrame(
    np.tile(trans_cont_mean, (num_test_samples, 1)),
    columns=true.iloc[:, 0:4].columns
)

# Combine continuous and categorical predictions
baseline_preds = pd.concat([
    baseline_cont_preds.reset_index(drop=True),
    baseline_events_probs.reset_index(drop=True),
    baseline_loc_probs.reset_index(drop=True)
], axis=1)

# 3. Process Transformer and Baseline Predictions
# ================================================

# Split predictions and true values into continuous and categorical parts
# First 4 columns are continuous preds/labels
trans_cont_preds = trans_preds.iloc[:, 0:4]
base_cont_preds = baseline_preds.iloc[:, 0:4]
cont_true = true.iloc[:, 0:4]

# Next 10 columns are probabilities for the 'events' categorical feature
trans_events_preds = trans_preds.iloc[:, 4:14]
base_events_preds = baseline_preds.iloc[:, 4:14]
events_true = true.iloc[:, 4:14]

# Last 10 columns are probabilities for the 'hit_location' categorical feature
trans_loc_preds = trans_preds.iloc[:, 14:]
base_loc_preds = baseline_preds.iloc[:, 14:]
loc_true = true.iloc[:, 14:]

# Calculate mean absolute error for continuous predictions
trans_cont_error = np.mean(np.abs(cont_true - trans_cont_preds), axis=1)
base_cont_error = np.mean(np.abs(cont_true - base_cont_preds), axis=1)

# 4. Calculate Top-K Precision for Categorical Features
# =======================================================

top_k = 4

# Get top-k predictions for each sample, returning the numeric index instead of column names
trans_events_top_k_preds = trans_events_preds.apply(
    lambda x: x.nlargest(top_k).index.map(lambda name: trans_events_preds.columns.get_loc(name)),
    axis=1
)
trans_loc_top_k_preds = trans_loc_preds.apply(
    lambda x: x.nlargest(top_k).index.map(lambda name: trans_loc_preds.columns.get_loc(name)),
    axis=1
)

base_events_top_k_preds = base_events_preds.apply(
    lambda x: x.nlargest(top_k).index.map(lambda name: base_events_preds.columns.get_loc(name)),
    axis=1
)
base_loc_top_k_preds = base_loc_preds.apply(
    lambda x: x.nlargest(top_k).index.map(lambda name: base_loc_preds.columns.get_loc(name)),
    axis=1
)

# Compute precision for each class
trans_events_class_precisions = []
trans_loc_class_precisions = []

base_events_class_precisions = []
base_loc_class_precisions = []

# For each class in 'events' and 'hit_location', calculate top-k precision
for class_idx in range(10):
    # For 'events' precision
    true_class_mask = events_true.iloc[:, class_idx] == 1  # Find where this class is the true class
    true_class_indices = events_true.index[true_class_mask]

    # Check if this class is in the top-k predictions when it's the true class
    trans_event_precision = np.mean([
        1 if class_idx in trans_events_top_k_preds.iloc[i] else 0 
        for i in true_class_indices
    ])
    trans_events_class_precisions.append(trans_event_precision)

    base_event_precision = np.mean([
        1 if class_idx in base_events_top_k_preds.iloc[i] else 0 
        for i in true_class_indices
    ])
    base_events_class_precisions.append(base_event_precision)

    # For 'hit_location' precision
    true_class_mask = loc_true.iloc[:, class_idx] == 1  # Find where this class is the true class
    true_class_indices = loc_true.index[true_class_mask]

    # Check if this class is in the top-k predictions when it's the true class
    trans_loc_precision = np.mean([
        1 if class_idx in trans_loc_top_k_preds.iloc[i] else 0 
        for i in true_class_indices
    ])
    trans_loc_class_precisions.append(trans_loc_precision)

    base_loc_precision = np.mean([
        1 if class_idx in base_loc_top_k_preds.iloc[i] else 0 
        for i in true_class_indices
    ])
    base_loc_class_precisions.append(base_loc_precision)

# 5. Exclude Certain Majority Classes from Evaluation
# ======================================================

# Identify the column indices for classes to ignore
ignored_event_classes = ['events_S', 'events_B']
ignored_loc_classes = ['hit_location_0.0']

# Get the indices of the ignored classes
ignored_event_indices = [event_class_names.index(cls) for cls in ignored_event_classes]
ignored_loc_indices = [loc_class_names.index(cls) for cls in ignored_loc_classes]

# Determine the true class indices for 'events' and 'hit_location'
true_event_classes = events_true.idxmax(axis=1).map(
    lambda name: events_true.columns.get_loc(name)
).values
true_loc_classes = loc_true.idxmax(axis=1).map(
    lambda name: loc_true.columns.get_loc(name)
).values

# Create masks to exclude ignored classes
# For 'events': Exclude samples where true class is in ignored_event_indices
events_mask = ~np.isin(true_event_classes, ignored_event_indices)

# For 'hit_location': Exclude samples where true class is in ignored_loc_indices
loc_mask = ~np.isin(true_loc_classes, ignored_loc_indices)

# Apply the masks to filter out ignored examples
# Filtered indices for 'events'
filtered_event_indices = np.where(events_mask)[0]

# Filtered indices for 'hit_location'
filtered_loc_indices = np.where(loc_mask)[0]

# Convert top-k predictions to lists for easier processing
trans_events_top_k_list = trans_events_top_k_preds.tolist()
base_events_top_k_list = base_events_top_k_preds.tolist()

trans_loc_top_k_list = trans_loc_top_k_preds.tolist()
base_loc_top_k_list = base_loc_top_k_preds.tolist()

# Calculate Top-K Accuracy for 'events' excluding ignored classes
trans_events_top_k_correct = [
    true_event_classes[i] in trans_events_top_k_list[i]
    for i in filtered_event_indices
]
base_events_top_k_correct = [
    true_event_classes[i] in base_events_top_k_list[i]
    for i in filtered_event_indices
]

# Calculate Top-K Accuracy for 'hit_location' excluding ignored classes
trans_loc_top_k_correct = [
    true_loc_classes[i] in trans_loc_top_k_list[i]
    for i in filtered_loc_indices
]
base_loc_top_k_correct = [
    true_loc_classes[i] in base_loc_top_k_list[i]
    for i in filtered_loc_indices
]

# Calculate the mean accuracy
trans_events_top_k_accuracy = np.mean(trans_events_top_k_correct) if len(trans_events_top_k_correct) > 0 else np.nan
base_events_top_k_accuracy = np.mean(base_events_top_k_correct) if len(base_events_top_k_correct) > 0 else np.nan

trans_loc_top_k_accuracy = np.mean(trans_loc_top_k_correct) if len(trans_loc_top_k_correct) > 0 else np.nan
base_loc_top_k_accuracy = np.mean(base_loc_top_k_correct) if len(base_loc_top_k_correct) > 0 else np.nan

Starting Batch: 10
Starting Batch: 20
Starting Batch: 30
Starting Batch: 40
Starting Batch: 50
Starting Batch: 60
Starting Batch: 70
Starting Batch: 80
Starting Batch: 90
Starting Batch: 100
Starting Batch: 10
Starting Batch: 20
Starting Batch: 30
Starting Batch: 40
Starting Batch: 50
Starting Batch: 60
Starting Batch: 70
Starting Batch: 80
Starting Batch: 90
Starting Batch: 100
Starting Batch: 110
Starting Batch: 120
Starting Batch: 130
Starting Batch: 140


  baseline_events_probs.iloc[i, sampled] = 1.0 / 4  # Equal probability
  baseline_loc_probs.iloc[i, sampled] = 1.0 / 4  # Equal probability


In [30]:
print(f"=== Top-{top_k} Accuracy (Excluding Majority Classes) ===\n")

print(f"--- 'Events' Category ---")
print(f"Transformer Model - Top-{top_k} Accuracy: {trans_events_top_k_accuracy:.4f}")
print(f"Baseline Model    - Top-{top_k} Accuracy: {base_events_top_k_accuracy:.4f}\n")

print(f"--- 'Hit Location' Category ---")
print(f"Transformer Model - Top-{top_k} Accuracy: {trans_loc_top_k_accuracy:.4f}")
print(f"Baseline Model    - Top-{top_k} Accuracy: {base_loc_top_k_accuracy:.4f}\n")

=== Top-4 Accuracy (Excluding Majority Classes) ===

--- 'Events' Category ---
Transformer Model - Top-4 Accuracy: 0.9433
Baseline Model    - Top-4 Accuracy: 0.5378

--- 'Hit Location' Category ---
Transformer Model - Top-4 Accuracy: 0.6383
Baseline Model    - Top-4 Accuracy: 0.4083



In [31]:
test_dataset.continuous_label_names

['launch_speed', 'hc_x', 'hc_y', 'launch_angle']

In [32]:
event_class_names = test_dataset.categorical_label_names[0]
loc_class_names = test_dataset.categorical_label_names[1]


# Create DataFrames for the results
trans_event_summary_df = pd.DataFrame({
    'Class': event_class_names,
    f'Event Precision (Top K {top_k})': trans_events_class_precisions

})

# Create DataFrames for the results
base_event_summary_df = pd.DataFrame({
    'Class': event_class_names,
    f'Event Precision (Top K {top_k})': base_events_class_precisions

})



display(trans_event_summary_df)
display(base_event_summary_df)

Unnamed: 0,Class,Event Precision (Top K 4)
0,events_B,0.997692
1,events_S,1.0
2,events_double,0.432077
3,events_field_out,0.999794
4,events_hit_by_pitch,0.956971
5,events_home_run,0.325567
6,events_single,0.969103
7,events_strikeout,0.993971
8,events_triple,0.0
9,events_walk,0.996309


Unnamed: 0,Class,Event Precision (Top K 4)
0,events_B,0.965045
1,events_S,0.975201
2,events_double,0.130149
3,events_field_out,0.759526
4,events_hit_by_pitch,0.015491
5,events_home_run,0.093829
6,events_single,0.351028
7,events_strikeout,0.48333
8,events_triple,0.009091
9,events_walk,0.205955


In [33]:

trans_loc_summary_df = pd.DataFrame({
    'Class': loc_class_names,
    f'Hit Loc Precision (Top K {top_k})': trans_loc_class_precisions

})

# Create DataFrames for the results
base_loc_summary_df = pd.DataFrame({
    'Class': loc_class_names,
    f'Hit Loc Precision (Top K {top_k})': base_loc_class_precisions

})

display(trans_loc_summary_df)
display(base_loc_summary_df)

Unnamed: 0,Class,Hit Loc Precision (Top K 4)
0,hit_location_0.0,1.0
1,hit_location_1.0,0.136722
2,hit_location_2.0,0.156171
3,hit_location_3.0,0.324655
4,hit_location_4.0,0.533186
5,hit_location_5.0,0.520624
6,hit_location_6.0,0.599096
7,hit_location_7.0,0.785664
8,hit_location_8.0,0.836177
9,hit_location_9.0,0.728159


Unnamed: 0,Class,Hit Loc Precision (Top K 4)
0,hit_location_0.0,0.999581
1,hit_location_1.0,0.129711
2,hit_location_2.0,0.035264
3,hit_location_3.0,0.242209
4,hit_location_4.0,0.378888
5,hit_location_5.0,0.318086
6,hit_location_6.0,0.407006
7,hit_location_7.0,0.465129
8,hit_location_8.0,0.510585
9,hit_location_9.0,0.475287


In [14]:
np.abs(cont_true - trans_cont_preds).mean()

launch_speed    4.415579
hc_x            7.245790
hc_y            7.752304
launch_angle    9.569696
dtype: float64