In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# Model Imports
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [33]:
# Load the dataset and parse game dates
df = pd.read_csv("savant_data_2021_2023.csv")
df["game_date"] = pd.to_datetime(df["game_date"])
df["season"] = df["game_date"].dt.year  # Extract season year


In [35]:
display(df)

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,batter,pitcher,events,description,zone,...,spin_axis,delta_home_win_exp,delta_run_exp,role_key,sp_indicator,rp_indicator,pitch_number_appearance,pitcher_at_bat_number,times_faced,season
0,FF,2021-04-05,93.2,0.84,5.74,c7c83eaa9fe8da2f81c5fce172059af61448b3e7,4f902241478a103f7a818f5be9a7b7ddf43cabb9,strikeout,swinging_strike,13.0,...,166.0,-0.014,-0.134,SP,1,0,97,22,3,2021
1,SL,2021-04-05,82.9,1.03,5.69,c7c83eaa9fe8da2f81c5fce172059af61448b3e7,4f902241478a103f7a818f5be9a7b7ddf43cabb9,,foul,5.0,...,309.0,0.000,0.000,SP,1,0,96,22,3,2021
2,FF,2021-04-05,94.6,-1.85,5.77,514eeb6e6c17085fc1adabf03f1adca32318411f,afb9b85defc6fe5c3f48681480eff4d96ab723c7,caught_stealing_3b,ball,14.0,...,220.0,0.014,-0.202,SP,1,0,95,19,3,2021
3,FF,2021-04-05,97.1,1.86,6.57,875eeca87c6f80182a88c2a7b92c048b9e10b5e5,0f061b9439845159c394a71e55d635b10ca25656,field_out,hit_into_play,13.0,...,147.0,-0.007,-0.264,SP,1,0,95,22,3,2021
4,FF,2021-04-05,93.6,0.60,5.85,c7c83eaa9fe8da2f81c5fce172059af61448b3e7,4f902241478a103f7a818f5be9a7b7ddf43cabb9,,ball,11.0,...,164.0,0.000,0.043,SP,1,0,95,22,3,2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2136332,SI,2023-05-26,90.2,-1.70,3.83,ceeeda3bde736c0c656b03be05b4583f1daaf664,da2331420ceeaeed96ab885543ca5a4c6cb97958,,called_strike,5.0,...,245.0,0.000,-0.038,RP,0,1,1,1,1,2023
2136333,SL,2023-05-26,88.3,3.13,5.19,7788ff0375c64348d36ae12869506ad498eefef4,1039ef0b36a4d7d4c2e9b7b6c2084b3cf99c11dc,,called_strike,8.0,...,238.0,0.000,-0.038,RP,0,1,1,1,1,2023
2136334,FF,2023-05-26,89.7,1.10,5.73,0f46bb1ed363d593af42cfddbe027922aff10780,c6020d12585d4a0d7b1babd840c54f37a9159aea,,ball,12.0,...,147.0,0.000,0.036,RP,0,1,1,1,1,2023
2136335,FF,2023-05-26,92.6,1.21,6.25,75174627fd934205ab7d1905d926b580814f9759,9a1d0691f34a0c9c1899f3c29af36638cc7a8af6,,called_strike,13.0,...,184.0,0.000,-0.063,RP,0,1,1,1,1,2023


In [37]:
# Aggregate unique batters faced and games played for pitchers
unique_batters_per_season = (
    df.drop_duplicates(subset=["season", "batter", "game_pk", "pitcher"])
    .groupby(['pitcher', 'season'])
    .agg(
        Batters_Faced_by_pitcher=("batter", "size"),
        games_played_pitcher=("game_pk", "nunique")
    )
    .reset_index()
)

# Aggregate unique at-bats and games played for batters
batting_time_count = (
    df.drop_duplicates(subset=["season", "batter", "game_pk", "at_bat_number"])
    .groupby(["batter", "season"])
    .agg(
        Plate_Appearence_by_batter=("at_bat_number", "size"),
        games_played_batter=("game_pk", "nunique")
    )
    .reset_index()
)

# Count additional metrics for pitchers
pitcher_pitches_thrown = (
    df.groupby(['pitcher', 'season'])
    .agg(pitches_thrown=("batter", "count"))
    .reset_index()
)

# Count additional metrics for batters
batter_balls_faced = (
    df.groupby(['batter', 'season'])
    .agg(balls_faced=("pitcher", "count"))
    .reset_index()
)

# Merge pitches thrown into pitcher stats
unique_batters_per_season = unique_batters_per_season.merge(
    pitcher_pitches_thrown,
    on=['pitcher', 'season'],
    how='left'
)

# Merge balls faced into batter stats
batting_time_count = batting_time_count.merge(
    batter_balls_faced,
    on=['batter', 'season'],
    how='left'
)

# Create strike zone indicator and count strike zone events for pitchers
df["is_in_strike_zone"] = df["zone"].isin([1,2,3,4,5,6,7,8,9])
strike_zone_count = df.groupby(["pitcher", "season"])["is_in_strike_zone"].sum().reset_index()
unique_batters_per_season = unique_batters_per_season.merge(
    strike_zone_count,
    on=['pitcher', 'season'],
    how='left'
)

# Count strikeouts for pitchers
strikeout_count = df.groupby(["pitcher", "season"])["type"].apply(lambda x: (x == 'S').sum()).reset_index(name="strikeouts_by_pitcher")
unique_batters_per_season = unique_batters_per_season.merge(
    strikeout_count,
    on=['pitcher', 'season'],
    how='left'
)


In [39]:
# Identify players who appear as both batters and pitchers
batters_set = set(df["batter"].dropna().unique())
pitchers_set = set(df["pitcher"].dropna().unique())
two_way_players = batters_set.intersection(pitchers_set)


In [41]:
# Rename keys for merging
batting_time_count.rename(columns={"batter": "player"}, inplace=True)
unique_batters_per_season.rename(columns={"pitcher": "player"}, inplace=True)

# Outer merge to combine batting and pitching stats
playing_time = pd.merge(
    batting_time_count,
    unique_batters_per_season,
    on=["player", "season"],
    how="outer"
).fillna(0)

# Calculate target: sum of Plate Appearances and Batters Faced
playing_time["total_playing_time"] = playing_time["Plate_Appearence_by_batter"] + playing_time["Batters_Faced_by_pitcher"]

# Flag two-way players (convert boolean to integer for modeling)
playing_time["two_way_player"] = playing_time["player"].isin(two_way_players).astype(int)


In [43]:
# Use .loc to safely fill missing values
fill_cols = ['woba_value', 'woba_denom', 'babip_value', 'iso_value', 
             'estimated_ba_using_speedangle', 'estimated_woba_using_speedangle']
for col in fill_cols:
    df.loc[:, col] = df[col].fillna(0)

# Create new features
df['total_break'] = np.sqrt(df['pfx_x']**2 + df['pfx_z']**2)
df['velocity_drop'] = df['release_speed'] - df['effective_speed']
df['on_edge'] = ((df['plate_x'].abs() > 0.7) & 
                 (df['plate_z'] > df['sz_bot'] + 0.3) & 
                 (df['plate_z'] < df['sz_top'] - 0.3))
df['strike_zone_location'] = np.sqrt(df['plate_x']**2 + (df['plate_z'] - df['sz_bot'])**2)
df['Run_production_impact'] = df['post_bat_score'] - df['bat_score']

# Map launch speed angle to descriptive categories
mapping = {1: 'Weak', 2: 'Topped', 3: 'Under', 4: 'Flare/Burner', 5: 'Solid Contact', 6: 'Barrel'}
df['launch_speed_angle_new'] = df['launch_speed_angle'].map(mapping)


In [45]:
# Batter season-level metrics
batter_season_metrics = (
    df.groupby(["batter", "season"])
    .agg(
        total_woba_value=("woba_value", "sum"),
        total_woba_denom=("woba_denom", "sum"),
        total_babip_value=("babip_value", "sum"),
        total_iso_value=("iso_value", "sum"),
        total_run_production=("Run_production_impact", "sum"),
        weak_contact_count_pitcher=("launch_speed_angle_new", lambda x: (x == "Weak").sum()),
        solid_contact_count_pitcher=("launch_speed_angle_new", lambda x: (x == "Solid Contact").sum()),
        barrel_count_pitcher=("launch_speed_angle_new", lambda x: (x == "Barrel").sum())
    )
    .reset_index()
)

# Pitcher season-level metrics
pitcher_season_metrics = (
    df.groupby(["pitcher", "season"])
    .agg(
        total_babip_value=("babip_value", "sum"),
        avg_total_break=("total_break", "mean"),
        avg_velocity_drop=("velocity_drop", "mean"),
        strike_zone_location_mean=("strike_zone_location", "mean"),
        pitches_on_edge=("on_edge", "sum"),
        weak_contact_count_batter=("launch_speed_angle_new", lambda x: (x == "Weak").sum()),
        topped_contact_count_batter=("launch_speed_angle_new", lambda x: (x == "Topped").sum()),
        under_contact_count_batter=("launch_speed_angle_new", lambda x: (x == "Under").sum()),
        flare_burner_contact_count_batter=("launch_speed_angle_new", lambda x: (x == "Flare/Burner").sum()),
        solid_contact_count_batter=("launch_speed_angle_new", lambda x: (x == "Solid Contact").sum()),
        barrel_count_batter=("launch_speed_angle_new", lambda x: (x == "Barrel").sum())
    )
    .reset_index()
)


In [53]:
# Merge batter metrics into playing_time
final_df = playing_time.merge(
    batter_season_metrics,
    left_on=['player', 'season'],
    right_on=['batter', 'season'],
    how='left'
)

# Merge pitcher metrics into final_df
final_df = final_df.merge(
    pitcher_season_metrics,
    left_on=['player', 'season'],
    right_on=['pitcher', 'season'],
    how='left'
)

# Fill any remaining missing values with 0
final_df.fillna(0, inplace=True)
# Combine BABIP metrics from batter and pitcher aggregations
final_df['total_babip_value_combined'] = final_df['total_babip_value_x'] + final_df['total_babip_value_y']


In [55]:
display(final_df)

Unnamed: 0,player,season,Plate_Appearence_by_batter,games_played_batter,balls_faced,Batters_Faced_by_pitcher,games_played_pitcher,pitches_thrown,is_in_strike_zone,strikeouts_by_pitcher,...,avg_velocity_drop,strike_zone_location_mean,pitches_on_edge,weak_contact_count_batter,topped_contact_count_batter,under_contact_count_batter,flare_burner_contact_count_batter,solid_contact_count_batter,barrel_count_batter,total_babip_value_combined
0,0014c193005b425aaad55358686fb0dd1a4a0755,2022,182.0,51.0,647.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0
1,0014c193005b425aaad55358686fb0dd1a4a0755,2023,60.0,27.0,220.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0
2,003af1e4636109b822c9acfa703cb517c46d89fc,2021,163.0,44.0,618.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.0
3,003af1e4636109b822c9acfa703cb517c46d89fc,2022,140.0,36.0,511.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0
4,003af1e4636109b822c9acfa703cb517c46d89fc,2023,91.0,28.0,327.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4448,ffc9b6a7a663bc322e74c70a5ed8756787a577d0,2021,0.0,0.0,0.0,275.0,68.0,1076.0,550.0,493.0,...,1.377230,1.273244,240.0,12.0,52.0,49.0,38.0,18.0,16.0,47.0
4449,ffc9b6a7a663bc322e74c70a5ed8756787a577d0,2022,0.0,0.0,0.0,197.0,55.0,817.0,405.0,374.0,...,1.362132,1.234826,142.0,8.0,42.0,35.0,28.0,8.0,7.0,35.0
4450,ffc9b6a7a663bc322e74c70a5ed8756787a577d0,2023,0.0,0.0,0.0,237.0,60.0,969.0,474.0,463.0,...,0.687822,1.254422,207.0,10.0,39.0,36.0,47.0,7.0,11.0,48.0
4451,ffd2fbf22dc9ac7811c1bd9ecf5d5c0392744a18,2021,3.0,2.0,11.0,199.0,22.0,1707.0,850.0,832.0,...,0.190920,1.124256,259.0,14.0,92.0,94.0,95.0,21.0,31.0,97.0


In [57]:
# Sort data and compute 3-year rolling averages for selected features
final_df.sort_values(by=['player', 'season'], inplace=True)
rolling_features = [
    'games_played_batter', 'balls_faced', 'games_played_pitcher',
    'pitches_thrown', 'is_in_strike_zone', 'strikeouts_by_pitcher',
    'total_playing_time', 'two_way_player', 'total_woba_value',
    'total_woba_denom', 'total_babip_value_combined', 'total_iso_value',
    'total_run_production', 'weak_contact_count_pitcher',
    'solid_contact_count_pitcher', 'barrel_count_pitcher',
    'avg_total_break', 'avg_velocity_drop',
    'strike_zone_location_mean', 'pitches_on_edge',
    'weak_contact_count_batter', 'topped_contact_count_batter',
    'under_contact_count_batter', 'flare_burner_contact_count_batter',
    'solid_contact_count_batter', 'barrel_count_batter'
]

for feature in rolling_features:
    final_df[f'rolling_{feature}'] = final_df.groupby('player')[feature].transform(lambda x: x.rolling(3, min_periods=1).mean())


In [79]:
# Use a time-based split: Train on 2021-2022 and use 2023 as validation.
train_data = final_df[final_df['season'] < 2023]  # Training data: 2021-2022
val_data   = final_df[final_df['season'] == 2023]  # Validation data: 2023

# Define features and target.
# Drop columns that contain identifiers or leak target information.
drop_cols = ['player', 'season', 'total_playing_time', 'Plate_Appearence_by_batter', 'Batters_Faced_by_pitcher', 'batter', 'pitcher']

X_train = train_data.drop(columns=drop_cols)
y_train = train_data['total_playing_time']

X_val = val_data.drop(columns=drop_cols)
y_val = val_data['total_playing_time']


In [81]:
# Define and train multiple models
models = {
    "XGBoost": XGBRegressor(n_estimators=200, learning_rate=0.1, max_depth=5, enable_categorical=True, random_state=42),
    "RandomForest": RandomForestRegressor(n_estimators=200, max_depth=5, random_state=42),
    "LinearRegression": LinearRegression()
}

results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    
    mae = mean_absolute_error(y_val, y_pred)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    
    results[name] = {"MAE": mae, "RMSE": rmse}
    print(f"{name} - MAE: {mae:.2f}, RMSE: {rmse:.2f}")


XGBoost - MAE: 7.19, RMSE: 11.21
RandomForest - MAE: 22.34, RMSE: 34.61
LinearRegression - MAE: 3.80, RMSE: 6.86


In [83]:
# Select the best model based on the lowest RMSE
best_model_name = min(results, key=lambda x: results[x]["RMSE"])
final_model = models[best_model_name]
print(f"Selected model: {best_model_name}")

# Plot feature importance if using a tree-based model
if best_model_name in ["XGBoost", "RandomForest"]:
    feature_importance = final_model.feature_importances_
    sorted_idx = np.argsort(feature_importance)[::-1]
    
    plt.figure(figsize=(10, 5))
    plt.barh(np.array(X_train.columns)[sorted_idx], feature_importance[sorted_idx])
    plt.xlabel("Feature Importance")
    plt.ylabel("Features")
    plt.title(f"Feature Importance - {best_model_name}")
    plt.show()


Selected model: LinearRegression


In [85]:
# Use the 2023 validation data as a base, then simulate the 2024 season by updating the season value.
predict_data = val_data.copy()
predict_data['season'] = 2024  # Simulate future season

X_predict = predict_data.drop(columns=drop_cols)
predict_data['predicted_playing_time'] = np.ceil(final_model.predict(X_predict))

# For players with multiple rows, keep the row with the highest predicted playing time
predict_data = predict_data.sort_values(by=['player', 'predicted_playing_time'], ascending=False)
predict_data = predict_data.drop_duplicates(subset=['player'], keep='first')

# Save predictions to CSV
predict_data[['player', 'predicted_playing_time']].to_csv("predictions_2024.csv", index=False)
print(f"✅ 2024 Predictions using {best_model_name} saved!")


✅ 2024 Predictions using LinearRegression saved!


In [87]:
# Load the sample submission file
df_submission = pd.read_csv("sample_submission.csv")

# Merge predictions with the sample submission on the player/PLAYER_ID column
submission_df = predict_data.merge(
    df_submission,
    left_on='player',
    right_on='PLAYER_ID',
    how='left'
)
display(submission_df[['player', 'predicted_playing_time', 'PLAYER_ID', 'PLAYING_TIME']])


Unnamed: 0,player,predicted_playing_time,PLAYER_ID,PLAYING_TIME
0,ffc9b6a7a663bc322e74c70a5ed8756787a577d0,233.0,,
1,ffa57541bf6d7030fdf7206f4aa0141a2c918647,215.0,ffa57541bf6d7030fdf7206f4aa0141a2c918647,520.0
2,ff3b7d21e60c24b6e926e3ffc3fbeb78cc9e4057,274.0,ff3b7d21e60c24b6e926e3ffc3fbeb78cc9e4057,202.0
3,fe80da2e964fa0782b5d30725105f59444a90261,200.0,fe80da2e964fa0782b5d30725105f59444a90261,108.0
4,fe6eac0778f476c3f1ffe6a1fd637f077573e077,529.0,fe6eac0778f476c3f1ffe6a1fd637f077573e077,532.0
...,...,...,...,...
1449,0084c15ee1d82fb5b793e1ff130f46651dd13e17,62.0,0084c15ee1d82fb5b793e1ff130f46651dd13e17,41.0
1450,007dcc596b82af90fd37f3413e98812b87a6b305,196.0,,
1451,0043ac96d4fde6fcfd5a841b8d902661e69a6009,45.0,0043ac96d4fde6fcfd5a841b8d902661e69a6009,33.0
1452,003af1e4636109b822c9acfa703cb517c46d89fc,91.0,003af1e4636109b822c9acfa703cb517c46d89fc,131.0


In [69]:
from lightgbm import LGBMRegressor

# Instantiate and train LightGBM
lgbm_model = LGBMRegressor(n_estimators=200, learning_rate=0.1, max_depth=5, random_state=42)
lgbm_model.fit(X_train, y_train)

# Make predictions and evaluate
y_pred_lgbm = lgbm_model.predict(X_val)
mae_lgbm = mean_absolute_error(y_val, y_pred_lgbm)
rmse_lgbm = np.sqrt(mean_squared_error(y_val, y_pred_lgbm))
print(f"LGBMRegressor - MAE: {mae_lgbm:.2f}, RMSE: {rmse_lgbm:.2f}")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001366 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8903
[LightGBM] [Info] Number of data points in the train set: 2999, number of used features: 53
[LightGBM] [Info] Start training from score 199.873291
LGBMRegressor - MAE: 7.85, RMSE: 12.24


In [71]:
from sklearn.linear_model import Ridge, Lasso, ElasticNet

# Instantiate models with sample hyperparameters.
# You can adjust the alpha (regularization strength) and l1_ratio (for ElasticNet) as needed.
ridge_model = Ridge(alpha=1.0, random_state=42)
lasso_model = Lasso(alpha=0.1, random_state=42)
elastic_model = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42)

# Create a dictionary for easier iteration.
models_reg = {
    "Ridge": ridge_model,
    "Lasso": lasso_model,
    "ElasticNet": elastic_model
}

# Evaluate each model on the validation set.
for name, model in models_reg.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    mae = mean_absolute_error(y_val, y_pred)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    print(f"{name} - MAE: {mae:.2f}, RMSE: {rmse:.2f}")


Ridge - MAE: 3.80, RMSE: 6.86
Lasso - MAE: 5.62, RMSE: 8.38
ElasticNet - MAE: 5.62, RMSE: 8.38


In [73]:
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.linear_model import Ridge

# Define a Ridge model
ridge = Ridge(random_state=42)

# Set up a grid of alpha values to test
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

# Create a TimeSeriesSplit for time-aware CV (using n_splits=2 since training data spans 2021-2022)
tscv = TimeSeriesSplit(n_splits=2)

# Use GridSearchCV to find the best alpha value based on RMSE (using negative RMSE as scoring)
grid_search = GridSearchCV(ridge, param_grid, cv=tscv, scoring='neg_root_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best RMSE on training CV:", -grid_search.best_score_)


Best parameters: {'alpha': 100}
Best RMSE on training CV: 3.865848926517412


In [75]:
# Retrieve the best Ridge estimator from the grid search
tuned_ridge = grid_search.best_estimator_

# Predict on the 2023 validation data
y_pred_tuned = tuned_ridge.predict(X_val)
mae_tuned = mean_absolute_error(y_val, y_pred_tuned)
rmse_tuned = np.sqrt(mean_squared_error(y_val, y_pred_tuned))

print(f"Tuned Ridge - MAE: {mae_tuned:.2f}, RMSE: {rmse_tuned:.2f}")


Tuned Ridge - MAE: 3.79, RMSE: 6.85


In [None]:
# Simulate 2024 predictions using the 2023 validation data as a base
predict_data = val_data.copy()
predict_data['season'] = 2024  # Update season to simulate future data

# Prepare the features for prediction by dropping the same columns used during training
X_predict = predict_data.drop(columns=drop_cols)
predict_data['predicted_playing_time'] = np.ceil(tuned_ridge.predict(X_predict))

# For players with multiple rows, keep the one with the highest predicted playing time
predict_data = predict_data.sort_values(by=['player', 'predicted_playing_time'], ascending=False)
predict_data = predict_data.drop_duplicates(subset=['player'], keep='first')

# Save predictions to CSV
predict_data[['player', 'predicted_playing_time']].to_csv("predictions_2024_tuned_ridge.csv", index=False)
print(f"✅ 2024 Predictions using Tuned Ridge (alpha=100) saved!")

In [103]:
# Load the sample submission file
df_submission = pd.read_csv("sample_submission.csv")

# Merge your predictions with the sample submission file using df_submission as the base.
# This ensures that only players in the sample submission (i.e., the required 1149 rows) are included.
submission_df = df_submission.merge(
    predict_data[['player', 'predicted_playing_time']],
    left_on='PLAYER_ID',
    right_on='player',
    how='left'
)

# Check the shape and first few rows to confirm it has 1149 rows
print("Submission shape:", submission_df.shape)
display(submission_df[['PLAYER_ID', 'predicted_playing_time']].head())

# Save the final submission file with exactly the required rows
submission_df[['PLAYER_ID', 'predicted_playing_time']].to_csv("final_submission.csv", index=False)
print("✅ Final submission saved as 'final_submission.csv'")


Submission shape: (1149, 4)


Unnamed: 0,PLAYER_ID,predicted_playing_time
0,9a844e4d2ab1c3791241f7695bc86d1cbf40cba3,305.0
1,82cb9c0e72ca056b5de69fc76b30f69789740cbf,274.0
2,263f19273c6c4c12eceadb8824228fe1eac5e8a4,29.0
3,12dd288e43abf7690d8993f47d2bed8a024c8fef,95.0
4,7d73305e306705436ac4f31f8149a297d95b0040,221.0


✅ Final submission saved as 'final_submission.csv'
