In [None]:
import numpy as np
import pandas as pd

# Location of the raw OpenIPF export, relative to the notebook's working directory.
OPENIPF_CSV = 'data/openipf-2025-11-08-c1c550e2.csv'

# Load every column of the export; later cells pick the subset they need.
fulldata = pd.read_csv(OPENIPF_CSV)

In [None]:
# The three competition lifts; each one is a regression target in later cells.
LIFT_COLUMNS = ['Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg']

# Keep only the lifter attributes and lifts the models use, plus 'Equipment'
# which is needed for the filter below.
data = fulldata[['Sex', 'Age', 'BodyweightKg', *LIFT_COLUMNS, 'Equipment']]

# Restrict to raw (unequipped) lifting and drop rows missing any used value.
data = data[data['Equipment'] == 'Raw']
data = data.dropna()

# OpenPowerlifting-format data can store a failed lift as a negative Best3
# value (the weight that was attempted and missed). Those rows survive
# dropna() but are not real results, so keep only strictly positive lifts.
data = data[(data[LIFT_COLUMNS] > 0).all(axis=1)]

# 'Equipment' is constant after filtering, so it carries no information.
# The explicit copy makes `data` an independent frame, so later column
# assignments never write into a slice of `fulldata`.
data = data.drop(columns=['Equipment']).copy()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the categorical 'Sex' column with integer codes so it can be used
# as a regression feature. The fitted encoder is kept in `le`, so the
# code-to-label mapping can be inspected through `le.classes_`.
le = LabelEncoder()
data['Sex'] = le.fit_transform(data['Sex'])

In [None]:
from sklearn.model_selection import train_test_split

# For each lift, the target is that lift's column and the features are every
# other column of `data` (including the two remaining lifts).
X_squat, y_squat = data.drop(columns='Best3SquatKg'), data['Best3SquatKg']
X_bench, y_bench = data.drop(columns='Best3BenchKg'), data['Best3BenchKg']
X_deadlift, y_deadlift = data.drop(columns='Best3DeadliftKg'), data['Best3DeadliftKg']

# Same hold-out fraction and seed for all three splits.
split_options = dict(test_size=0.2, random_state=42)

X_squat_train, X_squat_test, y_squat_train, y_squat_test = train_test_split(
    X_squat, y_squat, **split_options
)
X_bench_train, X_bench_test, y_bench_train, y_bench_test = train_test_split(
    X_bench, y_bench, **split_options
)
X_deadlift_train, X_deadlift_test, y_deadlift_train, y_deadlift_test = train_test_split(
    X_deadlift, y_deadlift, **split_options
)

In [None]:
from sklearn.linear_model import LinearRegression

# Fit one linear regression per lift on its training split
# (`fit` returns the estimator itself, so construction and fitting chain).
squat = LinearRegression().fit(X_squat_train, y_squat_train)
bench = LinearRegression().fit(X_bench_train, y_bench_train)
deadlift = LinearRegression().fit(X_deadlift_train, y_deadlift_train)

# Predictions on the held-out rows, used for evaluation in the next cell.
squat_predictions = squat.predict(X_squat_test)
bench_predictions = bench.predict(X_bench_test)
deadlift_predictions = deadlift.predict(X_deadlift_test)

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error of each model on its held-out test split.
squat_mse = mean_squared_error(y_true=y_squat_test, y_pred=squat_predictions)
bench_mse = mean_squared_error(y_true=y_bench_test, y_pred=bench_predictions)
deadlift_mse = mean_squared_error(y_true=y_deadlift_test, y_pred=deadlift_predictions)

# Report the three errors in squat / bench / deadlift order.
for lift_label, lift_mse in (
    ('Squat', squat_mse),
    ('Bench', bench_mse),
    ('Deadlift', deadlift_mse),
):
    print(f'{lift_label} MSE: {lift_mse}')

In [None]:
# Column layout of `myData`, matching the column order of the cleaned frame.
my_columns = ['Sex', 'Age', 'BodyweightKg', 'Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg']

# One lifter's values in `my_columns` order. 'Sex' must be the integer code
# produced by the LabelEncoder cell.
# NOTE(review): 1 presumably corresponds to 'M' -- confirm with `le.classes_`.
myData = np.array([[1, 19, 86.5, 180, 97.5, 200]])

# The models were fitted on DataFrames, so predict on a DataFrame as well.
# Selecting by the training feature names (instead of deleting a positional
# column from a bare array) avoids scikit-learn's feature-name mismatch
# warning and fails with a KeyError, rather than silently mis-predicting,
# if the column layout ever changes.
my_frame = pd.DataFrame(myData, columns=my_columns)

squat_prediction = squat.predict(my_frame[X_squat.columns])
bench_prediction = bench.predict(my_frame[X_bench.columns])
deadlift_prediction = deadlift.predict(my_frame[X_deadlift.columns])

print(f'Predicted Squat: {squat_prediction[0]:.0f} kg')
print(f'Predicted Bench: {bench_prediction[0]:.0f} kg')
print(f'Predicted Deadlift: {deadlift_prediction[0]:.0f} kg')

In [None]:
# Predicted minus actual for each lift: a negative value means the actual
# lift exceeds what the model predicts from the other inputs.
squat_difference = squat_prediction - myData[0, 3]
bench_difference = bench_prediction - myData[0, 4]
deadlift_difference = deadlift_prediction - myData[0, 5]

# Collect the single-row results as [squat, bench, deadlift].
differences = np.array(
    [gap[0] for gap in (squat_difference, bench_difference, deadlift_difference)]
)

In [None]:
def return_lift(index):
    """Map a position in the [squat, bench, deadlift] ordering to its name.

    Returns 'Squat' for 0, 'Bench' for 1, 'Deadlift' for 2, and None for
    any other value.
    """
    for position, label in enumerate(('Squat', 'Bench', 'Deadlift')):
        if index == position:
            return label
    return None

In [None]:
# `differences` holds predicted minus actual, so the smallest entry is the
# lift that most outperforms its prediction and the largest is the one that
# falls furthest short of it.
best_index = np.argmin(differences)
worst_index = np.argmax(differences)

print('Your best lift is:', return_lift(best_index))
print('Your worst lift is:', return_lift(worst_index))