## Import all the necessary libraries

In [1]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

## Read the dataset we are working with

In [2]:
# Load the workout CSV into a DataFrame and preview its first five rows.
dataset = pd.read_csv(filepath_or_buffer='weightlifting_721_workouts.csv')
dataset.head(n=5)

Unnamed: 0,Date,Workout Name,Exercise Name,Set Order,Weight,Reps,Distance,Seconds,Notes,Workout Notes
0,2015-10-23 17:06:37,Chest,Incline Bench Press (Barbell),1,135.0,8,0.0,0,,
1,2015-10-23 17:06:37,Chest,Incline Bench Press (Barbell),2,135.0,8,0.0,0,,
2,2015-10-23 17:06:37,Chest,Incline Bench Press (Barbell),3,135.0,5,0.0,0,,
3,2015-10-23 17:06:37,Chest,Incline Bench Press (Barbell),4,185.0,7,0.0,0,,
4,2015-10-23 17:06:37,Chest,Incline Bench Press (Barbell),5,230.0,8,0.0,0,,


In [6]:
# Summarize column dtypes and non-null counts to spot sparsely populated columns.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9932 entries, 0 to 9931
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           9932 non-null   object 
 1   Workout Name   9932 non-null   object 
 2   Exercise Name  9932 non-null   object 
 3   Set Order      9932 non-null   int64  
 4   Weight         9932 non-null   float64
 5   Reps           9932 non-null   int64  
 6   Distance       9932 non-null   float64
 7   Seconds        9932 non-null   int64  
 8   Notes          7 non-null      object 
 9   Workout Notes  3 non-null      object 
dtypes: float64(2), int64(3), object(5)
memory usage: 776.1+ KB


In [7]:
# Count the missing values in each column (sum the null mask down the rows).
dataset.isnull().sum(axis=0)

Date                0
Workout Name        0
Exercise Name       0
Set Order           0
Weight              0
Reps                0
Distance            0
Seconds             0
Notes            9925
Workout Notes    9929
dtype: int64

### Removing unnecessary columns: columns with almost no values/inputs

In [21]:
# 'Notes' is populated in only 7 of the 9932 rows, so it is dropped.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second execution no longer raises KeyError.
dataset = dataset.drop(columns=['Notes'], errors='ignore')

In [22]:
# 'Workout Notes' is populated in only 3 of the 9932 rows, so it is dropped.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second execution no longer raises KeyError.
dataset = dataset.drop(columns=['Workout Notes'], errors='ignore')

In [28]:
# Re-check the frame after the drops: 8 fully populated columns should remain.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9932 entries, 0 to 9931
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           9932 non-null   object 
 1   Workout Name   9932 non-null   object 
 2   Exercise Name  9932 non-null   object 
 3   Set Order      9932 non-null   int64  
 4   Weight         9932 non-null   float64
 5   Reps           9932 non-null   int64  
 6   Distance       9932 non-null   float64
 7   Seconds        9932 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 620.9+ KB


### Correlation

In [39]:
# Pearson correlation between 'Set Order' and 'Weight'.
dataset['Set Order'].corr(other=dataset['Weight'], method='pearson')

0.30818792876594236

In [41]:
# Pearson correlation between 'Reps' and 'Weight'.
dataset['Reps'].corr(other=dataset['Weight'], method='pearson')

-0.37698451262108634

In [42]:
# Pearson correlation between 'Distance' and 'Weight'.
dataset['Distance'].corr(other=dataset['Weight'], method='pearson')

-0.02458573585897607

In [43]:
# Pearson correlation between 'Seconds' and 'Weight'.
dataset['Seconds'].corr(other=dataset['Weight'], method='pearson')

-0.05193560055052428

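###### A negative correlation means that as the variable increases, the weight tends to decrease. Only reps (-0.38) shows a moderate negative relationship with weight; the correlations for distance (-0.02) and seconds (-0.05) are so close to zero that they indicate almost no linear relationship. Set order (0.31) is positively correlated with weight, i.e. later sets tend to use heavier weights.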

### Split the dataset into independent and dependent variables

In [29]:
# List the remaining column names before choosing the features and the target.
dataset.columns

Index(['Date', 'Workout Name', 'Exercise Name', 'Set Order', 'Weight', 'Reps',
       'Distance', 'Seconds'],
      dtype='object')

In [33]:
# Independent variables: the four numeric columns other than Weight.
X = dataset.loc[:, ['Set Order', 'Reps', 'Distance', 'Seconds']]
# Dependent variable, kept as a one-column DataFrame (double brackets).
y = dataset.loc[:, ['Weight']]

### Split the dataset into training and test set

In [34]:
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split reproducible across runs. train_test_split is imported in the
# notebook's top import cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=17,
)

### Training the Multiple Linear Regression model on the Training set

In [35]:
# Fit a multiple linear regression of Weight on the four features.
# LinearRegression is imported in the notebook's top import cell.
# X is passed as a NumPy array via .values; the prediction cell passes
# X_test.values the same way so both calls receive the same input type.
regressor = LinearRegression()
regressor.fit(X_train.values, y_train)

### Intercept and coefficients

In [36]:
# Report the fitted parameters. Coefficients follow the column order of X:
# Set Order, Reps, Distance, Seconds.
coefficients = regressor.coef_
intercept = regressor.intercept_
print("Coefficients", coefficients)
print("Intercept", intercept)

Coefficients [[ 13.10734545 -12.75846022  -0.52846893  -0.28707097]]
Intercept [242.63545728]


### Predicting test set results

In [37]:
# Predict Weight for the held-out rows, passing a plain array as was done for fit().
test_features = X_test.values
y_pred = regressor.predict(test_features)

### Calculating RMSE and R-Square

In [38]:
# Evaluate the fitted model on the held-out test set.
# math, mean_squared_error and r2_score are imported in the notebook's top
# import cell.
test_r2 = r2_score(y_test, y_pred)
# Compute the MSE once and reuse it for the RMSE instead of scoring twice.
test_mse = mean_squared_error(y_test, y_pred)
test_rmse = math.sqrt(test_mse)

print(f"R-Square: {test_r2:.2f}")
print(f"MSE: {test_mse:.2f}")

print(f"RMSE: {test_rmse:.2f}")

R-Square: 0.22
MSE: 9836.40
RMSE: 99.18


# Braden Heuglin and Ben Griffis
## TP3
### Version 1
#### DS 160
##### Spring 2023