# DSA210 Final Project Code

This notebook contains EDA, hypothesis testing, and regression modeling steps for analyzing fitness and supplement data.

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr, ttest_ind
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the project spreadsheet into a DataFrame and preview its first rows.
DATA_PATH = '/mnt/data/dsa 210 data.xlsx'
df = pd.read_excel(DATA_PATH)
df.head()


## Exploratory Data Analysis (EDA)

In [None]:

# Summary statistics table for the dataset.
summary_stats = df.describe()
display(summary_stats)

# Pairwise correlations between the numeric columns, drawn as an annotated
# heatmap on an explicitly created figure/axes pair.
corr_matrix = df.corr(numeric_only=True)
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()


## Hypothesis Testing

In [None]:

# Hypothesis test: is Magnesium intake associated with Sleep Hours?
# Pearson's r measures the strength of a *linear* association only; a
# significant result does not by itself show that magnesium changes sleep.
#
# Keep only the rows where both measurements are present: a missing value in
# either column would otherwise invalidate the correlation.
paired = df[['Magnesium_mg', 'Sleep_Hours']].dropna()
corr, p_value = pearsonr(paired['Magnesium_mg'], paired['Sleep_Hours'])
print(f"Correlation: {corr:.3f}, P-Value: {p_value:.3f}")


## Machine Learning - Linear Regression Models

In [None]:

# Example: predicting Weight with a linear regression on selected predictors.
features = ['Protein_g', 'Carbs_g', 'Magnesium_mg', 'Sleep_Hours', 'Bench_kg', 'Squat_kg']
target = 'Weight_kg'

# LinearRegression rejects NaN inputs, so fit only on rows where every
# predictor and the target are present.
model_data = df[features + [target]].dropna()
X = model_data[features]
y = model_data[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("R2 Score:", r2_score(y_test, y_pred))

# RMSE is the square root of the MSE. It is computed explicitly because the
# `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6, where passing it raises a TypeError.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print("RMSE:", rmse)
