In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" theme; figures created after this call use it.
sns.set_style("whitegrid")

# Input CSV, given relative to the directory the notebook kernel runs in.
DATA_PATH = "../../data/processed/newtons_second_law_cleaned_dataset.csv"

# Fail fast with a clear message before any later cell tries to read the file.
# isfile (rather than exists) also rejects a directory sitting at this path,
# which would otherwise only fail later inside pd.read_csv.
if not os.path.isfile(DATA_PATH):
    raise FileNotFoundError(f"Feature-engineered dataset not found at: {DATA_PATH}")

In [None]:
# Read the CSV located at DATA_PATH into a DataFrame.
df = pd.read_csv(DATA_PATH)

# Report the frame's dimensions, then preview its first five rows as the
# cell's displayed output.
print(f"Loaded dataset: {len(df)} rows × {len(df.columns)} columns")
df.head(5)

In [None]:
# Column the model predicts, and the columns it receives as inputs.
target = "Force_N"
features = ["Mass_kg", "Acceleration_ms2", "mass_x_acceleration"]

# Pull the input matrix and the target vector out of the loaded frame.
X, y = df.loc[:, features], df.loc[:, target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

print(f"Train set: {X_train.shape}, Test set: {X_test.shape}")

In [None]:
# Fit a linear regression on the training split; fit() returns the estimator
# itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

print("✅ Model trained.")
print("Intercept:", model.intercept_)
# Pair each learned coefficient with the feature name it belongs to.
print("Coefficients:", {name: weight for name, weight in zip(features, model.coef_)})

In [None]:
# Predict the target for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true test targets: R² first, then the
# mean squared error and its square root (RMSE).
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f"📈 R² Score: {r2:.4f}")
print(f"📉 RMSE: {rmse:.4f}")

In [None]:
# Scatter of true vs. predicted target values on the test split. Explicit
# figure/axes handles are used instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(x=y_test, y=y_pred, alpha=0.6, color="dodgerblue", ax=ax)

# Dashed "predicted == actual" reference diagonal. Its endpoints are taken
# from the values actually plotted (test targets and predictions) rather than
# from the full-dataset `y`, so the line always spans the visible points and
# never stretches the axes beyond them.
line_min = min(y_test.min(), y_pred.min())
line_max = max(y_test.max(), y_pred.max())
ax.plot([line_min, line_max], [line_min, line_max], '--', color='gray')

ax.set_xlabel("Actual Force (N)")
ax.set_ylabel("Predicted Force (N)")
ax.set_title("Actual vs Predicted Force")
fig.tight_layout()
plt.show()

In [None]:
import joblib

# Define the output location once so the directory that is created, the file
# that is written, and the path that is reported cannot drift apart.
MODEL_DIR = "../../models/"
MODEL_PATH = os.path.join(MODEL_DIR, "newtons_law_regression_model.pkl")

# Create the target directory if it does not exist yet, then serialize the
# fitted estimator to disk.
os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(model, MODEL_PATH)

# Report the path that was actually written (relative to the working directory).
print(f"Model saved to {MODEL_PATH}")
