In [1]:
!pip install scikit-learn numpy



In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# 1. Build a small demonstration dataset
# The feature column X (e.g., hours studied) holds ten evenly spaced values,
# 5 through 95 in steps of 10, shaped as a single column (n_samples, 1).
# The target y (e.g., exam score) is a flat array with one value per sample.
X = np.arange(5, 100, 10).reshape(-1, 1)
y = np.array([12, 28, 50, 68, 85, 90, 105, 115, 130, 155])

print("Original Data:")
print(f"Features (X):\n{X.ravel()}")
print(f"Target (y):\n{y}\n")

# 2. Hold out part of the data for evaluation
# The model is fitted on the training portion only; the test portion (20% of
# the samples) is kept aside so it can be scored on data it has not seen.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(
    "Data Split:",
    f"Training features shape: {X_train.shape}",
    f"Testing features shape: {X_test.shape}\n",
    sep="\n",
)

# 3. Build and fit the model
# fit() returns the estimator itself, so constructing the Linear Regression
# model and training it on the training data can be chained in one expression.
model = LinearRegression().fit(X_train, y_train)

# 4. Predict on the held-out test set
# The fitted model produces one predicted target per row of X_test; the
# predictions are shown next to the true targets, rounded to two decimals
# for display only (y_pred itself keeps full precision).
y_pred = model.predict(X_test)

print(
    "Predictions:",
    f"Actual values (y_test):\n{y_test}",
    f"Predicted values (y_pred):\n{y_pred.round(2)}\n",
    sep="\n",
)

# 5. Evaluate the model's performance
# Mean Squared Error (MSE) is the average squared difference between the
# actual and predicted values, so it is expressed in squared target units.
# A lower value indicates a better fit.
mse = mean_squared_error(y_test, y_pred)
print("Model Evaluation:")  # plain string: there is nothing to interpolate here
print(f"Mean Squared Error: {mse:.2f}")
# score() on a regressor reports R-squared for the given features and targets.
# NOTE(review): the split above uses test_size=0.2 on ten samples, so this
# R-squared is computed from only two test points and is a very noisy estimate.
print(f"Model's R-squared score: {model.score(X_test, y_test):.2f}")

# The fitted line is y = slope * x + intercept; with a single feature,
# coef_ holds exactly one slope, hence the [0] index.
print(f"Model's learned slope (coefficient): {model.coef_[0]:.2f}")
print(f"Model's learned y-intercept: {model.intercept_:.2f}")

Original Data:
Features (X):
[ 5 15 25 35 45 55 65 75 85 95]
Target (y):
[ 12  28  50  68  85  90 105 115 130 155]

Data Split:
Training features shape: (8, 1)
Testing features shape: (2, 1)

Predictions:
Actual values (y_test):
[130  28]
Predicted values (y_pred):
[137.14  32.86]

Model Evaluation:
Mean Squared Error: 37.29
Model's R-squared score: 0.99
Model's learned slope (coefficient): 1.49
Model's learned y-intercept: 10.52
