In [4]:
import numpy as np # Import the NumPy library, which is fundamental for numerical operations in Python, especially for handling arrays.
from sklearn.linear_model import LinearRegression # Import the LinearRegression model class from scikit-learn.
import joblib # Import the joblib library, which is a popular and efficient tool for saving and loading Python objects, especially scikit-learn models.

# 1. Prepare Data
# Tiny one-feature regression dataset: six samples with x = 1..6.
# scikit-learn models expect the features as a 2D array, so the 1D vector is
# given a trailing axis, producing shape (6, 1).
X_train = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])[:, np.newaxis]
# Training labels (target values), one per sample, shape (6,).
# The first five points lie exactly on y = 2x; the last one (x=6 -> 15.0
# instead of 12.0) does not, so the fitted line will not be exactly y = 2x.
y_train = np.array([2.0, 4.0, 6.0, 8.0, 10.0, 15.0])

# 2. Build and Train the Model
# Building and training are two separate calls here: the constructor creates
# a model object ready to be trained, and .fit() does the actual learning.
model = LinearRegression()
print("Starting model training...")
# Learn the relationship between the features (X_train) and labels (y_train).
model.fit(X_train, y_train)
print("Model training complete.")

# 3. Evaluate the Model (Optional)
# Try the trained model on a new input, X=10. The input must be a 2D array
# even for a single sample, hence the reshape to one row.
prediction_value = model.predict(np.array([10.0]).reshape(1, -1))
# predict() returns a NumPy array; take its first element and show it with
# two decimal places.
print(f"Prediction for X=10: {prediction_value[0]:.2f}")

# 4. Save the Model
# Serialize the entire trained model object with joblib, a common choice for
# scikit-learn models. The relative path is resolved against the current
# working directory.
model_save_path = "my_linear_model.pkl"
joblib.dump(value=model, filename=model_save_path)
print(f"Model saved to: {model_save_path}")

# 5. Load the Model (Optional, for demonstration)
# Deserialize the model object from the file that was just written.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
loaded_model = joblib.load(filename=model_save_path)
print(f"Model loaded from: {model_save_path}")

# Repeat the X=10 prediction with the reloaded model; printing it lets the
# reader confirm the model was loaded correctly.
loaded_prediction_value = loaded_model.predict(np.array([10.0]).reshape(1, -1))
print(f"Prediction with loaded model for X=10: {loaded_prediction_value[0]:.2f}")

Starting model training...
Model training complete.
Prediction for X=10: 23.29
Model saved to: my_linear_model.pkl
Model loaded from: my_linear_model.pkl
Prediction with loaded model for X=10: 23.29
