### Video No. 7

### Multivariate Linear Regression Example
Using the Combined Cycle Power Plant dataset from the UCI Machine Learning Repository.

In [1]:
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [2]:
# Download the Combined Cycle Power Plant dataset (UCI repository id 294).
combined_cycle_power_plant = fetch_ucirepo(id=294)

# Feature table and target table, both provided as pandas DataFrames.
X, y = (
    combined_cycle_power_plant.data.features,
    combined_cycle_power_plant.data.targets,
)

# Show the dataset-level metadata first, then the per-variable description.
print(combined_cycle_power_plant.metadata)
print(combined_cycle_power_plant.variables)


{'uci_id': 294, 'name': 'Combined Cycle Power Plant', 'repository_url': 'https://archive.ics.uci.edu/dataset/294/combined+cycle+power+plant', 'data_url': 'https://archive.ics.uci.edu/static/public/294/data.csv', 'abstract': 'The dataset contains 9568 data points collected from a Combined Cycle Power Plant over 6 years (2006-2011), when the plant was set to work with full load. ', 'area': 'Computer Science', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 9568, 'num_features': 4, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['PE'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2014, 'last_updated': 'Mon Mar 04 2024', 'dataset_doi': '10.24432/C5002N', 'creators': ['Pnar Tfekci', 'Heysem Kaya'], 'intro_paper': {'ID': 406, 'type': 'NATIVE', 'title': 'Prediction of full load electrical power output of a base load operated combined cycle power plant using machine learning methods', 'aut

In [3]:
# Preview the first rows of the feature table (columns AT, V, AP, RH).
X.head()

Unnamed: 0,AT,V,AP,RH
0,14.96,41.76,1024.07,73.17
1,25.18,62.96,1020.04,59.08
2,5.11,39.4,1012.16,92.14
3,20.86,57.32,1010.24,76.64
4,10.82,37.5,1009.23,96.62


In [4]:
# Preview the first rows of the target table (single column PE).
y.head()

Unnamed: 0,PE
0,463.26
1,444.37
2,488.56
3,446.48
4,473.9


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
def linear_regression(X, y):
    """Fit ordinary least squares with an intercept term.

    Parameters
    ----------
    X : array-like
        Feature values, one row per observation. Anything ``np.asarray``
        accepts (e.g. a DataFrame or a nested list).
    y : array-like
        Target values with the same number of rows as ``X``.

    Returns
    -------
    numpy.ndarray
        Fitted coefficients. The first entry is the intercept, followed by one
        coefficient per column of ``X``. A 2-D ``y`` of shape ``(n, k)`` gives
        a ``(p + 1, k)`` result; a 1-D ``y`` gives a ``(p + 1,)`` result.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Prepend a column of ones (x0 = 1) so the intercept is estimated together
    # with the slopes.
    X_b = np.c_[np.ones((X.shape[0], 1)), X]

    if y.ndim == 0 or X_b.shape[0] != y.shape[0]:
        raise ValueError(
            f"X and y must have the same number of rows, got {X_b.shape[0]} and "
            f"{y.shape[0] if y.ndim else 'a scalar'}"
        )

    # Solve the least-squares problem directly instead of forming and
    # inverting X_b.T @ X_b: explicit inversion squares the condition number
    # and breaks down when features are (nearly) collinear. lstsq returns the
    # same solution for full-rank problems and the minimum-norm solution
    # otherwise.
    theta_best, *_ = np.linalg.lstsq(X_b, y, rcond=None)
    return theta_best

In [7]:
def fit(X, y):
    """Fit the linear model and return its coefficient array.

    Thin convenience wrapper: forwards ``X`` and ``y`` unchanged to
    ``linear_regression`` and returns whatever it produces.
    """
    return linear_regression(X, y)


In [8]:
# Estimate the coefficients on the training split. The first entry is the
# intercept (linear_regression prepends the column of ones); the remaining
# entries follow the column order of X_train.
betas = fit(X_train, y_train)
print("Betas (coefficients):", betas)


Betas (coefficients): [[ 4.54569115e+02]
 [-1.98589969e+00]
 [-2.32093577e-01]
 [ 6.21999093e-02]
 [-1.58117787e-01]]


In [9]:
# Predict on the held-out split: y_hat = X · slopes + intercept.
y_predict = X_test @ betas[1:] + betas[0]

# Compute the residuals on plain NumPy arrays. Leaving them as a DataFrame
# makes np.mean depend on the pandas version: older releases reduce per
# column and return a Series, which then breaks the float formatting below.
error = np.asarray(y_predict) - y_test.to_numpy()
mse = np.mean(error ** 2)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.3f} MW")

RMSE: 4.503 MW


In [10]:
# Predict the output for one hand-written observation using the custom betas.
# NOTE(review): these values are far from the rows shown by X.head()
# (AP is around 1010 there and V is well below 100), so the model is
# extrapolating — confirm this example point is intentional.
X_new = np.array([[12, 100, 32, 3.2]])  # column order: AT, V, AP, RH
X_new_b = np.c_[np.ones((X_new.shape[0], 1)), X_new]  # prepend x0 = 1 for the intercept
y_predict = X_new_b @ betas
print("Predicted value:", y_predict[0][0])

Predicted value: 409.01338068744985


In [11]:
# Fit scikit-learn's LinearRegression on the same training split so its
# coefficients can be compared with the hand-written solver.
LR_model = LinearRegression()
LR_model.fit(X_train, y_train)


0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [12]:
# Slopes learned by scikit-learn; the intercept lives separately in
# LR_model.intercept_ and is not part of coef_.
print("Model coefficients (sklearn):", LR_model.coef_)


Model coefficients (sklearn): [[-1.98589969 -0.23209358  0.06219991 -0.15811779]]


In [13]:
# Score the same hand-written observation with the scikit-learn model.
# The model was fitted on a DataFrame, so the new row is wrapped in a
# DataFrame with the training column names; passing a bare array makes
# scikit-learn warn that X does not have valid feature names.
X_new_sklearn = pd.DataFrame([[12.0, 100.0, 32.0, 3.2]], columns=X_train.columns)
y_predict_sklearn_new = LR_model.predict(X_new_sklearn)
print("Predicted value (sklearn):", (y_predict_sklearn_new[0]))


Predicted value (sklearn): [409.01338068]




In [14]:
# Evaluate the scikit-learn model on the held-out split with the same RMSE
# recipe used for the custom solver.
y_predict_sklearn = LR_model.predict(X_test)
error_sklearn = np.subtract(y_predict_sklearn, y_test.to_numpy())
mse_sklearn = np.square(error_sklearn).mean()
rmse_sklearn = np.sqrt(mse_sklearn)

# Report the single-point prediction again, followed by both RMSE values.
print("Predicted value (sklearn):", (y_predict_sklearn_new[0]))
print(f"Custom RMSE: {rmse:.4f} MW")
print(f"Sklearn RMSE: {rmse_sklearn:.4f} MW")

Predicted value (sklearn): [409.01338068]
Custom RMSE: 4.5026 MW
Sklearn RMSE: 4.5026 MW


In [15]:
# Side-by-side comparison of the coefficients and test RMSE from both fits.
print("Betas (custom):", betas)
print("Betas (sklearn):", LR_model.coef_, LR_model.intercept_)
print("RMSE (custom):", rmse)
print("RMSE (sklearn):", rmse_sklearn)

# Turn the visual comparison into an explicit check: the hand-written solver
# and scikit-learn should agree up to floating-point noise. The custom betas
# store the intercept first, so the sklearn values are assembled in that order.
betas_sklearn = np.concatenate(
    [np.ravel(LR_model.intercept_), np.ravel(LR_model.coef_)]
)
np.testing.assert_allclose(np.ravel(betas), betas_sklearn, rtol=1e-6)
np.testing.assert_allclose(rmse, rmse_sklearn, rtol=1e-6)


Betas (custom): [[ 4.54569115e+02]
 [-1.98589969e+00]
 [-2.32093577e-01]
 [ 6.21999093e-02]
 [-1.58117787e-01]]
Betas (sklearn): [[-1.98589969 -0.23209358  0.06219991 -0.15811779]] [454.56911459]
RMSE (custom): 4.5026332295467775
RMSE (sklearn): 4.502633229532185
