In [1]:
# Import Library

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the dataset

# Load the training and testing CSVs (paths are relative to the working directory).
# Only `train` is used by the modelling cells below; `test` is loaded for later use.
train = pd.read_csv("dataset/Training_set_ccpp.csv")
test = pd.read_csv("dataset/Testing_set_ccpp.csv")

### Linear Regression using AT to Predict PE

In [22]:
# Single-feature setup: AT is the only predictor, PE is the target.
# Both are reshaped into (n_samples, 1) column vectors before fitting.
X = train['AT'].to_numpy().reshape(-1, 1)
y = train['PE'].to_numpy().reshape(-1, 1)

In [23]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed (42) makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [24]:
from sklearn.linear_model import LinearRegression

# Linear regression estimator with default settings; it is fitted in the following cell.
regressor = LinearRegression()

In [25]:
# Fit the single-feature model (AT -> PE) on the training split.
regressor.fit(X_train, y_train)

LinearRegression()

In [26]:
# Intercept of the fitted line, i.e. the model's PE prediction at AT = 0.
inter = regressor.intercept_
print(inter)  # one-element array because y was reshaped to a column vector

[497.18060452]


In [27]:
# Slope of the fitted line: change in predicted PE per unit increase in AT.
reg = regressor.coef_
print(reg)  # shape (1, 1): one target, one feature

[[-2.17705286]]


In [28]:
# Predict PE for the held-out AT values.
y_pred = regressor.predict(X_test)

In [29]:
# Put ground truth and predictions side by side. Both arrays are (n, 1) column
# vectors, so the length-1 axis is dropped before they become DataFrame columns.
df_preds = pd.DataFrame(
    {
        'Actual': np.squeeze(y_test),
        'Predicted': np.squeeze(y_pred),
    }
)
print(df_preds)

      Actual   Predicted
0     432.60  427.928553
1     441.18  446.259338
2     482.99  478.414409
3     437.12  444.408843
4     487.33  485.315666
...      ...         ...
1277  452.30  451.941446
1278  446.84  450.265115
1279  470.88  468.487048
1280  467.20  467.028422
1281  446.08  450.896461

[1282 rows x 2 columns]


In [30]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [31]:
# Error metrics of the AT-only model on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # square root of MSE, expressed in the units of PE
mae = mean_absolute_error(y_test, y_pred)

In [32]:
# Report the three error metrics, one per line, rounded to two decimals.
print(
    f'Mean absolute error: {mae:.2f}',
    f'Mean squared error: {mse:.2f}',
    f'Root mean squared error: {rmse:.2f}',
    sep='\n',
)

Mean absolute error: 4.26
Mean squared error: 29.45
Root mean squared error: 5.43


### Predict PE based on AT and EV

In [33]:
# Two-feature setup: predict PE from AT and EV.
X = train.loc[:, ['AT', 'EV']]
y = train.loc[:, 'PE']

In [34]:
# Same 80/20 split and seed (42) as the single-feature model.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [35]:
# Build and fit the two-feature model in one step (fit() returns the estimator itself);
# the bare name on the last line keeps the estimator repr as the cell output.
regressor = LinearRegression().fit(X_train, y_train)
regressor

LinearRegression()

In [36]:
# Intercept of the two-feature model (AT, EV).
regressor.intercept_

505.5877281179684

In [37]:
# Fitted coefficients, in the column order of X (AT, EV).
regressor.coef_

array([-1.71443086, -0.32229592])

In [38]:
# Label each fitted coefficient with the feature (column of X) it belongs to.
feature_names = X.columns
model_coefficients = regressor.coef_

coefficients_df = pd.DataFrame(
    {'Coefficient value': model_coefficients},
    index=feature_names,
)
print(coefficients_df)

    Coefficient value
AT          -1.714431
EV          -0.322296


In [39]:
# Predict PE for the held-out rows using AT and EV.
y_pred = regressor.predict(X_test)

In [40]:
# Compare actual and predicted PE row by row. The predictions are a plain array, so
# they are attached to y_test's index to keep the original row labels of `train`.
results = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': pd.Series(y_pred, index=y_test.index),
    }
)
print(results)

      Actual   Predicted
5093  432.60  428.529644
1044  441.18  441.798440
4827  482.99  478.027078
2905  437.12  442.017113
4994  487.33  483.661647
...      ...         ...
5247  452.30  449.811914
509   446.84  454.277014
2908  470.88  470.090024
5478  467.20  467.371774
2191  446.08  449.530444

[1282 rows x 2 columns]


In [41]:
# Error metrics of the two-feature model (AT, EV) on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # square root of MSE, expressed in the units of PE
mae = mean_absolute_error(y_test, y_pred)

print(
    f'Mean absolute error: {mae:.2f}',
    f'Mean squared error: {mse:.2f}',
    f'Root mean squared error: {rmse:.2f}',
    sep='\n',
)

Mean absolute error: 3.86
Mean squared error: 24.25
Root mean squared error: 4.92


In [42]:
# Manual R² as a cross-check of regressor.score():  R² = 1 - SS_res / SS_tot.
# The variable names are kept for compatibility, but each one holds a *sum of squares*:
#   actual_minus_predicted   -> residual sum of squares, sum((y - y_hat)^2)
#   actual_minus_actual_mean -> total sum of squares,    sum((y - mean(y))^2)
# The vectorized Series.sum() replaces the builtin sum(), which iterates over the
# Series one element at a time in Python.
actual_minus_predicted = ((y_test - y_pred) ** 2).sum()
actual_minus_actual_mean = ((y_test - y_test.mean()) ** 2).sum()
r2 = 1 - actual_minus_predicted / actual_minus_actual_mean
print('R²:', r2)

R²: 0.915715396967825


In [43]:
# R² on the held-out split as reported by scikit-learn; matches the manual computation above.
regressor.score(X_test, y_test)

0.9157153969678249

In [44]:
# R² on the training split, for comparison with the held-out score.
regressor.score(X_train, y_train)

0.9166130733734346

### Predict PE based on all variables

In [45]:
# Four-feature setup: predict PE from AT, AP, RH and EV.
X = train.loc[:, ['AT', 'AP', 'RH', 'EV']]
y = train.loc[:, 'PE']

In [46]:
# Same 80/20 split and seed (42) as the earlier models.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [47]:
# Build and fit the four-feature model in one step (fit() returns the estimator itself);
# the bare name on the last line keeps the estimator repr as the cell output.
regressor = LinearRegression().fit(X_train, y_train)
regressor

LinearRegression()

In [48]:
# Intercept of the four-feature model (AT, AP, RH, EV).
regressor.intercept_

463.15851697563994

In [49]:
# Fitted coefficients, in the column order of X (AT, AP, RH, EV).
regressor.coef_

array([-2.00095157,  0.05393161, -0.16050202, -0.22694278])

In [50]:
# Label each fitted coefficient with the feature (column of X) it belongs to.
feature_names = X.columns
model_coefficients = regressor.coef_

coefficients_df = pd.DataFrame(
    {'Coefficient value': model_coefficients},
    index=feature_names,
)
print(coefficients_df)

    Coefficient value
AT          -2.000952
AP           0.053932
RH          -0.160502
EV          -0.226943


In [51]:
# Predict PE for the held-out rows using all four features.
y_pred = regressor.predict(X_test)

In [52]:
# Compare actual and predicted PE row by row. The predictions are a plain array, so
# they are attached to y_test's index to keep the original row labels of `train`.
results = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': pd.Series(y_pred, index=y_test.index),
    }
)
print(results)

      Actual   Predicted
5093  432.60  429.035648
1044  441.18  440.019562
4827  482.99  477.280683
2905  437.12  440.976739
4994  487.33  486.040825
...      ...         ...
5247  452.30  450.505213
509   446.84  452.697677
2908  470.88  469.382644
5478  467.20  467.892903
2191  446.08  449.084286

[1282 rows x 2 columns]


In [53]:
# Error metrics of the four-feature model on the held-out split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # square root of MSE, expressed in the units of PE
mae = mean_absolute_error(y_test, y_pred)

print(
    f'Mean absolute error: {mae:.2f}',
    f'Mean squared error: {mse:.2f}',
    f'Root mean squared error: {rmse:.2f}',
    sep='\n',
)

Mean absolute error: 3.58
Mean squared error: 20.49
Root mean squared error: 4.53


In [54]:
# Manual R² as a cross-check of regressor.score():  R² = 1 - SS_res / SS_tot.
# The variable names are kept for compatibility, but each one holds a *sum of squares*:
#   actual_minus_predicted   -> residual sum of squares, sum((y - y_hat)^2)
#   actual_minus_actual_mean -> total sum of squares,    sum((y - mean(y))^2)
# The vectorized Series.sum() replaces the builtin sum(), which iterates over the
# Series one element at a time in Python.
actual_minus_predicted = ((y_test - y_pred) ** 2).sum()
actual_minus_actual_mean = ((y_test - y_test.mean()) ** 2).sum()
r2 = 1 - actual_minus_predicted / actual_minus_actual_mean
print('R²:', r2)

R²: 0.9287738984353706


In [55]:
# R² on the held-out split as reported by scikit-learn; matches the manual computation above.
regressor.score(X_test, y_test)

0.9287738984353705

In [56]:
# R² on the training split, for comparison with the held-out score.
regressor.score(X_train, y_train)

0.9295453761111792

#### Eq 1
$$ PE = {(-2.18AT) + 497.18} $$

#### Eq 2
$$ PE = {(-1.71 AT - 0.32 EV)+ 505.59} $$

#### Eq 3

$$ PE = {(-2.00 AT + 0.05 AP - 0.16 RH - 0.23 EV) + 463.16} $$