# Linear Regression
### Imports

In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from power_prediction.util import split_data_train_test
from power_prediction.preprocess_data import read_time_decomposition_remainder_data

### Prepare the datasets
##### Three models are trained on the same predictor columns. The first model predicts the NE5 remainder, the second predicts the NE7 remainder, and the third predicts the remainder of the combined series (NETOT).

In [2]:
# Predictor columns shared by all three models. Defined once so that the
# train and test matrices always contain the same columns in the same order.
FEATURE_COLUMNS = [
    'Hr [%Hr]', 'RainDur [min]', 'StrGlo [W/m2]', 'T [°C]',
    'WD [°]', 'WVs [m/s]', 'WVv [m/s]', 'p [hPa]',
]

df = read_time_decomposition_remainder_data()

# Rebuild the full-scale series of each target as remainder + yhat; these
# "*_origin" columns are the reference for the overall evaluation further down.
df['NE5_origin'] = df['NE5_remainder'] + df['NE5_yhat']
df['NE7_origin'] = df['NE7_remainder'] + df['NE7_yhat']
df['NETOT_origin'] = df['NETOT_remainder'] + df['NETOT_yhat']

train_df, test_df = split_data_train_test(df)
# dropna() without `subset` discards a row when *any* of its columns is missing.
train_df = train_df.dropna()
test_df = test_df.dropna()

# Train data set: the models are fitted on the remainder columns only.
# Targets are selected with a list so they stay single-column DataFrames.
X_train = train_df[FEATURE_COLUMNS]
y_train_ne5 = train_df[['NE5_remainder']]
y_train_ne7 = train_df[['NE7_remainder']]
y_train_tot = train_df[['NETOT_remainder']]

# Test data set
X_test = test_df[FEATURE_COLUMNS]

# Full-scale references (remainder + yhat) for the overall evaluation.
y_test_ne5_all = test_df[['NE5_origin']]
y_test_ne7_all = test_df[['NE7_origin']]
y_test_tot_all = test_df[['NETOT_origin']]

# Remainder-scale references for evaluating the regression models themselves.
y_test_ne5 = test_df[['NE5_remainder']]
y_test_ne7 = test_df[['NE7_remainder']]
y_test_tot = test_df[['NETOT_remainder']]

### Train the models

In [3]:
# Fit one independent linear regression per target on the shared feature
# matrix. `fit` returns the estimator itself, so construction and training
# are chained into a single expression.
model_ne5 = LinearRegression().fit(X_train, y_train_ne5)
model_ne7 = LinearRegression().fit(X_train, y_train_ne7)
model_tot = LinearRegression().fit(X_train, y_train_tot)

# Emits a single empty line as the cell's only output.
print()




### Evaluate the models
##### Model NE5

In [4]:
# Predict the NE5 remainder, then add NE5_yhat back to obtain a prediction on
# the same scale as NE5_origin (which is built as NE5_remainder + NE5_yhat).
y_pred_ne5 = model_ne5.predict(X_test)
y_pred_ne5_comp = y_pred_ne5[:, 0] + test_df['NE5_yhat'].to_numpy()

# Remainder-scale scores: how well the regression explains the remainder itself.
mae, mse, r2, mape = (
    metric(y_test_ne5, y_pred_ne5)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error)
)

print(
    "Model evaluation",
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"R² Score: {r2}",
    f"Mean Absolute Percentage Error (MAPE): {mape}\n",
    sep="\n",
)

# Full-scale scores: recomposed prediction against the recomposed series.
mae_all, mse_all, r2_all, mape_all = (
    metric(y_test_ne5_all, y_pred_ne5_comp)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error)
)

print(
    "Over all evaluation",
    f"Mean Absolute Error (MAE): {mae_all}",
    f"Mean Squared Error (MSE): {mse_all}",
    f"R² Score: {r2_all}",
    f"Mean Absolute Percentage Error (MAPE): {mape_all}",
    sep="\n",
)

Model evaluation
Mean Absolute Error (MAE): 5867.675211915305
Mean Squared Error (MSE): 59338285.20038373
R² Score: 0.0038287467799986707
Mean Absolute Percentage Error (MAPE): 1.4076772188232152

Over all evaluation
Mean Absolute Error (MAE): 5867.675211915305
Mean Squared Error (MSE): 59338285.20038373
R² Score: 0.8178646586413172
Mean Absolute Percentage Error (MAPE): 0.06572603738435466


##### Model NE7

In [5]:
# Predict the NE7 remainder, then add NE7_yhat back to obtain a prediction on
# the same scale as NE7_origin (which is built as NE7_remainder + NE7_yhat).
y_pred_ne7 = model_ne7.predict(X_test)
y_pred_ne7_comp = y_pred_ne7[:, 0] + test_df['NE7_yhat'].to_numpy()

# Remainder-scale scores: how well the regression explains the remainder itself.
mae, mse, r2, mape = (
    metric(y_test_ne7, y_pred_ne7)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error)
)

print(
    "Model evaluation",
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"R² Score: {r2}",
    f"Mean Absolute Percentage Error (MAPE): {mape}\n",
    sep="\n",
)

# Full-scale scores: recomposed prediction against the recomposed series.
mae_all, mse_all, r2_all, mape_all = (
    metric(y_test_ne7_all, y_pred_ne7_comp)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error)
)

print(
    "Over all evaluation",
    f"Mean Absolute Error (MAE): {mae_all}",
    f"Mean Squared Error (MSE): {mse_all}",
    f"R² Score: {r2_all}",
    f"Mean Absolute Percentage Error (MAPE): {mape_all}",
    sep="\n",
)

Model evaluation
Mean Absolute Error (MAE): 25100.894583577756
Mean Squared Error (MSE): 886304013.6436092
R² Score: 0.15665214549916817
Mean Absolute Percentage Error (MAPE): 0.31075006852964987

Over all evaluation
Mean Absolute Error (MAE): 25100.894583577756
Mean Squared Error (MSE): 886304013.6436092
R² Score: 0.8424334429512619
Mean Absolute Percentage Error (MAPE): 0.09482412001570112


##### NE5 + NE7

In [6]:
# Sum the separate NE5 and NE7 predictions (full-scale and remainder-scale)
# and score the sums against the NETOT references. Relies on the prediction
# arrays produced by the NE5 and NE7 evaluation cells.
# NOTE(review): the recorded output of this cell shows a strongly negative R²,
# presumably because the NE5 and NE7 components do not add up to the NETOT
# columns on the same scale — verify against the preprocessing step.
y_pred_ne5_ne7_comp = y_pred_ne5_comp + y_pred_ne7_comp
y_pred_ne5_ne7 = y_pred_ne5 + y_pred_ne7

# Remainder-scale scores of the summed prediction.
mae, mse, r2, mape = (
    metric(y_test_tot, y_pred_ne5_ne7)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error)
)

print(
    "Model evaluation",
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"R² Score: {r2}",
    f"Mean Absolute Percentage Error (MAPE): {mape}\n",
    sep="\n",
)

# Full-scale scores of the summed, recomposed prediction.
mae_all, mse_all, r2_all, mape_all = (
    metric(y_test_tot_all, y_pred_ne5_ne7_comp)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error)
)

print(
    "Over all evaluation",
    f"Mean Absolute Error (MAE): {mae_all}",
    f"Mean Squared Error (MSE): {mse_all}",
    f"R² Score: {r2_all}",
    f"Mean Absolute Percentage Error (MAPE): {mape_all}",
    sep="\n",
)

Model evaluation
Mean Absolute Error (MAE): 102040.00911328646
Mean Squared Error (MSE): 11163279925.594234
R² Score: -21.962901256299464
Mean Absolute Percentage Error (MAPE): 38.94846670147325

Over all evaluation
Mean Absolute Error (MAE): 102042.12403023963
Mean Squared Error (MSE): 11162896379.20598
R² Score: -1.919326820608834
Mean Absolute Percentage Error (MAPE): 0.35931507015899083


##### NETOT

In [7]:
# Predict the NETOT remainder, then add NETOT_yhat back to obtain a prediction
# on the same scale as NETOT_origin (built as NETOT_remainder + NETOT_yhat).
y_pred_tot = model_tot.predict(X_test)
y_pred_tot_comp = y_pred_tot[:, 0] + test_df['NETOT_yhat'].to_numpy()

# Remainder-scale scores: how well the regression explains the remainder itself.
mae, mse, r2, mape = (
    metric(y_test_tot, y_pred_tot)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error)
)

print(
    "Model evaluation",
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"R² Score: {r2}",
    f"Mean Absolute Percentage Error (MAPE): {mape}\n",
    sep="\n",
)

# Full-scale scores: recomposed prediction against the recomposed series.
mae_all, mse_all, r2_all, mape_all = (
    metric(y_test_tot_all, y_pred_tot_comp)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error)
)

print(
    "Over all evaluation",
    f"Mean Absolute Error (MAE): {mae_all}",
    f"Mean Squared Error (MSE): {mse_all}",
    f"R² Score: {r2_all}",
    f"Mean Absolute Percentage Error (MAPE): {mape_all}",
    sep="\n",
)

Model evaluation
Mean Absolute Error (MAE): 16870.676055165583
Mean Squared Error (MSE): 475604155.8989019
R² Score: 0.021680783624102307
Mean Absolute Percentage Error (MAPE): 1.2909172333799848

Over all evaluation
Mean Absolute Error (MAE): 16870.676055165583
Mean Squared Error (MSE): 475604155.8989019
R² Score: 0.8756197387180754
Mean Absolute Percentage Error (MAPE): 0.06276003741754559
