In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pylab as pl
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
%matplotlib inline

In [None]:
# Read the power-curve samples from the CSV file located next to this notebook.
power_curve_df = pd.read_csv(filepath_or_buffer="./power_curve.csv")

# Show the first rows as a quick check of what was parsed.
power_curve_df.head(n=5)

In [None]:
# Scatter of the unfiltered samples: the "Power" column against the "U" column.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 3), constrained_layout=True)

ax.plot(
    power_curve_df["U"],
    power_curve_df["Power"],
    color="k",
    marker=".",
    linestyle="none",  # markers only, no connecting line
    markersize=0.75,
    rasterized=True,
)
ax.set(xlabel="$U_{norm}$ [--]", ylabel="$P_{norm}$ [--]")
ax.grid(True)

# Apply a few iterations of machine learning filtering

#### First iteration...

In [None]:
# Feature matrix (columns "U" and "TI", in that order) and regression target ("Power"),
# both converted to plain NumPy arrays for scikit-learn.
feature_columns = ["U", "TI"]
inputs = power_curve_df.loc[:, feature_columns].to_numpy()
output = power_curve_df.loc[:, "Power"].to_numpy()

print(f"Shape of the inputs: {inputs.shape}")
print(f"Shape of the output: {output.shape}")

In [None]:
# First filtering pass: fit Power as a function of (U, TI) on ALL samples, then measure
# how far every sample lies from the fitted surface.
# random_state is pinned because GradientBoostingRegressor randomly permutes the features
# at every split; without a fixed seed the fit (and therefore the set of rejected points)
# can differ between kernel restarts.
model_0 = make_pipeline(MinMaxScaler(), GradientBoostingRegressor(random_state=0))
model_0.fit(inputs, output)
pred_0 = model_0.predict(inputs)
err_0 = np.abs(pred_0 - output)  # absolute in-sample residual, one value per sample

# Rejection limit: samples whose residual reaches mean + 2.5 standard deviations
# are treated as outliers by the cells that follow.
err_0_mean = np.mean(err_0)
err_0_std = np.std(err_0)
err_0_lim = err_0_mean + 2.5 * err_0_std

In [None]:
# Diagnostic figure for the first filtering pass.
#   Left : absolute residual vs. U, with the mean residual and the rejection limit.
#   Right: Power vs. U, split into inliers / outliers by that limit.
fig, axes = plt.subplots(1, 2, constrained_layout=True, sharex="all")
fig.set_size_inches([8, 3])

# Evaluate the split once so both scatter series share exactly the same boundary.
inlier_mask_0 = err_0 < err_0_lim
outlier_mask_0 = err_0 >= err_0_lim

axes[0].plot(inputs[:, 0], err_0, 'k.', markersize=0.75, rasterized=True)
axes[0].axhline(err_0_mean, color="red", linestyle='-', linewidth=2, label="Mean")
# The dashed line sits at err_0_lim (mean + 2.5 standard deviations), i.e. the rejection
# limit -- not at the standard deviation itself -- so the legend entry says so.
axes[0].axhline(err_0_lim, color="red", linestyle='--', linewidth=1, label="Mean + 2.5 St. Dev.")
axes[0].legend()

axes[1].plot(
    inputs[inlier_mask_0, 0],
    output[inlier_mask_0],
    'k.', markersize=1.25, rasterized=True,
    label="Inlier"
)
axes[1].plot(
    inputs[outlier_mask_0, 0],
    output[outlier_mask_0],
    'rx', markersize=1.5, rasterized=True,
    label="Outlier"
)
axes[1].legend()

axes[0].set_ylabel("$|P_{pred}-P_{true}|$ [--]")
# Carry the same " [--]" unit marker as every other axis label in the notebook.
axes[1].set_ylabel("$P_{norm}$ [--]")
fig.supxlabel("$U_{norm}$ [--]")

for ax in axes:
    ax.grid(True)

#### Second iteration...

In [None]:
# Drop every sample whose first-pass residual reached the rejection limit;
# the surviving rows feed the second filtering pass.
keep_after_pass_0 = err_0 < err_0_lim
inputs_filt_1 = inputs[keep_after_pass_0]
output_filt_1 = output[keep_after_pass_0]

print(f"Shape of inputs after first filtering iteration:              {inputs_filt_1.shape}")
print(f"Shape of output after first filtering iteration:              {output_filt_1.shape}")
print(f"Number of rejected points from first filtering iteration:     {output.shape[0]-output_filt_1.shape[0]}")

In [None]:
# Second filtering pass: refit on the samples that survived the first pass and
# recompute the residuals against this cleaner fit.
# random_state is pinned because GradientBoostingRegressor randomly permutes the features
# at every split; without a fixed seed the fit (and therefore the set of rejected points)
# can differ between kernel restarts.
model_1 = make_pipeline(MinMaxScaler(), GradientBoostingRegressor(random_state=0))
model_1.fit(inputs_filt_1, output_filt_1)
pred_1 = model_1.predict(inputs_filt_1)
err_1 = np.abs(pred_1 - output_filt_1)  # absolute in-sample residual, one value per sample

# Rejection limit for this pass: mean + 2.5 standard deviations of the new residuals.
err_1_mean = np.mean(err_1)
err_1_std = np.std(err_1)
err_1_lim = err_1_mean + 2.5 * err_1_std

In [None]:
# Diagnostic figure for the second filtering pass.
#   Left : absolute residual vs. U, with the mean residual and the rejection limit.
#   Right: Power vs. U, split into inliers / outliers by that limit.
fig, axes = plt.subplots(1, 2, constrained_layout=True, sharex="all")
fig.set_size_inches([8, 3])

# Evaluate the split once so both scatter series share exactly the same boundary.
inlier_mask_1 = err_1 < err_1_lim
outlier_mask_1 = err_1 >= err_1_lim

axes[0].plot(inputs_filt_1[:, 0], err_1, 'k.', markersize=0.75, rasterized=True)
axes[0].axhline(err_1_mean, color="red", linestyle='-', linewidth=2, label="Mean")
# The dashed line sits at err_1_lim (mean + 2.5 standard deviations), i.e. the rejection
# limit -- not at the standard deviation itself -- so the legend entry says so.
axes[0].axhline(err_1_lim, color="red", linestyle='--', linewidth=1, label="Mean + 2.5 St. Dev.")
axes[0].legend()

axes[1].plot(
    inputs_filt_1[inlier_mask_1, 0],
    output_filt_1[inlier_mask_1],
    'k.', markersize=1.25, rasterized=True,
    label="Inlier"
)
axes[1].plot(
    inputs_filt_1[outlier_mask_1, 0],
    output_filt_1[outlier_mask_1],
    'rx', markersize=1.5, rasterized=True,
    label="Outlier"
)
axes[1].legend()

axes[0].set_ylabel("$|P_{pred}-P_{true}|$ [--]")
# Carry the same " [--]" unit marker as every other axis label in the notebook.
axes[1].set_ylabel("$P_{norm}$ [--]")
fig.supxlabel("$U_{norm}$ [--]")

for ax in axes:
    ax.grid(True)

In [None]:
# Final data set: the rows that also stay below the second-pass rejection limit.
keep_after_pass_1 = err_1 < err_1_lim
final_inputs = inputs_filt_1[keep_after_pass_1]
final_output = output_filt_1[keep_after_pass_1]

In [None]:
# Summary of the SECOND filtering pass: the arrays reported here are final_inputs /
# final_output, and the rejected count is relative to the first-pass survivors.
# (The messages previously said "first" because they were copied from the earlier cell.)
print(f"Shape of inputs after second filtering iteration:             {final_inputs.shape}")
print(f"Shape of output after second filtering iteration:             {final_output.shape}")
print(f"Number of rejected points from second filtering iteration:    {output_filt_1.shape[0]-final_output.shape[0]}")

# Investigate physics via machine learning...

#### Question: how does TI impact power production?

In [None]:
# Robust TI range of the raw data: the 5th / 95th percentiles are used instead of the
# true extremes, so the labels state that explicitly.
# The values are computed outside the f-strings because reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
ti_low = power_curve_df["TI"].quantile(0.05)
ti_high = power_curve_df["TI"].quantile(0.95)

print(f"Minimum TI (5th percentile):  {ti_low}")
print(f"Maximum TI (95th percentile): {ti_high}")

In [None]:
# TI levels at which the fitted model will be queried in the following cells.
investigate_ti_vals = np.array([5, 10, 15, 20, 25, 30])

# Both models are trained on the filtered target `final_output` directly. The notebook-level
# `output` array (the unfiltered target) is deliberately NOT reassigned here, so the earlier
# filtering cells can still be re-run without pairing `inputs` with a shorter target.

# Baseline model: Power from U only (column 0 of the filtered inputs), kept 2-D for scikit-learn.
base_model_input = final_inputs[:, 0].reshape(-1, 1)

# random_state is pinned so the fitted curves are reproducible across kernel restarts.
base_model = make_pipeline(MinMaxScaler(), GradientBoostingRegressor(random_state=0))
base_model.fit(base_model_input, final_output)

# Full model: Power from both filtered input columns (U and TI).
full_model_inputs = final_inputs

full_model = make_pipeline(MinMaxScaler(), GradientBoostingRegressor(random_state=0))
full_model.fit(full_model_inputs, final_output)

In [None]:
# 100 evenly spaced U values spanning the range seen by the baseline model, as a column vector.
ws_vals = np.linspace(np.amin(base_model_input[:, 0]), np.amax(base_model_input[:, 0]), 100).reshape(-1, 1)
base_power_pred = base_model.predict(ws_vals)

# One column of predictions per investigated TI level; NaN-initialised so that a column
# that was never filled is obvious.
ti_preds = np.full((ws_vals.shape[0], investigate_ti_vals.shape[0]), np.nan)

for i, ti_val in enumerate(investigate_ti_vals):
    # Query grid: the U sweep paired with one constant TI value.
    # It gets its own name instead of `inputs` so the training feature matrix `inputs`
    # defined earlier in the notebook is not overwritten (which would break re-running
    # the filtering cells).
    ti_sweep_inputs = np.concatenate([ws_vals, np.full((ws_vals.shape[0], 1), ti_val)], axis=-1)
    ti_preds[:, i] = full_model.predict(ti_sweep_inputs)

In [None]:
# One colour per investigated TI level, sampled evenly across the coolwarm colormap.
cmap = pl.cm.coolwarm
n_ti_levels = investigate_ti_vals.shape[0]
colors = cmap(np.linspace(0, 1, n_ti_levels))

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 3), constrained_layout=True)

# Full-model prediction curves first (one per TI level), then the U-only baseline on top.
for ti_curve, ti_color in zip(ti_preds.T, colors):
    ax.plot(ws_vals, ti_curve, '-', linewidth=1.5, color=ti_color)
ax.plot(ws_vals, base_power_pred, 'k--', linewidth=3, label="Basic Pred.")

# Colorbar spanning the first to the last investigated TI value; it stands in for
# per-curve legend entries.
ti_norm = pl.Normalize(vmin=investigate_ti_vals[0], vmax=investigate_ti_vals[-1])
ti_mappable = pl.cm.ScalarMappable(cmap=cmap, norm=ti_norm)
fig.colorbar(ti_mappable, label="$TI$ [%]", ax=ax)

ax.grid(True)
ax.set_xlabel("$U_{norm}$ [--]")
ax.set_ylabel("$P_{norm}$ [--]")
ax.legend()