# Functions Module
Evaluating Wind Turbine Performance Improvement Using Regression

#### Loading the Python Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Plot Wind Power Curve Scatter and Line Plots

In [None]:
# plot scatter plots of actual wind speed and power data and line plots of synthetic power curve data
def plot_pc(scatter_x, scatter_y, titles_list, curve_x=None, curve_y=None, curve_columns=None, blue_index=4):
    """Draw a two-column grid of scatter plots, optionally overlaid with line plots.

    Args:
        scatter_x: sequence of x data, one entry per subplot.
        scatter_y: sequence of y data, one entry per subplot; its length
            determines how many subplots are laid out.
        titles_list: sequence of subplot titles, one entry per subplot.
        curve_x: optional sequence of x data for the line plots, one entry
            per subplot. Lines are drawn only when curve_y is also given.
        curve_y: optional sequence of y data for the line plots. When
            curve_columns is given, each entry is indexed by those keys.
        curve_columns: optional iterable of keys into each curve_y entry;
            one labelled line is drawn per key. When omitted, each curve_y
            entry is drawn as a single unlabelled line.
        blue_index: index into seaborn's 'Blues' palette for the scatter colour.

    Returns:
        None. The figure is displayed with plt.show().
    """
    n_plots = len(scatter_y)
    n_rows = (n_plots + 1) // 2
    # squeeze=False keeps the axes array 2-D even for a single row, so the
    # flattened (row-major) view can be indexed the same way for any grid size.
    fig, axes = plt.subplots(n_rows, 2, figsize=(12, 4 * n_rows), squeeze=False)
    panels = axes.ravel()

    have_curves = curve_x is not None and curve_y is not None
    multi_curve = curve_columns is not None

    # The scatter points only get a legend entry when labelled curves are drawn too.
    scatter_label = "actual" if (have_curves and multi_curve) else None

    # scatter plots
    for idx, (xs, ys, heading) in enumerate(zip(scatter_x, scatter_y, titles_list)):
        panel = panels[idx]
        sns.scatterplot(x=xs, y=ys, alpha=0.1, label=scatter_label,
                        color=sns.color_palette('Blues')[blue_index], ax=panel)
        panel.set_title(heading)
        panel.set_xlim(0, 25)

    # power curves
    if have_curves:
        for idx, (xs, ys) in enumerate(zip(curve_x, curve_y)):
            panel = panels[idx]
            if not multi_curve:
                # single power curve
                sns.lineplot(x=xs, y=ys, color=sns.color_palette('YlOrRd')[3], ax=panel)
                continue
            # multiple power curves, one colour per column
            for shade, column in enumerate(curve_columns):
                sns.lineplot(x=xs, y=ys[column], label=str(column),
                             color=sns.color_palette('YlOrRd', n_colors=len(curve_columns))[shade],
                             ax=panel)

    # an odd number of plots leaves the last grid cell empty; hide it
    if n_plots % 2 > 0:
        panels[-1].axis('off')

    # adjust spacing between subplots, then show them
    plt.tight_layout()
    plt.show()

### Wind Power Curve Sigmoid Hyperbolic Tangent Equation

It can be observed that the Power vs. WindSpeed scatter plots are generally concentrated along an S-shaped curve that follows a ***sigmoid*** function. There are several variations of the sigmoid function (e.g. the *logistic function*). After plotting a variety of modified sigmoid functions over the actual wind turbine scatter plots, the ***hyperbolic tangent (tanh)*** with the wind speed expressed as a ***fourth-degree polynomial*** was chosen as the best fit based on visual inspection.

$$ \hat{power} = maxpower*tanh((a*windspeed+b)^4) $$

<sub> Reference: Wood, T. (n.d.). Sigmoid Function. DeepAI: Machine Learning Glossary and Terms. Retrieved from https://deepai.org/machine-learning-glossary-and-terms/sigmoid-function </sub>

In [None]:
# define the wind power curve sigmoid tanh equation between Power and WindSpeed
def wpc_equation(params, x, max_power=None):
    """Evaluate the tanh wind power curve: max_power * tanh((a*x + b)**4).

    Args:
        params: two-element sequence (a, b), the coefficients of the linear
            term inside the fourth power.
        x: wind speed value(s); a scalar or anything NumPy arithmetic accepts.
        max_power: amplitude of the curve. When None (the default), the
            module-level global ``capacity`` is used, so that global must be
            defined before calling without this argument.

    Returns:
        The predicted power for each value of x.
    """
    a, b = params
    if max_power is None:
        # Backward-compatible fallback: the original always read this global.
        max_power = capacity
    return max_power * np.tanh((a * x + b) ** 4)

### Quantile Regression Error Function
*Quantile regression* fits the line that minimizes the total absolute error, with each error weighted according to the chosen quantile, unlike ordinary linear regression, which minimizes the sum of squared errors (ordinary least squares). Because the errors are not squared, extreme values have less impact on a quantile regression fit.

The error function is given by:

$$
\epsilon =
  \begin{cases}
    q \cdot (power - \hat{power}) & \text{if } (power - \hat{power}) \ge 0 \\
    (q-1) \cdot (power - \hat{power}) & \text{if } (power - \hat{power}) < 0
  \end{cases}
$$

<sub> Reference: Koenker, R., & Hallock, K. F. (2001). Quantile Regression. *Journal of Economic Perspectives*, 15(4), 143-156. </sub>

In [None]:
# define the quantile regression error function to be minimized
def err_function_qr(params, x, y, q):
    """Return the total quantile-weighted absolute error of the power curve fit.

    Args:
        params: curve parameters passed through to wpc_equation.
        x: wind speed values.
        y: observed power values.
        q: quantile weight; residuals >= 0 are scaled by q, negative
            residuals by (q - 1).

    Returns:
        Sum of the absolute values of the weighted residuals.
    """
    residual = y - wpc_equation(params, x)
    at_or_above_curve = residual >= 0
    weighted = np.where(at_or_above_curve, q * residual, (q - 1) * residual)
    # abs() makes every weighted term non-negative before summing
    return np.sum(np.abs(weighted))

### Ordinary Least Squares Error Function

In [None]:
# define the error function using ordinary least squares to be minimized
def err_function_ols(params, x, y):
    """Return the sum of squared residuals between y and the fitted power curve.

    Args:
        params: curve parameters passed through to wpc_equation.
        x: wind speed values.
        y: observed power values.

    Returns:
        Sum over all points of (y - wpc_equation(params, x)) ** 2.
    """
    residual = y - wpc_equation(params, x)
    squared = residual ** 2
    return np.sum(squared)

### Create Synthetic Power Curve Data Frames

In [None]:
# create dataframes based on specified minimum, maximum, and incremental wind speed values
def create_pc_df(column_name, windspeed_min, windspeed_max, increment):
    """Build a one-column DataFrame of evenly spaced wind speed values.

    The sequence starts at windspeed_min and advances by increment until it
    reaches windspeed_max. If the range is not a whole number of increments,
    the last value is the first one past windspeed_max.

    Args:
        column_name: name of the single column in the returned DataFrame.
        windspeed_min: first value of the sequence.
        windspeed_max: value the sequence must reach.
        increment: spacing between consecutive values (must be non-zero).

    Returns:
        pandas.DataFrame with one column named column_name.
    """
    # np.arange(min, max + increment, increment) derives its length from a
    # floating-point division, so a fractional increment can yield a spurious
    # extra point beyond the intended endpoint. Count the steps explicitly,
    # with a small tolerance that absorbs the rounding noise.
    steps = (windspeed_max - windspeed_min) / increment
    n_points = int(np.ceil(steps - 1e-9)) + 1
    ws_sequence = windspeed_min + increment * np.arange(max(n_points, 0))
    return pd.DataFrame({column_name: ws_sequence})

### Outlier Filtering on Synthetic Power Curve

In [None]:
# calculate the power curves of the upper and lower bounds of the range for filtering outliers
def outlier_pc(data, tolerance=0, quantile_prefix=None):
    """Compute lower and upper outlier bounds from the "25" and "75" columns of data.

    The bounds follow the 1.5 * IQR rule, each scaled by the tolerance:
    lower = (1 - tolerance) * (q1 - 1.5 * (q3 - q1)) and
    upper = (1 + tolerance) * (q3 + 1.5 * (q3 - q1)).

    Args:
        data: DataFrame containing the columns ``<prefix>25`` and ``<prefix>75``.
        tolerance: fractional scaling applied to both bounds (default 0).
        quantile_prefix: column-name prefix. When None (the default), the
            module-level global ``qname`` is used, so that global must be
            defined before calling without this argument.

    Returns:
        A two-element list [lower_bound, upper_bound].
    """
    # NOTE(review): the tolerance is multiplicative, so a negative lower bound
    # moves toward zero as tolerance grows -- confirm this is intended.
    if quantile_prefix is None:
        # Backward-compatible fallback: the original always read this global.
        quantile_prefix = qname
    q1 = data.loc[:, quantile_prefix + "25"]
    q3 = data.loc[:, quantile_prefix + "75"]
    whisker = 1.5 * (q3 - q1)
    return [(1 - tolerance) * (q1 - whisker), (1 + tolerance) * (q3 + whisker)]

### Outlier Filtering on Main Data

In [None]:
# calculate the upper and lower bounds of the range for filtering outliers
def outlier_filter_range(x, tolerance=0):
    """Compute lower and upper outlier bounds for wind speed value(s) x.

    Evaluates wpc_equation at x with the first two parameter sets stored in
    the module-level global ``opt_params_bef`` and applies the 1.5 * IQR rule
    to the two resulting curves, scaled by the tolerance.
    NOTE(review): presumably entries 0 and 1 of ``opt_params_bef`` are the
    fitted lower- and upper-quartile curve parameters -- confirm at the caller.

    Args:
        x: wind speed value(s) at which to evaluate the bounds.
        tolerance: fractional scaling applied to both bounds (default 0).

    Returns:
        A two-element list [lower_bound, upper_bound].
    """
    lower_curve = wpc_equation(opt_params_bef[0], x)
    upper_curve = wpc_equation(opt_params_bef[1], x)
    whisker = 1.5 * (upper_curve - lower_curve)
    lower_bound = (1 - tolerance) * (lower_curve - whisker)
    upper_bound = (1 + tolerance) * (upper_curve + whisker)
    return [lower_bound, upper_bound]