### Example of expected power module usage

In [50]:
import numpy as np
import pandas as pd

from scipy.interpolate import interp1d
from scada_data_analysis.modules.power_curve_preprocessing import PowerCurveFiltering as pcf

In [2]:
# Load the SCADA export (semicolon-separated CSV inside a zip archive).
# Forward slashes are used because the previous non-raw literal contained the
# invalid escape sequences "\d" and "\l" and only resolved on Windows.
df = pd.read_csv('../datasets/la-haute-borne-data-2017-2020.zip', sep=';')

In [None]:
class BinningMethod:
    """
    Work-in-progress container for the binning-based expected power method.

    On construction the SCADA data is passed through the power curve filter and
    the data points it returns as normal operation are kept on the instance.
    """

    def __init__(self, turbine_label, windspeed_label, power_label, data=None, cut_in_speed=3,
                 bin_interval=0.5, z_coeff=2, filter_cycle=5, method='linear') -> None:
        """
        turbine_label: column name of unique turbine identifier
        windspeed_label: column name of wind speed
        power_label: column name of active power
        data: pandas dataframe of scada data
        cut_in_speed: cut in speed of turbine
        bin_interval: Wind speed bin interval
        z_coeff: threshold of standard deviation used in filter within which operational data is considered normal
        filter_cycle: number of times to pass scada data through filter
        method: method used to calculate the expected power. Available methods are: 'linear', 'quadratic' and 'cubic'.
        Quadratic and cubic methods use spline interpolation of second and third order respectively.
        """
        self.turbine_label = turbine_label
        self.windspeed_label = windspeed_label
        self.power_label = power_label
        self.data = data
        self.cut_in_speed = cut_in_speed
        self.bin_interval = bin_interval
        self.z_coeff = z_coeff
        self.filter_cycle = filter_cycle
        self.method = method

        # Initialize power curve filtering class.
        # The file imports PowerCurveFiltering as `pcf`; the previously used
        # `pcp.PowerCurvePreprocessing` was never imported and raised NameError.
        # `method` is intentionally not forwarded: it names the interpolation
        # kind, and the filter is constructed elsewhere in this file without it.
        # NOTE(review): confirm against the PowerCurveFiltering signature.
        pc_filter = pcf(turbine_label=self.turbine_label, windspeed_label=self.windspeed_label,
                        power_label=self.power_label, data=self.data,
                        cut_in_speed=self.cut_in_speed, bin_interval=self.bin_interval,
                        z_coeff=self.z_coeff, filter_cycle=self.filter_cycle)

        # Get data points during normal operating conditions and keep them on
        # the instance so the (still to be written) binning step can use them.
        self.normal_df, _ = pc_filter.process()

        # For each turbine, get statistics of the wind speed bin and active power

In [6]:
# Configure the power curve filter for the loaded SCADA frame.
pc_filter = pcf(
    turbine_label='Wind_turbine_name',
    windspeed_label='Ws_avg',
    power_label='P_avg',
    data=df,
    cut_in_speed=3,
    bin_interval=0.5,
    z_coeff=2.5,
    filter_cycle=5,
)

In [41]:
%%time
# Run the filter; only the first returned frame is kept (the second is discarded).
# NOTE(review): presumably the first frame holds the normal-operation points - confirm.
normal_df, _ = pc_filter.process()

Wall time: 5.32 s


### For each turbine, get statistics of the wind speed bin and active power

In [42]:
def binning_func(turbine_data, turbine_label='Wind_turbine_name', windspeed_label='Ws_avg',
                 power_label='P_avg', data=None, cut_in_speed=3, bin_interval=0.5,
                 z_coeff=2.5):
    """
    Bins wind speed into group of values using bin_interval

    turbine_data: pandas dataframe containing windspeed and power columns
    turbine_label: unused, kept for backward compatibility
    windspeed_label: column name of wind speed
    power_label: column name of active power
    data: unused, kept for backward compatibility
    cut_in_speed: unused, kept for backward compatibility
    bin_interval: width of each wind speed bin; must be positive. Bin edges are
    rounded to two decimals, so intervals with more decimals are not supported.
    z_coeff: unused, kept for backward compatibility

    Bins are right-closed intervals (0, bin_interval], (bin_interval, 2*bin_interval], ...
    reaching up to the largest wind speed, so samples with wind speed <= 0 or
    NaN belong to no bin and are ignored. Bins without any power value are dropped.

    Returns: dataframe with one row per non-empty bin and the columns
    'windspeed_bin', 'windspeed_bin_median', 'pwr_bin_mean' and 'pwr_bin_std'
    (median wind speed, average and standard deviation of produced power).
    The standard deviation of a single-sample bin is reported as 0.

    Raises: ValueError if bin_interval is not positive.
    """
    import math

    if bin_interval <= 0:
        raise ValueError('bin_interval must be positive')

    output_columns = ['windspeed_bin', 'windspeed_bin_median', 'pwr_bin_mean', 'pwr_bin_std']

    # Only the two columns that are actually used are copied.
    turb_df = turbine_data[[windspeed_label, power_label]].copy()
    if turb_df.empty:
        return pd.DataFrame(columns=output_columns)

    # Series.max() skips NaN, unlike the builtin max().
    max_windspeed = turb_df[windspeed_label].max()
    if pd.isna(max_windspeed) or max_windspeed <= 0:
        return pd.DataFrame(columns=output_columns)

    # Enough bins to reach the largest wind speed for any bin_interval
    # (the previous count assumed bin_interval == 0.5 and truncated the maximum).
    n_bins = int(math.ceil(max_windspeed / bin_interval))

    windspeed_bins = pd.IntervalIndex.from_tuples(
        [(round(bin_interval * a, 2), round((bin_interval * a) + bin_interval, 2))
         for a in range(n_bins)]
    )

    turb_df['windspeed_bin'] = pd.cut(turb_df[windspeed_label], bins=windspeed_bins)

    # observed=True keeps only bins that contain samples; named aggregation
    # yields flat column names directly.
    binned_turb_df = (
        turb_df.groupby('windspeed_bin', observed=True)
        .agg(windspeed_bin_median=(windspeed_label, 'median'),
             pwr_bin_mean=(power_label, 'mean'),
             pwr_bin_std=(power_label, 'std'))
        .dropna(subset=['pwr_bin_mean'])
        .fillna({'pwr_bin_std': 0})
        .reset_index()
    )

    return binned_turb_df

In [43]:
def expected_power_run(dff, normal_dff, method='linear', turbine_label='Wind_turbine_name',
                       windspeed_label='Ws_avg', power_label='P_avg'):
    """
    Adds an 'expected_power' column to a copy of the scada data

    For each turbine, the normal-operation data is binned with binning_func and
    an interpolator is built through the (median wind speed, mean power) points
    of the bins. The interpolator is then evaluated, with extrapolation outside
    the binned range, at every wind speed of that turbine. Negative results are
    clipped to 0.

    dff: pandas dataframe of scada data to compute the expected power for
    normal_dff: pandas dataframe of data points under normal operating conditions
    method: interpolation kind passed to scipy's interp1d (e.g. 'linear', 'quadratic', 'cubic')
    turbine_label: column name of unique turbine identifier
    windspeed_label: column name of wind speed
    power_label: column name of active power

    Returns: copy of dff with the additional float column 'expected_power'.
    The inputs are not modified.
    """
    dff = dff.copy()
    # Float from the start so the interpolated values do not change the dtype.
    dff['expected_power'] = 0.0

    for turbine_name in dff[turbine_label].unique():
        # A boolean mask stays correct even when the index has duplicate labels.
        turbine_rows = dff[turbine_label] == turbine_name

        normal_turb_df = normal_dff[normal_dff[turbine_label] == turbine_name]
        binned_df = binning_func(normal_turb_df, windspeed_label=windspeed_label,
                                 power_label=power_label)

        f = interp1d(binned_df['windspeed_bin_median'], binned_df['pwr_bin_mean'],
                     kind=method, fill_value="extrapolate")

        dff.loc[turbine_rows, 'expected_power'] = f(dff.loc[turbine_rows, windspeed_label])

    # Assign the clipped column back; an in-place clip on the selected column
    # does not update the frame when pandas Copy-on-Write is active.
    dff['expected_power'] = dff['expected_power'].clip(lower=0)

    return dff

In [71]:
%%time
# Compute the expected power for every row of df with linear interpolation,
# using normal_df as the reference data.
proc_df = expected_power_run(df, normal_df, 'linear')

Wall time: 849 ms


In [72]:
proc_df.shape

(217588, 139)

In [73]:
df.shape

(217588, 138)

In [76]:
# Total (expected - measured P_avg) power as a percentage of total expected power.
np.sum(proc_df.expected_power - proc_df.P_avg)/np.sum(proc_df.expected_power)*100

2.3472891287236393