Wind Exploration
======

Looking at [this dataset](https://opendata-renewables.engie.com/), found via [this Open Data Stack Exchange question](https://opendata.stackexchange.com/questions/7370/sensor-data-from-wind-turbines).

Ideas:
 - Automatically detect faulty temperature sensors?

Missing Data:
------
 - all Pas (Pitch angle setpoint)
 - Wa_c (Absolute wind direction corrected) -- average is there though.
 - Na_c (Nacelle angle corrected) -- average is there though. However, it's NA except for a very small chunk of time.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import math

In [None]:
# Read the semicolon-separated measurements file.
df = pd.read_csv("la-haute-borne-data-2017-2020.csv", sep=';')

# Parsed timestamp column, used for chronological ordering and time-axis plots.
df['Date_time_dt'] = pd.to_datetime(df['Date_time'], infer_datetime_format=True)

# Integer code per turbine name (names absent from the mapping become NaN).
turbine_codes = {'R80711': 0, 'R80721': 1, 'R80736': 2, 'R80790': 3}
df['Wind_turbine_id'] = df['Wind_turbine_name'].map(turbine_codes)

df = df.sort_values('Date_time_dt')
#df = df[:5000]

In [None]:
# Variable metadata, keyed by short variable name:
#   {variable_name: {column_name: value, ...}, ...}
data_description = (
    pd.read_csv('data_description.csv', delimiter=';')
    .set_index('Variable_name')
    .to_dict(orient='index')
)
#print(data_description)

# Human-readable label per variable: long name, plus " [unit]" when a unit exists.
data_labels = {}
for k, entry in data_description.items():
    text = entry['Variable_long_name'].replace('_', ' ')
    unit = entry['Unit_long_name']
    # Missing units are read as NaN (a truthy float), hence the explicit str check.
    if unit and isinstance(unit, str):
        text += ' [{}]'.format(unit.replace('deg_', '°'))
    data_labels[k] = text

def GetLabel(label) :
    """Turn a dataframe column name into a readable axis label.

    Falsy input (``None`` or ``''``) is returned unchanged. Otherwise the
    variable stem (the name with ``_avg``/``_std``/``_min``/``_max`` removed)
    is looked up in ``data_labels`` and substituted, an ``_avg`` suffix is
    converted to an ``Avg `` prefix, and the words ``temperature`` and
    ``power`` are abbreviated to ``T`` and ``P``.
    """
    if not label :
        return label

    suffixes = ('_avg', '_std', '_min', '_max')
    for key, text in data_labels.items() :
        # Recompute the stem each pass: label may have been rewritten already.
        stem = label
        for suffix in suffixes :
            stem = stem.replace(suffix, '')
        if stem == key :
            label = label.replace(key, text)

    if '_avg' in label :
        label = 'Avg ' + label.replace('_avg', '')

    return label.replace('temperature', 'T').replace('power', 'P')

# Group variable names by a keyword found in their readable label.
temperatures = [name for name, text in data_labels.items() if 'temperature' in text]

gearboxVars = [name for name, text in data_labels.items() if 'Gearbox' in text]

generatorVars = [name for name, text in data_labels.items() if 'Generator' in text]

windVars = [name for name, text in data_labels.items()
            if 'Wind' in text or 'Vane' in text]

# Print every variable with its label: one paragraph per group, then the rest.
label_groups = (temperatures, gearboxVars, generatorVars, windVars)
for group in label_groups :
    for k in group :
        print(k, data_labels[k])
    print()

# Anything that did not land in one of the groups above.
already_listed = set().union(*label_groups)
for k, text in data_labels.items() :
    if k not in already_listed :
        print(k, text)

Temperatures
=======

Generator Temperatures
-------

 - Dst Generator stator temperature [°C]
 - DCs Generator converter speed [rpm]
 - Ds Generator speed [rpm]
 - Db1t Generator bearing 1 temperature [°C]
 - Db2t Generator bearing 2 temperature [°C]

In [None]:
# Columns to compare; the dataframe stores each one under "<name>_avg".
temperatures = ['Ds','Ot','Yt','Dst','Db1t','Db2t']
avg_temperatures = [name + '_avg' for name in temperatures]

# Scatter matrix with KDE diagonals, plus density contours on the lower triangle.
pairplot = sns.pairplot(
    df[avg_temperatures],
    diag_kind="kde",
    plot_kws={"marker": "+", "linewidth": 1},
)
pairplot.map_lower(sns.kdeplot, levels=4, color=".2")
pairplot.fig.set_size_inches(15, 15)

# Replace raw column names on every panel with readable labels.
for panel in pairplot.axes.flatten() :
    panel.set_ylabel(GetLabel(panel.get_ylabel()))
    panel.set_xlabel(GetLabel(panel.get_xlabel()))

In [None]:
# Git Gearbox inlet temperature [°C]
# Ot Outdoor temperature [°C]
# Dst Generator stator temperature [°C]
# Yt Nacelle temperature [°C]
# Gost Gearbox oil sump temperature [°C]
# Gb2t Gearbox bearing 2 temperature [°C]
# Db2t Generator bearing 2 temperature [°C]
# Rt Hub temperature [°C]
# Gb1t Gearbox bearing 1 temperature [°C]
# Db1t Generator bearing 1 temperature [°C]
# Rbt Rotor bearing temperature [°C]

# Temperature channels plus active power ('P'), each read from its "_avg" column.
temperatures = ['Ot','Gb1t','Db1t','Yt','Rt','Rbt','Dst','Git','Gost','P']
avg_temperatures = ['{}_avg'.format(name) for name in temperatures]

grid_data = df[avg_temperatures]
pairplot = sns.pairplot(grid_data, diag_kind="kde",
                        plot_kws={"marker": "+", "linewidth": 1})
# Overlay 4-level density contours on the lower-triangle panels.
pairplot.map_lower(sns.kdeplot, levels=4, color=".2")
pairplot.fig.set_size_inches(15, 15)

# Human-readable axis labels instead of raw column names.
for panel in pairplot.axes.flatten() :
    raw_y, raw_x = panel.get_ylabel(), panel.get_xlabel()
    panel.set_ylabel(GetLabel(raw_y))
    panel.set_xlabel(GetLabel(raw_x))

In [None]:
# Generator stator temperature against generator speed.
variables = ['Dst','Ds']
avg_variables = [name + '_avg' for name in variables]

scatter_style = {"marker": "+", "linewidth": 1}
pairplot = sns.pairplot(df[avg_variables], plot_kws=scatter_style, diag_kind="kde")
pairplot.map_lower(sns.kdeplot, levels=4, color=".2")
pairplot.fig.set_size_inches(15, 15)

# Relabel each panel with the descriptive variable names.
for panel in pairplot.axes.flatten() :
    panel.set_xlabel(GetLabel(panel.get_xlabel()))
    panel.set_ylabel(GetLabel(panel.get_ylabel()))

Generator Variables
----------

 - Dst Generator stator temperature [°C]
 - DCs Generator converter speed [rpm]
 - Ds Generator speed [rpm]
 - Db2t Generator bearing 2 temperature [°C]
 - Db1t Generator bearing 1 temperature [°C]

In [None]:
# Generator speed and the three generator temperature channels.
variables = ['Ds','Dst','Db1t','Db2t']
avg_variables = ['%s_avg' % name for name in variables]

pairplot = sns.pairplot(
    df[avg_variables],
    plot_kws={"marker": "+", "linewidth": 1},
    diag_kind="kde",
)
# Density contours on the lower triangle only.
pairplot.map_lower(sns.kdeplot, levels=4, color=".2")
pairplot.fig.set_size_inches(15, 15)

# Swap column names for readable labels on all panels.
for panel in pairplot.axes.flatten() :
    panel.set_ylabel(GetLabel(panel.get_ylabel()))
    panel.set_xlabel(GetLabel(panel.get_xlabel()))

Wind speed
=======

In [None]:
# Q Reactive power [kVAr]
# Ws Wind speed [m/s]
# Va2 Vane position 2 [deg]
# Ws2 Wind speed 2 [m/s]
# Nf Grid frequency [Hz]
# Nu Grid voltage [V]
# Wa_c Absolute wind direction corrected [deg]
# DCs Generator converter speed [rpm]
# Na_c Nacelle angle corrected [deg]
# Ya Nacelle angle [deg]
# Rm Torque [Nm]
# Rs Rotor speed [rpm]
# Wa Absolute wind direction [deg]
# Ba Pitch angle [deg]
# Ds Generator speed [rpm]
# Va Vane position [deg]
# Cm Converter torque [Nm]
# Ws1 Wind speed 1 [m/s]
# S Apparent power [kVA]
# P Active power [kW]
# Cosphi Power factor
# Va1 Vane position 1 [deg]
# Pas Pitch angle setpoint

# Power, wind speed, generator/rotor speed and the two torque channels.
variables = ['P','Ws','Ds','Rs','Cm','Rm']
avg_variables = [name + '_avg' for name in variables]

selected = df[avg_variables]
pairplot = sns.pairplot(selected, diag_kind="kde",
                        plot_kws={"marker": "+", "linewidth": 1})
pairplot.map_lower(sns.kdeplot, levels=4, color=".2")
pairplot.fig.set_size_inches(15, 15)

# Descriptive labels on every panel of the grid.
for panel in pairplot.axes.flatten() :
    panel.set_xlabel(GetLabel(panel.get_xlabel()))
    panel.set_ylabel(GetLabel(panel.get_ylabel()))

In [None]:
# Keep only rows with positive torque. .copy() yields an independent frame, so
# the derived column below is not written into a slice of df (the previous
# `[:]` is a slice, not a copy, and triggers pandas' SettingWithCopyWarning).
df_tmp = df[df['Rm_avg'] > 0].copy()

# Residual between scaled torque and active power.
# NOTE(review): 0.19 is an unexplained scale factor — confirm its origin.
df_tmp['Torque - Power'] = 0.19*df_tmp['Rm_avg'] - df_tmp['P_avg']

variables = ['S','Rm']
avg_variables = [a + '_avg' for a in variables]
avg_variables.append('Torque - Power')

pairplot = sns.pairplot(df_tmp[avg_variables],
                        plot_kws=dict(marker="+", linewidth=1),diag_kind="kde")
pairplot.map_lower(sns.kdeplot, levels=4, color=".2")
pairplot.fig.set_size_inches(15,15)

# Replace raw column names with readable labels on every panel.
for axis in pairplot.axes.flatten() :
    axis.set_ylabel(GetLabel(axis.get_ylabel()))
    axis.set_xlabel(GetLabel(axis.get_xlabel()))

In [None]:
# 2x2 grid of scatter plots, every point coloured by turbine id.
# Axis labels now name the columns that are actually plotted (Q was previously
# labelled "Average S" and wind speed "Average P").
fig,ax = plt.subplots(2,2,figsize=(16,12))
turbine_colors = df['Wind_turbine_id']

# Top-left: active power against wind speed (Ws1).
ax[0][0].scatter(df['Ws1_avg'],df['P_avg'],c=turbine_colors)
ax[0][0].set_xlabel('Average wind speed')
ax[0][0].set_ylabel('Average P')

# Bottom-left: reactive power against active power.
ax[1][0].scatter(df['P_avg'],df['Q_avg'],c=turbine_colors)
ax[1][0].set_xlabel('Average P')
ax[1][0].set_ylabel('Average Q')

# Top-right: reactive power against active power.
# NOTE(review): this panel plots the same data as the bottom-left one; it may
# have been meant to show S_avg against P_avg — confirm before changing the data.
ax[0][1].scatter(df['P_avg'],df['Q_avg'],c=turbine_colors)
ax[0][1].set_xlabel('Average P')
ax[0][1].set_ylabel('Average Q')

# Bottom-right: apparent power against wind speed (Ws1).
scatter = ax[1][1].scatter(df['Ws1_avg'],df['S_avg'],c=turbine_colors)
ax[1][1].set_xlabel('Average wind speed')
ax[1][1].set_ylabel('Average S')

# Colour legend: one entry per distinct turbine id.
legend1 = ax[1][1].legend(*scatter.legend_elements(),loc="upper left", title="turbine")
ax[1][1].add_artist(legend1)

# ax[1][1].scatter(df[df['Wind_turbine_id'] == 0]['Ws1_avg'],
#               df[df['Wind_turbine_id'] == 0]['Q_avg'],
#               c=df[df['Wind_turbine_id'] == 0]['Wind_turbine_id'])
# ax[1][1].set_xlabel('Average wind speed')
# ax[1][1].set_ylabel('Average Q')

In [None]:
# Single 8x6 inch axes that the power-curve calls below draw onto.
fig,ax = plt.subplots(figsize=(8,6))

def plotPowerCurve(_ax,_df,turbine_id) :
    """Draw a binned power curve for a single turbine.

    Rows of ``_df`` whose ``Wind_turbine_id`` equals ``turbine_id`` are grouped
    into 0.2 m/s-wide ``Ws_avg`` bins spanning 0-20; for each populated bin the
    mean of ``P_avg`` is drawn at the bin centre with its standard deviation
    as the error bar.

    Parameters:
        _ax: object with a matplotlib-style ``errorbar`` method.
        _df: dataframe with ``Wind_turbine_id``, ``Ws_avg`` and ``P_avg`` columns.
        turbine_id: turbine to select; also used in the legend label.

    Returns:
        None. The curve is added to ``_ax``.
    """
    power = 'P_avg'

    # Select this turbine only, and copy so the bin column added below is not
    # written into a slice of the caller's frame.
    is_turbine = _df['Wind_turbine_id'] == turbine_id
    selected = _df.loc[is_turbine, ['Ws_avg', power]].copy()

    bins = np.linspace(0, 20, 101)
    bin_centers = bins[:-1] + np.diff(bins) / 2
    selected['Ws_bin'] = pd.cut(selected['Ws_avg'], bins, labels=bin_centers)

    # One aggregation yields x, y and yerr from the same index, so their
    # lengths always agree; observed=True keeps only bins that contain data.
    stats = selected.groupby('Ws_bin', observed=True)[power].agg(['mean', 'std'])

    _ax.errorbar(stats.index.astype(float), stats['mean'], yerr=stats['std'],
                 fmt='o', label='Turbine %d' % (turbine_id))
#ax.scatter(df_tmp.groupby('Ws_bin').index,df_tmp.groupby('Ws_bin').mean())
#ax.errorbar(gb.groups.keys(),gb.mean()['P_avg'],yerr=gb.std()['P_avg'], fmt='o',label='asdf')

# Draw the power curve with turbine_id=0; the calls for ids 1-3 are disabled.
plotPowerCurve(ax,df,0)
#plotPowerCurve(ax,df,1)
#plotPowerCurve(ax,df,2)
#plotPowerCurve(ax,df,3)
# Show the 'Turbine N' label(s) registered by the call(s) above.
ax.legend()

Understanding the Nacelle temperature
-------

If $T_y = P\times R$, what is $P$ ?

In [None]:
variables = ['Yt','Ot','S']

# Work on an explicit copy before adding a column: `df[:]` is a slice rather
# than a copy, and assigning to it triggers pandas' SettingWithCopyWarning.
df = df.copy()

# Nacelle temperature minus outdoor temperature.
df['Yt - Ot'] = df['Yt_avg'] - df['Ot_avg']
avg_variables = [a + '_avg' for a in variables]
avg_variables.append('Yt - Ot')

pairplot = sns.pairplot(df[avg_variables],
                        plot_kws=dict(marker="+", linewidth=1),diag_kind="kde")
pairplot.map_lower(sns.kdeplot, levels=4, color=".2")
pairplot.fig.set_size_inches(15,15)

# Replace raw column names with readable labels on every panel.
for axis in pairplot.axes.flatten() :
    axis.set_ylabel(GetLabel(axis.get_ylabel()))
    axis.set_xlabel(GetLabel(axis.get_xlabel()))

In [None]:
# Wide canvas for a time-series view.
fig, ax = plt.subplots(figsize=(16, 6))

# Rows belonging to the turbine mapped to id 0.
tmp_df = df.loc[df['Wind_turbine_id'] == 0]

#ax.plot(tmp_df['Date_time_dt'],tmp_df['Va_avg'].fillna(-99) + 0)
#ax.plot(tmp_df['Date_time_dt'],tmp_df['Va1_avg'].fillna(-99) + 5)
#ax.plot(tmp_df['Date_time_dt'],tmp_df['Va2_avg'].fillna(-99) + 10)
#ax.plot(tmp_df['Date_time_dt'],tmp_df['Va2_std'].fillna(-99)-tmp_df['Va1_std'].fillna(-99))

In [None]:
fig,ax = plt.subplots(figsize=(16,6))

# First 2000 rows of turbine 3. .copy() makes the frame independent, so the
# derived columns below are not written into a slice of df.
tmp_df = df[df['Wind_turbine_id'] == 3][:2000].copy()

# Vane position, falling back to Va1 wherever Va is missing.
tmp_df['Va_avg_composite'] = tmp_df['Va_avg'].fillna(tmp_df['Va1_avg'])
# Vane position plus nacelle angle, wrapped with modulo 360.
tmp_df['Va+Ya'] = (tmp_df['Va_avg_composite'] + tmp_df['Ya_avg'])%360

# (column, marker) pairs; a leading '-' disables a series without removing it.
# Alternative set to inspect: 'Ws_std  Ws1_std Ws2_std'
series_markers = [
    ('-Va_avg_composite', 'o'),
    ('-Ya_avg', 'v'),
    ('Wa_avg', '^'),
    ('-Ba_avg', 'x'),
    ('Va+Ya', '+'),
]
for var, marker in series_markers :
    if var.startswith('-') :
        continue
    # Missing values are drawn at the sentinel -50 so gaps remain visible.
    ax.plot(tmp_df['Date_time_dt'],tmp_df[var].fillna(-50),label=var,marker=marker)

ax.legend()

# Angular Data - USEABLE:
# Some composite of Va (vane positions)
# Ya Nacelle angle [deg]
# Wa Absolute wind direction [deg]
# Ba Pitch angle [deg] -- useable, but maybe not useful? Not sure.

# Angular Data - UNUSEABLE:
# Wa_c Absolute wind direction corrected [deg]: average exists, but this variable is only available for ~2 months.
# Na_c Nacelle angle corrected [deg]: average exists, but this variable is only available for ~2 months.
# Ws Wind speed [m/s]: It is the average of Ws1 and Ws2...
#   - ... the problem is that sometimes one reading is wrong, and therefore averaging is not great.
#   - Note also that the standard deviation is the average of standard deviations, which is again quite weird.