In [None]:
# ! module load mambaforge
# ! mamba create -n wind_forecasting_env python=3.12
# ! mamba activate wind_forecasting_env
# ! conda install -c conda-forge jupyterlab mpi4py impi_rt
# ! pip install ./OpenOA # have to change pyproject.toml to allow for python 3.12.7
# ! pip install floris polars windrose netCDF4 statsmodels h5pyd seaborn pyarrow

#%load_ext memory_profiler
from data_loader import DataLoader
from data_filter import DataFilter
from data_inspector import DataInspector
from openoa.utils import qa, plot, filters, power_curve
import polars.selectors as cs
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
from sys import platform
import os

## Print NetCDF Data Structure, Load Data, Transform Datetime Columns

In [2]:
PLOT = False  # enables every plotting cell that is guarded by `if PLOT:`
RELOAD_DATA = False  # when True, the cached parquet file is ignored and the NetCDF files are re-read

# Machine-specific locations, selected by operating system.
# NOTE(review): the save paths spell "zo2" while DATA_DIR / FILE_SIGNATURE spell "z02";
# left untouched so an existing cache file is still found — confirm which spelling is intended.
if platform == "darwin":
    DATA_DIR = "/Users/ahenry/Documents/toolboxes/wind_forecasting/examples/data"
    PL_SAVE_PATH = "/Users/ahenry/Documents/toolboxes/wind_forecasting/examples/data/short_kp.turbine.zo2.b0.raw.parquet"
    FILE_SIGNATURE = "kp.turbine.z02.b0.20220301.*.*.nc"
    MULTIPROCESSOR = "cf"
    TURBINE_INPUT_FILEPATH = "/Users/ahenry/Documents/toolboxes/wind_forecasting/examples/inputs/ge_282_127.yaml"
    FARM_INPUT_FILEPATH = "/Users/ahenry/Documents/toolboxes/wind_forecasting/examples/inputs/gch_KP_v4.yaml"
elif platform == "linux":
    DATA_DIR = "/pl/active/paolab/awaken_data/kp.turbine.z02.b0/"
    PL_SAVE_PATH = "/scratch/alpine/aohe7145/awaken_data/kp.turbine.zo2.b0.raw.parquet"
    FILE_SIGNATURE = "kp.turbine.z02.b0.20220301.*.*.nc"
    MULTIPROCESSOR = "mpi"
    TURBINE_INPUT_FILEPATH = "/projects/aohe7145/toolboxes/wind-forecasting/examples/inputs/ge_282_127.yaml"
    FARM_INPUT_FILEPATH = "/projects/aohe7145/toolboxes/wind-forecasting/examples/inputs/gch_KP_v4.yaml"
else:
    # Fail here with a clear message instead of a NameError on the DataLoader call below.
    raise RuntimeError(f"No data paths are configured for platform '{platform}'; expected 'darwin' or 'linux'.")

DT = 5  # forwarded to DataLoader as `dt`; presumably the sampling period — TODO confirm units

# Loader for the raw NetCDF files; `features` lists the columns kept downstream.
data_loader = DataLoader(data_dir=DATA_DIR, file_signature=FILE_SIGNATURE, multiprocessor=MULTIPROCESSOR, save_path=PL_SAVE_PATH, dt=DT,
                         features=["turbine_id", "time", "turbine_status", "wind_direction", "wind_speed", "power_output", "nacelle_direction"])

In [None]:
data_loader.print_netcdf_structure(data_loader.file_paths[0])

In [4]:
# Reuse the cached parquet file when it exists and a reload is not forced;
# otherwise build the lazy query from the raw NetCDF files.
if not RELOAD_DATA and os.path.exists(data_loader.save_path):
    # Expected column names and dtypes of the cached parquet file (the original note here
    # referred to a CSV, but the file is read with scan_parquet).
    schema = pl.Schema({"turbine_id": pl.String(),
                        "time": pl.Datetime(time_unit="ms"),
                        "turbine_status": pl.Float64,
                        "wind_direction": pl.Float64,
                        "wind_speed": pl.Float64,
                        "power_output": pl.Float64,
                        "nacelle_direction": pl.Float64,
                       })
    
    # NOTE(review): `hive_schema` describes hive *partition* columns rather than the columns
    # stored inside the file — confirm this schema is actually applied to the scan.
    df_query = pl.scan_parquet(source=data_loader.save_path, hive_schema=schema)
else:
    df_query = data_loader.read_multi_netcdf()

## Plot Wind Farm, Data Distributions

In [5]:
data_inspector = DataInspector(turbine_input_filepath=TURBINE_INPUT_FILEPATH, farm_input_filepath=FARM_INPUT_FILEPATH)

In [6]:
if PLOT:
    data_inspector.plot_wind_farm()

In [7]:
if PLOT:
    data_inspector.plot_wind_speed_power(df_query, turbine_ids=["wt073"])

In [8]:
if PLOT:
    data_inspector.plot_wind_speed_weibull(df_query, turbine_ids=["wt073"])

In [9]:
if PLOT:
    data_inspector.plot_wind_rose(df_query, turbine_ids=["wt073"])

In [10]:
if PLOT:
    data_inspector.plot_correlation(df_query, ["wind_speed", "wind_direction", "nacelle_direction"])

In [11]:
if PLOT:
    data_inspector.plot_boxplot_wind_speed_direction(df_query, turbine_ids=["wt073"])

In [12]:
if PLOT:
    data_inspector.plot_time_series(df_query, turbine_ids=["wt073"])

## OpenOA Data Preparation & Inspection

In [13]:
def collect_data(df, features=None, mask=None, to_pandas=True):
    """Materialise a lazy query, optionally restricted to some rows and columns.

    Args:
        df: Lazy frame providing ``filter``, ``select`` and ``collect(streaming=True)``.
        features: Column name or list of column names to keep; ``None`` keeps every column.
        mask: Row predicate handed to ``df.filter``; ``None`` keeps every row.
        to_pandas: When True the collected frame is converted with ``to_pandas()``.

    Returns:
        The collected frame, converted to pandas when ``to_pandas`` is True.
    """
    # Filter rows before projecting columns, so that an expression mask may
    # reference columns that are not part of `features`.
    if mask is not None:
        df = df.filter(mask)

    if features is not None:
        df = df.select(features)

    collected = df.collect(streaming=True)
    return collected.to_pandas() if to_pandas else collected

In [None]:
# Compare the requested features with the columns actually present, then summarise the full dataset.
print(f"Features of interest = {data_loader.features}")
# Resolve the lazy schema explicitly; reading `.columns` on a LazyFrame is discouraged by polars.
print(f"Available features = {df_query.collect_schema().names()}")
qa.describe(collect_data(df=df_query))

In [15]:
if PLOT:
    plot.column_histograms(collect_data(df=df_query, features=["wind_speed", "wind_direction", "power_output", "nacelle_direction"]))

In [16]:
data_filter = DataFilter(turbine_availability_col=None, turbine_status_col="turbine_status")

### Unresponsive Sensor Filter

In [None]:
# Run OpenOA's unresponsive-sensor filter (threshold=3) on each signal and keep the
# first column of every result as a flat flag array.
ws_frozen_sensor, wd_frozen_sensor, pwr_frozen_sensor = (
    filters.unresponsive_flag(data=collect_data(df=df_query, features=signal), threshold=3).values[:, 0]
    for signal in ("wind_speed", "wind_direction", "power_output")
)

# Summarise the flagged readings of the three signals side by side.
qa.describe(
    pl.concat(
        [
            collect_data(df=df_query, features=signal, mask=frozen, to_pandas=False)
            for signal, frozen in (
                ("wind_speed", ws_frozen_sensor),
                ("wind_direction", wd_frozen_sensor),
                ("power_output", pwr_frozen_sensor),
            )
        ],
        how="horizontal",
    ).to_pandas()
)

In [18]:
# print(df_query.collect(streaming=True).shape)

In [19]:
if PLOT:
    # One power-curve scatter per signal, highlighting the readings flagged as
    # unresponsive for that signal.
    for frozen_flag, frozen_label in (
        (ws_frozen_sensor, f"Wind Speed Unresponsive Sensors (n={ws_frozen_sensor.sum():,.0f})"),
        (wd_frozen_sensor, f"Wind Direction Unresponsive Sensors (n={wd_frozen_sensor.sum():,.0f})"),
        (pwr_frozen_sensor, f"Power Output Unresponsive Sensors (n={pwr_frozen_sensor.sum():,.0f})"),
    ):
        plot.plot_power_curve(
            collect_data(df=df_query, features="wind_speed"),
            collect_data(df=df_query, features="power_output"),
            flag=frozen_flag,
            flag_labels=(frozen_label, "Normal Turbine Operations"),
            xlim=(-1, 15),  # axis limits used to refine the plot
            ylim=(-100, 3000),
            legend=True,
            scatter_kwargs=dict(alpha=0.4, s=10),
        )

In [20]:
# Blank out the readings flagged as frozen, signal by signal, instead of dropping whole rows:
# the other sensors on the same row could still be functioning properly.
# `when(...).then(...)` without an `otherwise` leaves null where the condition is false.
df_query = df_query.with_columns(
    [
        pl.when(~frozen).then(pl.col(signal)).alias(signal)
        for signal, frozen in (
            ("wind_speed", ws_frozen_sensor),
            ("wind_direction", wd_frozen_sensor),
            ("power_output", pwr_frozen_sensor),
        )
    ]
)

In [21]:
# print(df_query.collect(streaming=True).shape)

In [22]:
del ws_frozen_sensor
del wd_frozen_sensor
del pwr_frozen_sensor

In [23]:
df_query = data_filter.resolve_missing_data(df_query, features=["wind_speed", "wind_direction", "power_output"], how="forward_fill")

In [None]:
print(df_query.collect(streaming=True).shape)

## Remove Inoperational Turbine Rows, Interpolate Null Data

In [25]:
# TODO QUESTION ERIC will the wind speed/dir measurements from non-operational turbines still be okay
#df_query = data_filter.filter_inoperational(df_query, status_codes=[1], include_nan=True)
#print(df_query.explain(streaming=True))
#df_query.head().collect(streaming=True)
df_query = data_filter.filter_inoperational(df_query, status_codes=[1], include_nan=True)

In [None]:
print(df_query.collect(streaming=True).shape)

### Wind Speed Range Filter

In [None]:
# TODO QUESTION ERIC will the wind speed/dir measurements from zero power turbines still be okay
# Flag wind speed readings with OpenOA's range_flag for the bounds lower=0, upper=70
# (presumably m/s — TODO confirm units); null readings are explicitly excluded from the flag.
ws = collect_data(df=df_query, features="wind_speed")
out_of_range = (filters.range_flag(ws, lower=0, upper=70)  & ~ws.isna()).values[:, 0]
# release the materialised column; only the flag array is needed from here on
del ws
# summarise the flagged readings
qa.describe(collect_data(df=df_query, features="wind_speed", mask=out_of_range))

In [None]:
print(df_query.collect(streaming=True).shape)

In [29]:
#x = df_query.collect()
#print(x.shape)
#print(out_of_range.shape)

df_query = df_query.filter(~out_of_range)
#df_query.collect(streaming=True)

In [None]:
print(df_query.collect(streaming=True).shape)

In [31]:
del out_of_range

In [None]:
print(df_query.collect(streaming=True).shape)

### Power Curve Window Range Filter

In [None]:
# apply a window range filter to flag data with power values outside of the window from 20 to 3000 kW
# for wind speeds between 5 and 40 m/s (bounds as passed to window_range_flag below).
# NOTE: despite its name, `ser` is a two-column pandas DataFrame (wind_speed, power_output).
ser = df_query.select(["wind_speed", "power_output"]).collect(streaming=True).to_pandas()

out_of_window = filters.window_range_flag(window_col=ser["wind_speed"],
                                         window_start=5., window_end=40., 
                                         value_col=ser["power_output"],
                                         value_min=20., value_max=3000.).values
# summarise the flagged rows
qa.describe(collect_data(df=df_query, features=["wind_speed", "power_output"], mask=out_of_window))

In [None]:
print(df_query.collect(streaming=True).shape)

In [35]:
if PLOT:
    # plot values that are outside of power-wind speed range
    plot.plot_power_curve(
        collect_data(df_query, "wind_speed"),
        collect_data(df_query, "power_output"),
        flag=out_of_window,
        flag_labels=("Outside Acceptable Window", "Acceptable Power Curve Points"),
        xlim=(-1, 15),
        ylim=(-100, 3000),
        legend=True,
        scatter_kwargs=dict(alpha=0.4, s=10)
    )

In [36]:
#  TODO ASK ERIC doesn't matter since we only care about ws/ws? remove rows corresponding to values that are outside of power-wind speed window range
# remove rows corresponding to values that are outside of power-wind speed window range
df_query = df_query.filter(~out_of_window)

In [None]:
print(df_query.collect(streaming=True).shape)

In [38]:
del out_of_window

In [None]:
print(df_query.collect(streaming=True).shape)

### Power Curve Bin Filter

In [None]:
# apply a bin filter to remove data with power values outside of an envelope around median power curve at each wind speed
max_bin = 0.90 * df_query.select("power_output").max().collect(streaming=True).item()
bin_outliers = filters.bin_filter(bin_col=collect_data(df_query, "power_output")["power_output"], 
                                  value_col=collect_data(df_query, "wind_speed")["wind_speed"], 
                                  bin_width=50, threshold=3,
                                  center_type="median", 
                                  bin_min=20., bin_max=max_bin,
                                  threshold_type="scalar", direction="below",
                                  ).to_numpy()
qa.describe(collect_data(df=df_query, features=["wind_speed", "power_output"], mask=bin_outliers))

In [None]:
if PLOT:
    # plot values outside the power-wind speed bin filter
    plot.plot_power_curve(
        collect_data(df=df_query, features="wind_speed"),
        collect_data(df=df_query, features="power_output"),
        flag=bin_outliers,
        flag_labels=("Anomylous Data", "Normal Wind Speed Sensor Operation"),
        xlim=(-1, 15),
        ylim=(-100, 3000),
        legend=True,
        scatter_kwargs=dict(alpha=0.4, s=10)
    )

In [41]:
# remove rows corresponding to values that are outside of power-wind speed bins 
df_query = df_query.filter(~bin_outliers)

### Power Curve Fitting

In [42]:
# Fit the power curves
# Materialise the two columns once and reuse them for every fit, instead of
# collecting the full lazy query separately for each argument of each fit.
curve_fit_data = collect_data(df=df_query, features=["wind_speed", "power_output"])
iec_curve = power_curve.IEC(curve_fit_data["wind_speed"], curve_fit_data["power_output"])
l5p_curve = power_curve.logistic_5_parametric(curve_fit_data["wind_speed"], curve_fit_data["power_output"])
spline_curve = power_curve.gam(curve_fit_data["wind_speed"], curve_fit_data["power_output"], n_splines=20)
# the fitted curves are all that is needed from here on
del curve_fit_data

In [None]:
if PLOT:
    fig, ax = plot.plot_power_curve(
        collect_data(df=df_query, features="wind_speed"),
        collect_data(df=df_query, features="power_output"),
        flag=np.zeros(collect_data(df=df_query, features="wind_speed").shape, dtype=bool),
        flag_labels=("", "Filtered Power Curve"),
        xlim=(-1, 15),  # optional input for refining plots
        ylim=(-100, 3000),  # optional input for refining plots
        legend=False,  # optional flag for adding a legend
        scatter_kwargs=dict(alpha=0.4, s=10),  # optional input for refining plots
        return_fig=True,
    )

    x = np.linspace(0, 20, 100)
    ax.plot(x, iec_curve(x), color="red", label = "IEC", linewidth = 3)
    ax.plot(x, spline_curve(x), color="C1", label = "Spline", linewidth = 3)
    ax.plot(x, l5p_curve(x), color="C2", label = "L5P", linewidth = 3)

    ax.legend()

    fig.tight_layout()
    plt.show()

## Nacelle Calibration

### Find and correct wind direction offsets from median wind plant wind direction for each turbine

In [None]:
import pandas as pd


In [None]:

turbine_ids = df_query.select("turbine_id").unique().collect(streaming=True).to_numpy()[:, 0]

# add the 3 degrees back to the wind direction signal
offset = 3.0
# Parenthesise the sum: `%` binds tighter than `+`, so without the parentheses only the
# constant would be reduced modulo 360 and the shifted direction would never be wrapped.
df_query2 = df_query.with_columns(((pl.col("wind_direction") + offset) % 360.0).alias("wind_direction"))

# TODO make sure that all power values are >= 0 at this point
wd_median = collect_data(df_query2, ["time", "turbine_id", "wind_direction"]).groupby("time")["wind_direction"].median()
wd_median = np.degrees(np.arctan2(np.sin(np.radians(wd_median)), np.cos(np.radians(wd_median))))
wd_median = pd.concat([
    collect_data(df_query2, ["time", "turbine_id", "wind_direction", "power_output"])\
    .pivot(index="time", columns="turbine_id", values="power_output")\
        .rename(columns={old_col: f"power_output_{old_col}" for old_col in turbine_ids}), 
    wd_median], axis=1)

yaw_median = collect_data(df_query2, ["time", "turbine_id", "nacelle_direction"]).groupby("time")["nacelle_direction"].median()
yaw_median = np.degrees(np.arctan2(np.sin(np.radians(yaw_median)), np.cos(np.radians(yaw_median))))
yaw_median = pd.concat([
    collect_data(df_query2, ["time", "turbine_id", "wind_direction", "power_output"])\
    .pivot(index="time", columns="turbine_id", values="power_output")\
        .rename(columns={old_col: f"power_output_{old_col}" for old_col in turbine_ids}), 
    yaw_median], axis=1)

fig, ax = plt.subplots(1, 1)
for turbine_id in turbine_ids:
    df = collect_data(df=df_query2, 
                        features=["time", "turbine_id", "wind_direction", "power_output"], 
                        mask=((pl.col("turbine_id") == turbine_id) & (pl.col("power_output") >= 0)))
                        
    ax.plot(df["time"], DataFilter.wrap_180(
                        df.pivot(index="time", columns="turbine_id", values="wind_direction").values 
                        - wd_median.loc[(wd_median[f"power_output_{turbine_id}"] >= 0), "wind_direction"].values[:, np.newaxis]),
                        label=f"{turbine_id}")

ax.legend()
ax.set_xlabel("Time (s)")
ax.set_ylabel("Wind Direction - Median Wind Direction (deg)")

ax.set_title("Original")

In [None]:
#df_query2.select(["time", "turbine_id", "wind_direction", "power_output", "nacelle_direction"]).filter(((pl.col("turbine_id") == turbine_id) & (pl.col("power_output") >= 0))).collect(streaming=True)
# collect_data(df=df_query2, 
#                         features=["time", "turbine_id", "wind_direction", "power_output", "nacelle_direction"], 
#                         mask=((pl.col("turbine_id") == turbine_id) & (pl.col("power_output") >= 0)))

In [None]:
df_offsets = {"turbine_id": [], "northing_bias": []}

# remove biases from median direction
for turbine_id in turbine_ids:
    df = collect_data(df=df_query2,
                      features=["time", "turbine_id", "wind_direction", "power_output", "nacelle_direction"],
                      mask=((pl.col("turbine_id") == turbine_id) & (pl.col("power_output") >= 0)))

    # Farm-wide median directions at the time stamps where this turbine reports non-negative
    # power. `df` is indexed 0..n-1 while the medians are indexed by time, so the differences
    # below are taken on raw arrays (as in the plotting cell) to avoid pandas index alignment.
    # Assumes one row per turbine per time stamp, so both sides have equal length.
    wd_ref = wd_median.loc[wd_median[f"power_output_{turbine_id}"] >= 0, "wind_direction"]
    yaw_ref = yaw_median.loc[yaw_median[f"power_output_{turbine_id}"] >= 0, "nacelle_direction"]

    if turbine_id == "wt_001":
        # Only data from the change point onwards is used for this turbine.
        # NOTE(review): other cells use ids without an underscore (e.g. "wt073") and the loaded
        # files are dated 2022 — confirm that this id and change point apply to this dataset.
        change_point = "2021-08-03 19:20"
        df_recent = df["time"] >= change_point
        wd_bias = DataFilter.wrap_180(DataFilter.circ_mean(
            df.loc[df_recent, "wind_direction"].values
            - wd_ref.loc[wd_ref.index >= change_point].values))
        yaw_bias = DataFilter.wrap_180(DataFilter.circ_mean(
            df.loc[df_recent, "nacelle_direction"].values
            - yaw_ref.loc[yaw_ref.index >= change_point].values))
    else:
        wd_bias = DataFilter.wrap_180(DataFilter.circ_mean(df["wind_direction"].values - wd_ref.values))
        yaw_bias = DataFilter.wrap_180(DataFilter.circ_mean(df["nacelle_direction"].values - yaw_ref.values))

    # the northing bias is the average of the wind-direction and yaw biases
    df_offsets["turbine_id"].append(turbine_id)
    df_offsets["northing_bias"].append(np.round(0.5 * (wd_bias + yaw_bias), 2))

    if turbine_id != "wt_040":
        # NOTE(review): the correction is written to the per-turbine pandas copy `df`, which is
        # replaced on the next iteration; it is not propagated back to `df_query2` — confirm intent.
        df.loc[df["turbine_id"] == turbine_id, "wind_direction"] = (df.loc[df["turbine_id"] == turbine_id, "wind_direction"] - 0.5 * (wd_bias + yaw_bias)) % 360
        df.loc[df["turbine_id"] == turbine_id, "nacelle_direction"] = (df.loc[df["turbine_id"] == turbine_id, "nacelle_direction"] - 0.5 * (wd_bias + yaw_bias)) % 360
        print(f"Turbine {turbine_id} bias from median wind direction: {np.round(0.5 * (wd_bias + yaw_bias), 2)} deg.")

df_offsets = pd.DataFrame(df_offsets)
# handle special case of turbine 39 with a couple change points
"""
tid = "wd_040"
df = collect_data(df=df_query2, 
                        features=["time", "turbine_id", "wind_direction", "power_output", "nacelle_direction"], 
                        mask=((pl.col("turbine_id") == tid) & (pl.col("power_output") >= 0)))
wd_bias_1 = DataFilter.wrap_180(DataFilter.circ_mean(df.loc[
    (df['time'] <= "2021-06-09 19:30"), "wind_direction"].values \
        - wd_median.loc[
            (wd_median['time'] <= "2021-06-09 19:30") 
        & (wd_median[f"power_output_{tid}"] >= 0), "wind_direction"].values))

wd_bias_2 = DataFilter.wrap_180(DataFilter.circ_mean(df.loc[
    (df['time'] >= "2021-06-09 19:40")
    & (df['time'] <= "2021-09-14 19:50"), "wind_direction"].values \
        - wd_median.loc[
            (wd_median['time'] >= "2021-06-09 19:30")
            & (wd_median['time'] <= "2021-09-14 19:50")   
        & (wd_median[f"power_output_{tid}"] >= 0), "wind_direction"].values))

wd_bias_3 = DataFilter.wrap_180(DataFilter.circ_mean(df.loc[
    (df['time'] >= "2021-09-14 20:00"), "wind_direction"].values \
        - wd_median.loc[
            (wd_median['time'] >= "2021-09-14 20:00")
        & (wd_median[f"power_output_{tid}"] >= 0), "wind_direction"].values))

yaw_bias_1 = DataFilter.wrap_180(DataFilter.circ_mean(df.loc[
    (df['time'] <= "2021-06-09 19:30"), "nacelle_direction"].values \
        - yaw_median.loc[
            (yaw_median['time'] <= "2021-06-09 19:30") 
        & (yaw_median[f"power_output_{tid}"] >= 0), "wind_direction"].values))

yaw_bias_2 = DataFilter.wrap_180(DataFilter.circ_mean(df.loc[
    (df['time'] >= "2021-06-09 19:40")
    & (df['time'] <= "2021-09-14 19:50"), "nacelle_direction"].values \
        - yaw_median.loc[
            (yaw_median['time'] >= "2021-06-09 19:30")
            & (yaw_median['time'] <= "2021-09-14 19:50")   
        & (yaw_median[f"power_output_{tid}"] >= 0), "wind_direction"].values))

yaw_bias_3 = DataFilter.wrap_180(DataFilter.circ_mean(df.loc[
    (df['time'] >= "2021-09-14 20:00"), "nacelle_direction"].values \
        - yaw_median.loc[
            (yaw_median['time'] >= "2021-09-14 20:00")
        & (yaw_median[f"power_output_{tid}"] >= 0), "wind_direction"].values))

cond = (df['time'] <= "2021-06-09 19:30")
df.loc[cond, "wind_direction"] = (df.loc[cond, "wind_direction"] - 0.5 * (wd_bias_1 + yaw_bias_1)) % 360
df.loc[cond, "nacelle_direction"] = (df[cond, "nacelle_direction"] - 0.5 * (wd_bias_1 + yaw_bias_1)) % 360

cond = (df['time'] >= "2021-06-09 19:40") & (df['time'] <= "2021-09-14 19:50")
df.loc[cond, "wind_direction"] = (df.loc[cond, "wind_direction"] - 0.5 * (wd_bias_2 + yaw_bias_2)) % 360
df.loc[cond, "nacelle_direction"] = (df.loc[cond, "nacelle_direction"] - 0.5 * (wd_bias_2 + yaw_bias_2)) % 360

cond = (df['time'] >= "2021-09-14 20:00")
df.loc[cond, "wind_direction"] = (df.loc[cond, "wind_direction"] - 0.5 * (wd_bias_3 + yaw_bias_3)) % 360
df.loc[cond, "nacelle_direction"] = (df.loc[cond, "nacelle_direction"] - 0.5 * (wd_bias_3 + yaw_bias_3)) % 360

print("Biases from median wind direction for turbine 39:")

print(f"wd_bias_1: {wd_bias_1}")
print(f"wd_bias_2: {wd_bias_2}")
print(f"wd_bias_3: {wd_bias_3}")

print(f"yaw_bias_1: {yaw_bias_1}")
print(f"yaw_bias_2: {yaw_bias_2}")
print(f"yaw_bias_3: {yaw_bias_3}")

plt.figure()
for turbine_id in turbine_ids:
    plt.plot(df["time"], 
    DataFilter.wrap_180(df["wind_direction"].values - wd_median.loc[wd_median[f"power_output_{turbine_id}"] >= 0, "wind_direction"].values))

plt.xlabel("Time (s)")
plt.ylabel("Wind Direction - Median Wind Direction (deg)")
plt.title("Corrected")

# specific time of changepoints for turbine 39: 6/9 19:35:55; 9/14 19:55:02
"""
# make sure we have corrected the bias between wind direction and yaw position by adding 3 deg. to the wind direction
bias = 0
for turbine_id in turbine_ids:
    df = collect_data(df=df_query2, 
                        features=["time", "turbine_id", "wind_direction", "power_output", "nacelle_direction"], 
                        mask=((pl.col("turbine_id") == turbine_id) & (pl.col("power_output") >= 0)))[["wind_direction", "nacelle_direction"]]
    bias += DataFilter.wrap_180(DataFilter.circ_mean(df["wind_direction"] - df["nacelle_direction"]))
    
print(f"Average Bias = {bias / len(turbine_ids)}")

### Find offset to true North using wake loss profiles

In [162]:
# TODO Optimization function for finding waked direction
def gauss_corr(gauss_params, power_ratio):
    """Negative correlation between an inverted Gaussian and a power-ratio profile.

    The model ``1 - A * exp(-0.5 * ((x - mu) / sigma)**2)`` is evaluated on consecutive
    integer offsets ``x``, one per sample of ``power_ratio``, with offset 0 at the middle
    sample (for an even number of samples the extra offset is on the positive side).

    Args:
        gauss_params: Indexable ``(mu, sigma, A)``: centre offset, width and depth.
        power_ratio: 1-D sequence of power ratios.

    Returns:
        ``-1`` times the correlation coefficient of the model and ``power_ratio``, so that
        minimising this value maximises the correlation.
    """
    n_points = len(power_ratio)
    # Always build exactly n_points offsets; the previous int()-based bounds produced
    # n_points - 1 offsets for even-length input, which made np.corrcoef fail.
    first_offset = -((n_points - 1) // 2)
    xs = np.array(range(first_offset, first_offset + n_points))
    gauss = -1 * gauss_params[2] * np.exp(-0.5 * ((xs - gauss_params[0]) / gauss_params[1])**2) + 1.
    return -1 * np.corrcoef(gauss, power_ratio)[0, 1]

In [None]:
# TODO Find offsets between direction of alignment between pairs of turbines 
# and direction of peak wake losses. Use the average offset found this way 
# to identify the Northing correction that should be applied to all turbines 
# in the wind farm.
from scipy.stats import norm
from scipy.optimize import minimize

from floris import FlorisModel
fi = FlorisModel(data_inspector.farm_input_filepath)

p_min = 100
p_max = 2500

prat_hfwdth = 30

prat_turbine_pairs = [(61,60), (51,50), (43,42), (41,40), (18,19), (34,33), (17,16), (21,22), (87,86), (62,63), (32,33), (59,60), (42,43)]

dir_offsets = []

# For every (upstream, downstream) turbine pair: compare the geometric direction of alignment
# with the wind direction at which the downstream/upstream power ratio is lowest, and record
# the difference as a candidate Northing offset.
for i in range(len(prat_turbine_pairs)):
    i_up = prat_turbine_pairs[i][0]
    i_down = prat_turbine_pairs[i][1]

    # direction of alignment of the pair, computed from the FLORIS layout coordinates
    # NOTE(review): the layout arrays are indexed with the turbine number itself, while ids are
    # formatted as wt{number:03d} — confirm whether the numbering is 0- or 1-based.
    dir_align = np.degrees(np.arctan2(fi.layout_x[i_up] - fi.layout_x[i_down], fi.layout_y[i_up] - fi.layout_y[i_down])) % 360

    # df_sub = df_10min.loc[(df_10min['pow_%03d' % i_up] >= p_min) & (df_10min['pow_%03d' % i_up] <= p_max) & (df_10min['pow_%03d' % i_down] >= 0)]

    # keep upstream rows with power inside [p_min, p_max] and downstream rows with non-negative power
    # NOTE(review): this reads `df_query`, not the offset-adjusted `df_query2` — confirm intent.
    df_sub = df_query.filter(((pl.col("turbine_id") == f'wt{i_up:03d}') 
                              & (pl.col("power_output") >= p_min) 
                              & (pl.col("power_output") <= p_max)) 
                  | ((pl.col("turbine_id") == f'wt{i_down:03d}') 
                     & (pl.col("power_output") >= 0)))
    # NOTE(review): looks like leftover debugging — collects and prints the upstream turbine's
    # full frame on every iteration.
    print(df_query.filter(pl.col("turbine_id") == f'wt{i_up:03d}').collect(streaming=True))
    
    # df_sub.loc[df_sub['wd_%03d' % i_up] >= 359.5,'wd_%03d' % i_up] = df_sub.loc[df_sub['wd_%03d' % i_up] >= 359.5,'wd_%03d' % i_up] - 360.0
    # shift upstream directions >= 359.5 down by 360 so that they round to 0 below
    df_sub = df_sub.with_columns(pl.when((pl.col("turbine_id") == f'wt{i_up:03d}') & (pl.col("wind_direction") >= 359.5))\
                                     .then(pl.col("wind_direction") - 360.0)\
                                     .otherwise(pl.col("wind_direction"))\
                                     .alias("wind_direction"))
    # df_sub["wd_round"] = df_sub[f'wd_{i_up:03d}'].round()
    # NOTE(review): the legacy line above binned by the upstream turbine's direction; here each
    # row is binned by its own turbine's direction — confirm this is intended.
    df_sub = df_sub.with_columns(pl.col("wind_direction").round().alias("wind_direction_round"))

    # NOTE(review): the mean is taken over every remaining column, with both turbines in the same
    # groups; `turbine_id` is a string column and presumably does not survive as a usable id, in
    # which case the two selections below would match nothing — verify before trusting p_ratio.
    df_sub = df_sub.group_by("wind_direction_round").mean().collect(streaming=True).to_pandas()

    # NOTE(review): the two operands are selected from different rows, so pandas aligns them on
    # the row index rather than on the direction bin — confirm.
    p_ratio = df_sub.loc[df_sub["turbine_id"] == f"wt{i_down:03d}", f'power_output'] \
        / df_sub.loc[df_sub["turbine_id"] == f"wt{i_up:03d}", f'power_output']

    plt.figure()
    plt.plot(p_ratio, label="_nolegend_")
    plt.plot(dir_align * np.ones(2),[0,1.25], 'k--', label="Direction of Alignment")
    plt.grid()

    # position of the lowest power ratio within +/- prat_hfwdth of the alignment direction
    nadir = np.argmin(p_ratio[np.arange(int(np.round(dir_align)) - prat_hfwdth,int(np.round(dir_align)) + prat_hfwdth + 1) % 360])
    nadir = nadir + int(np.round(dir_align)) - prat_hfwdth

    # fit the inverted Gaussian (centre, width, depth) to the window around the nadir by
    # minimising the negative correlation returned by gauss_corr
    opt_gauss_params = minimize(gauss_corr, [0, 5.0, 1.0], args=(p_ratio[np.arange(nadir-prat_hfwdth,nadir + prat_hfwdth + 1) % 360]),method='SLSQP')

    # NOTE(review): the literal 60 yields 59 offsets, while the fitted window holds
    # 2 * prat_hfwdth + 1 samples — confirm the intended plotting range.
    xs = np.array(range(-int((60 - 1) / 2),int((60 + 1) / 2),1))
    gauss = -1 * opt_gauss_params.x[2] * np.exp(-0.5 * ((xs - opt_gauss_params.x[0]) / opt_gauss_params.x[1])**2) + 1.

    plt.plot(xs + nadir, gauss,'k',label="_nolegend_")
    plt.plot(2 * [nadir + opt_gauss_params.x[0]], [0,1.25], 'r--',label="Direction of Measured Wake Center")
    plt.title(f"Turbine Pair: ({i_up}, {i_down})")
    plt.legend()
    plt.xlabel("Wind Direction (deg)")
    plt.ylabel("Power Ratio (-)")
    
    # offset between the fitted wake centre and the geometric alignment, wrapped by wrap_180
    dir_offset = DataFilter.wrap_180(nadir + opt_gauss_params.x[0] - dir_align)
    print(dir_offset)

    dir_offsets = dir_offsets + [dir_offset]

print(f"Mean offset = {np.mean(dir_offsets)}")
print(f"Std. Dev. = {np.std(dir_offsets)}")
print(f"Min. = {np.min(dir_offsets)}")
print(f"Max. = {np.max(dir_offsets)}")

## Normalization