In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import xgboost as xgb
import matplotlib as plt
from datetime import datetime, timedelta
import re
import itertools
from math import radians, sin, cos, sqrt, asin

In [25]:


def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great-circle distance (in meters) between two points 
    on the Earth's surface given their latitude/longitude in degrees.
    """
    # Convert degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    
    # Haversine formula
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    
    # Earth radius in meters (mean radius = 6371 km)
    r = 6371 * 1000 
    return c * r

In [26]:
train_data_fpath =  "training_dataset.parquet"
submission_data_fpath =  "submission_dataset.parquet"

In [27]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [28]:
input_df = pd.read_parquet(train_data_fpath)
input_df= input_df[~input_df.target.isna()]
test_df = pd.read_parquet(submission_data_fpath)
input_df.head(3)

Unnamed: 0,TimeStamp_StartFormat,wtc_AcWindSp_mean;1,wtc_AcWindSp_mean;2,wtc_AcWindSp_mean;3,wtc_AcWindSp_mean;4,wtc_AcWindSp_mean;5,wtc_AcWindSp_mean;7,wtc_AcWindSp_min;1,wtc_AcWindSp_min;2,wtc_AcWindSp_min;3,wtc_AcWindSp_min;4,wtc_AcWindSp_min;5,wtc_AcWindSp_min;7,wtc_AcWindSp_max;1,wtc_AcWindSp_max;2,wtc_AcWindSp_max;3,wtc_AcWindSp_max;4,wtc_AcWindSp_max;5,wtc_AcWindSp_max;7,wtc_AcWindSp_stddev;1,wtc_AcWindSp_stddev;2,wtc_AcWindSp_stddev;3,wtc_AcWindSp_stddev;4,wtc_AcWindSp_stddev;5,wtc_AcWindSp_stddev;7,wtc_ScYawPos_mean;1,wtc_ScYawPos_mean;2,wtc_ScYawPos_mean;3,wtc_ScYawPos_mean;4,wtc_ScYawPos_mean;5,wtc_ScYawPos_mean;7,wtc_ScYawPos_min;1,wtc_ScYawPos_min;2,wtc_ScYawPos_min;3,wtc_ScYawPos_min;4,wtc_ScYawPos_min;5,wtc_ScYawPos_min;7,wtc_ScYawPos_max;1,wtc_ScYawPos_max;2,wtc_ScYawPos_max;3,wtc_ScYawPos_max;4,wtc_ScYawPos_max;5,wtc_ScYawPos_max;7,wtc_ScYawPos_stddev;1,wtc_ScYawPos_stddev;2,wtc_ScYawPos_stddev;3,wtc_ScYawPos_stddev;4,wtc_ScYawPos_stddev;5,wtc_ScYawPos_stddev;7,wtc_NacelPos_mean;1,wtc_NacelPos_mean;2,wtc_NacelPos_mean;3,wtc_NacelPos_mean;4,wtc_NacelPos_mean;5,wtc_NacelPos_mean;7,wtc_NacelPos_min;1,wtc_NacelPos_min;2,wtc_NacelPos_min;3,wtc_NacelPos_min;4,wtc_NacelPos_min;5,wtc_NacelPos_min;7,wtc_NacelPos_max;1,wtc_NacelPos_max;2,wtc_NacelPos_max;3,wtc_NacelPos_max;4,wtc_NacelPos_max;5,wtc_NacelPos_max;7,wtc_GenRpm_mean;1,wtc_GenRpm_mean;2,wtc_GenRpm_mean;3,wtc_GenRpm_mean;4,wtc_GenRpm_mean;5,wtc_GenRpm_mean;7,wtc_GenRpm_min;1,wtc_GenRpm_min;2,wtc_GenRpm_min;3,wtc_GenRpm_min;4,wtc_GenRpm_min;5,wtc_GenRpm_min;7,wtc_GenRpm_max;1,wtc_GenRpm_max;2,wtc_GenRpm_max;3,wtc_GenRpm_max;4,wtc_GenRpm_max;5,wtc_GenRpm_max;7,wtc_GenRpm_stddev;1,wtc_GenRpm_stddev;2,wtc_GenRpm_stddev;3,wtc_GenRpm_stddev;4,wtc_GenRpm_stddev;5,wtc_GenRpm_stddev;7,wtc_PitcPosA_mean;1,wtc_PitcPosA_mean;2,wtc_PitcPosA_mean;3,wtc_PitcPosA_mean;4,wtc_PitcPosA_mean;5,wtc_PitcPosA_mean;7,wtc_PitcPosA_min;1,wtc_PitcPosA_min;2,wtc_PitcPosA_min;3,wtc_PitcPosA_min;4,wtc_PitcPosA_min;5,wtc_PitcPosA_min;7,wtc_PitcPosA_max;1,wtc_PitcPosA_max;2,wtc_PitcPosA_max;3,wtc_PitcPosA_max;4,wtc_PitcPosA_max;5,wtc_PitcPosA_max;7,wtc_PitcPosA_stddev;1,wtc_PitcPosA_stddev;2,wtc_PitcPosA_stddev;3,wtc_PitcPosA_stddev;4,wtc_PitcPosA_stddev;5,wtc_PitcPosA_stddev;7,wtc_PitcPosB_mean;1,wtc_PitcPosB_mean;2,wtc_PitcPosB_mean;3,wtc_PitcPosB_mean;4,wtc_PitcPosB_mean;5,wtc_PitcPosB_mean;7,wtc_PitcPosC_mean;1,wtc_PitcPosC_mean;2,wtc_PitcPosC_mean;3,wtc_PitcPosC_mean;4,wtc_PitcPosC_mean;5,wtc_PitcPosC_mean;7,wtc_PowerRef_endvalue;1,wtc_PowerRef_endvalue;2,wtc_PowerRef_endvalue;3,wtc_PowerRef_endvalue;4,wtc_PowerRef_endvalue;5,wtc_PowerRef_endvalue;7,wtc_ScReToOp_timeon;1,wtc_ScReToOp_timeon;2,wtc_ScReToOp_timeon;3,wtc_ScReToOp_timeon;4,wtc_ScReToOp_timeon;5,wtc_ScReToOp_timeon;7,wtc_ActPower_mean;1,wtc_ActPower_mean;2,wtc_ActPower_mean;3,wtc_ActPower_mean;4,wtc_ActPower_mean;5,wtc_ActPower_mean;7,wtc_ActPower_min;1,wtc_ActPower_min;2,wtc_ActPower_min;3,wtc_ActPower_min;4,wtc_ActPower_min;5,wtc_ActPower_min;7,wtc_ActPower_max;1,wtc_ActPower_max;2,wtc_ActPower_max;3,wtc_ActPower_max;4,wtc_ActPower_max;5,wtc_ActPower_max;7,wtc_ActPower_stddev;1,wtc_ActPower_stddev;2,wtc_ActPower_stddev;3,wtc_ActPower_stddev;4,wtc_ActPower_stddev;5,wtc_ActPower_stddev;7,wtc_AmbieTmp_mean;1,wtc_AmbieTmp_mean;2,wtc_AmbieTmp_mean;3,wtc_AmbieTmp_mean;4,wtc_AmbieTmp_mean;5,wtc_AmbieTmp_mean;7,ShutdownDuration;1,ShutdownDuration;2,ShutdownDuration;3,ShutdownDuration;4,ShutdownDuration;5,ShutdownDuration;7,ERA5_temperature_2m,ERA5_relative_humidity_2m,ERA5_dew_point_2m,ERA5_precipitation,ERA5_surface_pressure,ERA5_cloud_cover,ERA5_wind_speed_10m,ERA5_wind_speed_100m,ERA5_wind_direction_10m,ERA5_wind_direction_100m,ERA5_wind_gusts_10m,id,is_valid,target
0,2016-01-01 00:00:00,6.085917,5.830675,6.613091,6.99851,7.645727,7.04079,2.0,1.1,1.6,2.4,4.3,3.9,10.0,10.0,11.0,10.6,11.8,10.5,1.484347,1.586648,1.571053,1.228338,1.09691,1.317204,83.754387,69.120506,-122.800903,-113.638397,-118.159103,-106.894501,79.5,66.400002,-124.400002,-114.800003,-120.5,-108.599998,86.800003,72.300003,-119.900002,-109.599998,-113.199997,-104.900002,2.266932,1.913408,1.426679,2.130993,1.8788,1.008626,83.751183,69.122231,237.192902,246.365005,241.846603,253.104004,79.5,66.400002,235.600006,245.199997,239.5,251.399994,86.800003,72.300003,240.100006,250.399994,246.800003,255.100006,941.06897,914.643799,1043.970947,1131.11499,1114.666016,1131.890991,649.900024,612.700012,730.099976,940.900024,935.400024,868.900024,1273.400024,1332.099976,1406.0,1535.599976,1391.800049,1430.800049,188.158005,211.196304,177.679596,129.183502,91.292953,128.462006,-0.974687,-0.973245,-0.937077,-0.966635,-0.927565,-0.971865,-1.5,-1.5,-1.5,-1.5,-1.5,-1.5,-0.4,-0.4,-0.4,-0.4,-0.4,-0.4,0.517582,0.514683,0.522298,0.496113,0.497144,0.528061,-0.96637,-1.008135,-0.982902,-0.934692,-0.996075,-0.976442,-0.99509,-0.934227,-0.972555,-0.947758,-0.975915,-0.964212,2300.0,2300.0,2300.0,2300.0,2300.0,2300.0,600.0,600.0,600.0,600.0,600.0,600.0,352.522308,361.848511,468.959015,565.597473,533.635376,566.248108,78.0,72.0,180.0,309.0,287.0,248.0,760.0,869.0,974.0,1416.0,949.0,1018.0,187.078903,215.998001,213.243896,198.349503,127.434898,178.835403,0.0,0.827133,0.3154,0.0,0.0,0.0,0,0,0,0,0,0,1.792,73.957634,-2.358,0.0,976.166321,0.0,6.080296,10.040418,233.695404,236.784729,13.0,-210384,True,352.522308
1,2016-01-01 00:10:00,5.824693,5.810768,5.895642,6.5219,6.413868,6.599652,2.1,2.2,1.9,2.2,2.3,2.3,9.6,10.2,8.8,12.5,10.9,11.0,1.141584,1.547302,1.293487,1.441869,1.438043,1.334877,88.496246,75.914597,-115.395798,-110.868401,-113.227501,-102.6987,84.400002,72.300003,-123.900002,-112.5,-116.199997,-108.699997,91.400002,81.400002,-113.5,-109.599998,-112.400002,-101.0,1.879867,2.106318,2.478934,1.365305,1.014673,2.78289,88.491821,75.907959,244.613007,249.131195,246.777298,257.305389,84.400002,72.300003,236.100006,247.5,243.800003,251.300003,91.400002,81.400002,246.5,250.399994,247.600006,259.0,878.17627,891.244385,877.905701,1068.406006,901.705383,1027.93103,691.099976,641.400024,668.200012,780.799988,667.400024,684.5,1142.199951,1382.900024,1157.400024,1478.699951,1270.099976,1248.5,128.342499,172.585403,148.661896,162.483597,134.646103,146.772797,-0.976545,-0.973472,-0.943475,-0.968723,-0.927995,-0.967702,-1.5,-1.5,-1.5,-1.5,-1.5,-1.5,-0.4,-0.4,-0.4,-0.4,-0.4,-0.4,0.514228,0.513102,0.521445,0.498363,0.498069,0.531581,-0.966463,-1.008063,-0.986907,-0.933587,-0.995267,-0.974115,-0.995205,-0.934798,-0.97826,-0.948613,-0.97302,-0.963473,2300.0,2300.0,2300.0,2300.0,2300.0,2300.0,600.0,600.0,600.0,600.0,600.0,600.0,294.078888,332.010406,308.005707,490.191193,314.5914,443.461914,119.0,103.0,109.0,233.0,109.0,132.0,564.0,948.0,599.0,1144.0,770.0,736.0,104.474998,165.113998,123.467796,211.253098,119.1884,153.566299,0.0,0.520733,0.07045,0.0,0.0,0.0,0,0,0,0,0,0,1.792,86.580147,-0.208,0.0,982.179504,100.0,3.301515,6.958448,178.264328,187.43132,6.9,-210383,True,294.078888
2,2016-01-01 00:20:00,7.10018,6.386981,7.606015,7.695034,7.812548,7.469052,2.2,2.1,0.0,4.2,4.0,3.4,11.6,12.3,13.6,12.8,11.6,12.8,1.59964,1.661032,2.328598,1.501421,1.426233,1.648712,83.129791,69.414749,-120.673698,-113.834702,-118.3255,-104.277702,81.300003,64.0,-124.699997,-114.599998,-122.199997,-106.400002,87.800003,77.199997,-113.900002,-110.099998,-112.400002,-98.800003,1.907214,2.861577,3.214255,1.593463,3.1479,2.727755,83.128883,69.409897,239.316895,246.165298,241.671005,255.7173,81.300003,64.0,235.300003,245.399994,237.800003,253.600006,87.800003,77.199997,246.100006,249.899994,247.600006,261.200012,1137.56604,1037.644043,1212.978027,1254.245972,1118.692993,1178.525024,752.0,646.099976,754.0,960.5,760.599976,938.299988,1547.199951,1473.300049,1582.0,1564.900024,1456.5,1535.900024,174.083206,234.354401,277.971313,145.854706,153.791,166.3965,-0.968695,-0.978538,-0.932615,-0.967485,-0.928043,-0.97015,-1.5,-1.5,-1.5,-1.5,-1.5,-1.5,-0.4,-0.4,-0.4,-0.4,-0.4,-0.4,0.516025,0.512991,0.511504,0.495999,0.496092,0.528538,-0.963727,-1.008043,-0.977577,-0.935797,-0.994107,-0.976495,-0.986437,-0.937108,-0.970872,-0.948943,-0.974592,-0.966483,2300.0,2300.0,2300.0,2300.0,2300.0,2300.0,600.0,600.0,600.0,600.0,600.0,600.0,587.302795,501.303894,816.978821,771.708191,556.22052,646.179321,182.0,105.0,190.0,333.0,205.0,304.0,1458.0,1132.0,2189.0,1740.0,1062.0,1415.0,243.555801,261.407715,491.376892,298.910797,199.444397,274.355103,0.0,0.4834,0.32405,0.0,0.0,0.0,0,0,0,0,0,0,7.742,61.013905,0.692,0.0,985.24054,78.0,3.478505,4.648656,198.435043,198.824799,7.7,-210382,True,587.302795


In [29]:
turbine_data = {
    "T01": {"Latitude": 57.49921441, "Longitude": -3.086742896},
    "T02": {"Latitude": 57.49626574, "Longitude": -3.082817716},
    "T03": {"Latitude": 57.50206973, "Longitude": -3.088980479},
    "T04": {"Latitude": 57.50196736, "Longitude": -3.082085466},
    "T05": {"Latitude": 57.49891107, "Longitude": -3.078123820},
    "T06": {"Latitude": 57.50024464, "Longitude": -3.071321578},
    "T07": {"Latitude": 57.50513302, "Longitude": -3.085850762},
    "T08": {"Latitude": 57.50465629, "Longitude": -3.077542275},
    "T09": {"Latitude": 57.50827835, "Longitude": -3.082572740},
    "T10": {"Latitude": 57.50542459, "Longitude": -3.070655631},
    "T11": {"Latitude": 57.51184311, "Longitude": -3.080658605},
    "T12": {"Latitude": 57.51081569, "Longitude": -3.073684523},
    "T13": {"Latitude": 57.51666161, "Longitude": -3.078146735},
    "T14": {"Latitude": 57.51376046, "Longitude": -3.074939555},
    "T15": {"Latitude": 57.49941809, "Longitude": -3.062837374},
    "T16": {"Latitude": 57.50513916, "Longitude": -3.053340822},
    "T17": {"Latitude": 57.50662266, "Longitude": -3.047792556},
    "T18": {"Latitude": 57.50429684, "Longitude": -3.041183981},
    "T19": {"Latitude": 57.50859585, "Longitude": -3.041656952},
    "T20": {"Latitude": 57.51202849, "Longitude": -3.040436127},
    "T21": {"Latitude": 57.50978499, "Longitude": -3.034697307}
}

In [30]:
def create_wake_features_with_effect(df, turbine_lat_lon, active_turbines=["T01", "T02", "T03", "T04", "T05", "T07"]):
    """
    Creates wake features with calculated relative wake effect
    Args:
        df: Input DataFrame with time series data
        turbine_lat_lon: Dictionary of turbine coordinates
        active_turbines: List of turbines to include
    Returns:
        DataFrame with:
        - Relative wake effect (power difference)
        - Single power measurement
        - is_downstream and is_upstream flags
        - All wake interaction features
    """
    features = []
    
    for timestamp, row in df.iterrows():
        wind_dir = row['ERA5_wind_direction_100m']
        wind_speed = row['ERA5_wind_speed_100m']
        
        # Generate all possible turbine pairs
        for tid1, tid2 in itertools.combinations(active_turbines, 2):
            # Calculate static pair features
            distance = haversine(turbine_lat_lon[tid1]["Longitude"],
                               turbine_lat_lon[tid1]["Latitude"],
                               turbine_lat_lon[tid2]["Longitude"],
                               turbine_lat_lon[tid2]["Latitude"])
            
            direction = np.degrees(np.arctan2(
                turbine_lat_lon[tid2]["Latitude"] - turbine_lat_lon[tid1]["Latitude"],
                turbine_lat_lon[tid2]["Longitude"] - turbine_lat_lon[tid1]["Longitude"]
            )) % 360
            
            # Determine wake relationship
            angle_diff = min(abs(wind_dir - direction), 360 - abs(wind_dir - direction))
            is_wake_related = angle_diff <= 45  # Within wake cone
            
            if is_wake_related:
                # Get both turbines' power
                power1 = row[f'wtc_ActPower_mean;{tid1[-1]}']
                power2 = row[f'wtc_ActPower_mean;{tid2[-1]}']
                
                # Determine which turbine is upstream/downstream
                if angle_diff <= 90:
                    downstream, upstream = tid2, tid1
                    wake_effect = power1 - power2  # Upstream minus downstream
                else:
                    downstream, upstream = tid1, tid2
                    wake_effect = power2 - power1
                
                features.append({
                    'turbine_pair': f"{upstream}-{downstream}",
                    'distance': distance,
                    'direction_diff': angle_diff,
                    'wind_speed': wind_speed,
                    'is_downstream': 0,
                    'is_upstream': 1,
                    'other_pow': power2 if upstream == tid1 else power1,
                    'power': power1 if upstream == tid1 else power2,
                    'relative_wake_effect': wake_effect,
                    'wind_direction': wind_dir,
                    'timestamp': timestamp
                })
                
                features.append({
                    'turbine_pair': f"{upstream}-{downstream}",
                    'distance': distance,
                    'direction_diff': angle_diff,
                    'wind_speed': wind_speed,
                    'is_downstream': 1,
                    'is_upstream': 0,
                    'other_pow': power1 if upstream == tid1 else power2,
                    'power': power2 if downstream == tid2 else power1,
                    'relative_wake_effect': -wake_effect,  # Inverse effect
                    'wind_direction': wind_dir,
                    'timestamp': timestamp
                })
    
    wake_df = pd.DataFrame(features)
    
    # Add normalized wake effect (0-1 scale)
    max_effect = wake_df['relative_wake_effect'].abs().max()
    wake_df['normalized_wake_effect'] = wake_df['relative_wake_effect'] / max_effect
    
    return wake_df

# Usage:
input_df = input_df.fillna(0)
active_turbines = ["T01", "T02", "T03", "T04", "T05", "T07"]
wake_df = create_wake_features_with_effect(input_df, turbine_data, active_turbines)

In [31]:
def create_t01_single_row_features(df, turbine_lat_lon, other_turbines=["T02", "T03", "T04", "T05", "T07"]):
    target_turbine = "T01"
    max_power = df['wtc_ActPower_mean;2'].max()

    results = []

    for timestamp, row in df.iterrows():
        wind_dir = row['ERA5_wind_direction_100m']
        wind_speed = row['ERA5_wind_speed_100m']
        t01_power = row['wtc_ActPower_mean;2']
        
        # Initialize output row
        out_row = {
            'distance': np.nan,
            'direction_diff': np.nan,
            'wind_speed': wind_speed,
            'is_downstream': 0,
            'is_upstream': 0,
            'wind_direction': wind_dir,
            'relative_wake_effect': 0,
            'normalized_wake_effect': 0,
            'other_pow': np.nan
        }

        wake_effects = []

        for other_tid in other_turbines:
            # Compute direction and distance from other turbine to T01
            lat1 = turbine_lat_lon[other_tid]['Latitude']
            lon1 = turbine_lat_lon[other_tid]['Longitude']
            lat2 = turbine_lat_lon[target_turbine]['Latitude']
            lon2 = turbine_lat_lon[target_turbine]['Longitude']

            dx = lon2 - lon1
            dy = lat2 - lat1

            bearing = (np.degrees(np.arctan2(dy, dx)) + 360) % 360
            angle_diff = min(abs(wind_dir - bearing), 360 - abs(wind_dir - bearing))

            if angle_diff <= 45:  # Within wake cone
                distance = haversine(lon1, lat1, lon2, lat2)
                other_power = row[f'wtc_ActPower_mean;{other_tid[-1]}']
                effect = other_power - t01_power

                wake_effects.append({
                    'distance': distance,
                    'angle_diff': angle_diff,
                    'effect': effect,
                    'other_power': other_power,
                    'is_downstream': int(effect > 0),
                    'is_upstream': int(effect < 0),
                })

        if wake_effects:
            strongest = max(wake_effects, key=lambda x: abs(x['effect']))
            out_row.update({
                'distance': strongest['distance'],
                'direction_diff': strongest['angle_diff'],
                'relative_wake_effect': strongest['effect'],
                'normalized_wake_effect': strongest['effect'] / max_power if max_power else 0,
                'other_pow': strongest['other_power'],
                'is_downstream': strongest['is_downstream'],
                'is_upstream': strongest['is_upstream']
            })

        results.append(out_row)

    return pd.DataFrame(results)

In [32]:
test_wake_df = create_t01_single_row_features(test_df, turbine_data)
len(test_wake_df)

52704

In [33]:
wake_df.head(2)

Unnamed: 0,turbine_pair,distance,direction_diff,wind_speed,is_downstream,is_upstream,other_pow,power,relative_wake_effect,wind_direction,timestamp,normalized_wake_effect
0,T01-T02,403.11921,35.612632,4.401136,0,1,1163.564941,1299.646973,136.082031,358.698059,4,0.05875
1,T01-T02,403.11921,35.612632,4.401136,1,0,1299.646973,1163.564941,-136.082031,358.698059,4,-0.05875


In [34]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

# 1. Define the neural network architecture
class WakeNet(nn.Module):
    def __init__(self, input_size=9):
        super(WakeNet, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 256),# mat1 error
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1))
        
    def forward(self, x):
        return self.network(x)

# 2. Data Preparation
def prepare_nn_data(wake_df):
    """Convert wake pair features to PyTorch tensors"""
    X = wake_df[['distance', 'direction_diff', 'wind_speed','is_downstream', 'is_upstream', 'wind_direction','relative_wake_effect', "normalized_wake_effect","other_pow"]].values
    y = wake_df['power'].values
    y[y<0] = 0
    X[np.isnan(X)] = 0
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Convert to tensors
    X_tensor = torch.FloatTensor(X_scaled)
    
    y_tensor = torch.FloatTensor(y).unsqueeze(1)
    
    return X_tensor, y_tensor, scaler

# 3. Training Loop
def train_wake_model(X_tensor, y_tensor, epochs=100, batch_size=64):
    dataset = TensorDataset(X_tensor, y_tensor)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    model = WakeNet()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    for epoch in range(epochs):
        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
        
        if (epoch+1) % 10 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')
    
    return model

# 4. Prediction Function


In [35]:
def predict_wake_effect(model, df, turbine_lat_lon, active_turbines=["T01", "T02", "T03", "T04", "T05", "T07"], scaler=None):
    """
    Predicts wake effects for all turbine pairs in a DataFrame
    
    Args:
        model: Trained WakeNet model
        df: Input DataFrame with time series data
        turbine_lat_lon: Dictionary of turbine coordinates
        active_turbines: List of turbines to include
        scaler: Pre-trained scaler (if used during training)
    
    Returns:
        DataFrame with predictions and original data
    """
    # Create features with wake effects
    wake_df = create_wake_features_with_effect(df, turbine_lat_lon, active_turbines)
    
    # Prepare input features (must match training exactly)
    feature_cols = ['distance', 'direction_diff', 'wind_speed', 'is_downstream', 'normalized_wake_effect']
    X_pred = wake_df[feature_cols].values
    
    # Scale features if scaler was used during training
    if scaler:
        X_pred = scaler.transform(X_pred)
    
    # Convert to tensor
    X_tensor = torch.FloatTensor(X_pred)
    
    # Predict
    model.eval()
    with torch.no_grad():
        predictions = model(X_tensor).numpy().flatten()
    
    # Add predictions to DataFrame
    results = wake_df.copy()
    results['predicted_power'] = predictions
    
    # Calculate predicted wake effect
    results['predicted_wake_effect'] = results.groupby(['timestamp', 'turbine_pair'])['predicted_power'].transform(
        lambda x: x[results['is_upstream'] == 1].values[0] - x[results['is_downstream'] == 1].values[0]
        if any(results['is_upstream'] == 1) and any(results['is_downstream'] == 1)
        else 0
    )
    
    return results

In [65]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np

def train_wake_model(X_tensor, y_tensor, model_class,epochs=200, batch_size=1024, patience=100):
    """
    Trains a wake model with validation tracking and early stopping.

    Args:
        X_tensor (Tensor): Input features
        y_tensor (Tensor): Target values
        model_class: A callable that returns a model instance (e.g., WakeNet)
        epochs (int): Number of training epochs
        batch_size (int): Batch size
        patience (int): Early stopping patience (in epochs)

    Returns:
        model: The best model based on validation MAE
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Split into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(
        X_tensor, y_tensor, test_size=0.2, random_state=42
    )

    train_dataset = TensorDataset(X_train.to(device), y_train.to(device))
    val_dataset = TensorDataset(X_val.to(device), y_val.to(device))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

    # Initialize model, optimizer, scheduler
    model = model_class().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=0.01, weight_decay=0.01)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, steps_per_epoch=len(train_loader), epochs=epochs)

    # Loss function
    def mae_loss(pred, target):
        return torch.abs(pred - target).mean()

    scaler = torch.cuda.amp.GradScaler(enabled=device.type == 'cuda')

    best_val_mae = float('inf')
    best_model_state = None
    epochs_without_improvement = 0

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0

        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()

            with torch.cuda.amp.autocast(enabled=device.type == 'cuda'):
                outputs = model(batch_X)
                loss = mae_loss(outputs, batch_y)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)

        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for val_X, val_y in val_loader:
                with torch.cuda.amp.autocast(enabled=device.type == 'cuda'):
                    val_outputs = model(val_X)
                    val_loss += mae_loss(val_outputs, val_y).item()

        avg_val_loss = val_loss / len(val_loader)

        print(f"Epoch {epoch+1}/{epochs} - Train MAE: {avg_train_loss:.4f} - Val MAE: {avg_val_loss:.4f} - LR: {scheduler.get_last_lr()[0]:.6f}")

        # Early stopping tracking
        if avg_val_loss < best_val_mae:
            best_val_mae = avg_val_loss
            best_model_state = model.state_dict()
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs.")
                break

    # Load best model weights before returning
    if best_model_state:
        model.load_state_dict(best_model_state)

    return model.cpu()

In [66]:
# 2. Prepare NN data
X_tensor, y_tensor, scaler = prepare_nn_data(wake_df)

In [67]:
len(test_df)

52704

In [68]:
len(test_wake_df)

52704

In [69]:
len(test_wake_df)

52704

In [70]:
len(test_df)

52704

In [71]:

# 3. Train the model
wake_model_nn = train_wake_model(X_tensor, y_tensor,model_class = WakeNet, epochs=100)


  scaler = torch.cuda.amp.GradScaler(enabled=device.type == 'cuda')
  with torch.cuda.amp.autocast(enabled=device.type == 'cuda'):
  with torch.cuda.amp.autocast(enabled=device.type == 'cuda'):


Epoch 1/100 - Train MAE: 159.4740 - Val MAE: 7.6738 - LR: 0.000426
Epoch 2/100 - Train MAE: 26.8288 - Val MAE: 4.5014 - LR: 0.000505
Epoch 3/100 - Train MAE: 20.7816 - Val MAE: 3.4245 - LR: 0.000635
Epoch 4/100 - Train MAE: 18.2894 - Val MAE: 3.9130 - LR: 0.000815
Epoch 5/100 - Train MAE: 16.7885 - Val MAE: 2.5818 - LR: 0.001043
Epoch 6/100 - Train MAE: 15.4749 - Val MAE: 2.3141 - LR: 0.001317
Epoch 7/100 - Train MAE: 14.3308 - Val MAE: 4.5518 - LR: 0.001633
Epoch 8/100 - Train MAE: 13.4854 - Val MAE: 2.1274 - LR: 0.001988
Epoch 9/100 - Train MAE: 12.7365 - Val MAE: 1.9277 - LR: 0.002379
Epoch 10/100 - Train MAE: 12.0873 - Val MAE: 3.5310 - LR: 0.002800
Epoch 11/100 - Train MAE: 11.6976 - Val MAE: 3.7788 - LR: 0.003248
Epoch 12/100 - Train MAE: 11.2450 - Val MAE: 8.1043 - LR: 0.003717
Epoch 13/100 - Train MAE: 11.2378 - Val MAE: 2.1397 - LR: 0.004202
Epoch 14/100 - Train MAE: 11.1095 - Val MAE: 6.5951 - LR: 0.004699
Epoch 15/100 - Train MAE: 10.6994 - Val MAE: 28.1799 - LR: 0.005200
Ep

In [72]:
def predict_with_model(model, X_new, batch_size=1024):
    """
    Makes predictions with your trained WakeNet model
    
    Args:
        model: Your trained WakeNet model
        X_new: New input data (NumPy array or DataFrame)
        batch_size: Same as training for consistency
    
    Returns:
        NumPy array of predictions
    """
    # 1. Ensure model is in eval mode and on CPU
    model.eval()
    model = model.cpu()
    
    # 2. Convert input to tensor
    if isinstance(X_new, pd.DataFrame):
        X_tensor = torch.FloatTensor(X_new.values)
    else:
        X_tensor = torch.FloatTensor(X_new)
    
    # 3. Create dataloader (same config as training)
    dataset = TensorDataset(X_tensor)
    dataloader = DataLoader(dataset, 
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=4)
    
    # 4. Make predictions
    predictions = []
    with torch.no_grad():
        for batch in dataloader:
            batch_X = batch[0]
            preds = model(batch_X).numpy().flatten()
            predictions.extend(preds)
    
    return np.array(predictions)

# Usage example:
# 1. First prepare your input data EXACTLY like training data
  # Same function used in training
X_new = test_wake_df[['distance', 'direction_diff', 'wind_speed','is_downstream', 'is_upstream', 'wind_direction','relative_wake_effect', "normalized_wake_effect","other_pow"]].values  # Same columns/order

# 2. Make predictions
predictions = predict_with_model(wake_model_nn, X_new)

# 3. Add to DataFrame
predicted_power = predictions

In [73]:
test_wake_df.head()

Unnamed: 0,distance,direction_diff,wind_speed,is_downstream,is_upstream,wind_direction,relative_wake_effect,normalized_wake_effect,other_pow
0,413.679701,16.976997,9.484725,1,0,227.563812,279.231934,0.121272,2020.569946
1,516.063307,32.979321,5.247857,1,0,210.963684,259.641846,0.112764,2156.587891
2,344.492802,33.733184,9.226592,0,1,274.350983,-360.209106,-0.156441,1447.272949
3,413.679701,11.354408,9.276314,1,0,221.941223,519.08606,0.225442,2042.202026
4,413.679701,11.354408,9.276314,1,0,221.941223,774.684082,0.336449,2040.154053


In [74]:
pd.DataFrame(predicted_power).to_csv("prediction.csv")