In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [11]:
# Load the cleaned hourly wind dataset.
# `datetime` is parsed while reading so that the sort below is a true
# chronological sort rather than a lexicographic sort of raw strings
# (the two only coincide for zero-padded ISO-8601 text).
df = pd.read_csv(
    "../../../data/cleaned/multi_var_wind_hourly_cleaned.csv",
    parse_dates=['datetime'],
)
# Oldest observation first, with a fresh 0..n-1 positional index.
df = df.sort_values('datetime').reset_index(drop=True)
# Force a float dtype on wind speed regardless of how the CSV column was inferred.
df['wind_speed'] = df['wind_speed'].astype(float)

In [12]:
# Keep only the timestamp and the columns used for modelling.
# The explicit .copy() makes `df` an independent frame, so assigning new
# columns to it afterwards does not trigger pandas' SettingWithCopyWarning.
df = df[['datetime', 'pressure', 'temperature', 'humidity', 'wind_speed', 'u', 'v']].copy()


In [13]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,datetime,pressure,temperature,humidity,wind_speed,u,v
0,2024-01-01 00:00:00,758.466667,-1.016667,62.166667,0.1,-0.022693,-0.04734
1,2024-01-01 01:00:00,758.383333,-1.533333,64.0,0.283333,-0.113825,0.011167
2,2024-01-01 02:00:00,758.383333,-1.15,61.333333,0.2,-0.169286,-0.061781
3,2024-01-01 03:00:00,758.783333,-1.166667,58.666667,0.5,0.142045,-0.39769
4,2024-01-01 04:00:00,759.0,-1.483333,62.166667,0.966667,-0.51259,-0.609574


In [14]:
# Convert the timestamp column to datetime64; unparseable values raise
# instead of being silently coerced to NaT.
df['datetime'] = pd.to_datetime(df['datetime'], errors='raise')

In [15]:
# Hour of day (0-23) taken from the timestamp column.
df['hour'] = df['datetime'].dt.hour
# Map the hour onto a 24-hour circle so that 23:00 and 00:00 end up adjacent
# in (sin_h, cos_h) space instead of 23 units apart.
hour_angle = 2 * np.pi * df['hour'] / 24
df['sin_h'] = np.sin(hour_angle)
df['cos_h'] = np.cos(hour_angle)

In [16]:
# Preview the first five rows, including the new hour / sin_h / cos_h columns.
df.head(n=5)

Unnamed: 0,datetime,pressure,temperature,humidity,wind_speed,u,v,hour,sin_h,cos_h
0,2024-01-01 00:00:00,758.466667,-1.016667,62.166667,0.1,-0.022693,-0.04734,0,0.0,1.0
1,2024-01-01 01:00:00,758.383333,-1.533333,64.0,0.283333,-0.113825,0.011167,1,0.258819,0.965926
2,2024-01-01 02:00:00,758.383333,-1.15,61.333333,0.2,-0.169286,-0.061781,2,0.5,0.866025
3,2024-01-01 03:00:00,758.783333,-1.166667,58.666667,0.5,0.142045,-0.39769,3,0.707107,0.707107
4,2024-01-01 04:00:00,759.0,-1.483333,62.166667,0.966667,-0.51259,-0.609574,4,0.866025,0.5


In [17]:
# Promote the timestamp to the row index (the column itself is removed).
# Not re-runnable as-is: a second execution fails because 'datetime' is no
# longer a column.
df = df.set_index(keys='datetime')

In [18]:
# Calendar month (1-12) read from the datetime index.
df['month'] = df.index.month
# Three-month blocks starting in December: Dec-Feb -> 1, Mar-May -> 2,
# Jun-Aug -> 3, Sep-Nov -> 4. The `% 12` folds December (12) to 0 so that it
# lands in the same block as January and February.
# NOTE(review): naming these winter/spring/summer/autumn assumes a
# Northern-Hemisphere site - confirm.
df['season'] = df['month'] % 12 // 3 + 1  # 1=Dec-Feb, 2=Mar-May, 3=Jun-Aug, 4=Sep-Nov

In [19]:

# Lagged copies (1 to 6 rows back) of every raw variable.
# Each pair is (prefix used in the new column name, source column).
# NOTE(review): shift() moves by rows, not by time, so "N hours" only holds if
# the index is strictly hourly with no missing timestamps - confirm.
lag_sources = (
    ('wind_speed', 'wind_speed'),
    ('temp', 'temperature'),
    ('humidity', 'humidity'),
    ('pressure', 'pressure'),
    ('u', 'u'),
    ('v', 'v'),
)
for lag in range(1, 7):
    for prefix, column in lag_sources:
        df[f'{prefix}_lag_{lag}'] = df[column].shift(lag)

In [20]:

# Trailing-window summaries of wind speed. Each window ends at (and includes)
# the current row; rows without a full window get NaN.
df = df.assign(
    wind_speed_3hr_avg=df['wind_speed'].rolling(3).mean(),
    wind_speed_6hr_std=df['wind_speed'].rolling(6).std(),
)

In [21]:
# Remove every row that has a NaN in any column. This clears the leading rows
# left incomplete by shifting/rolling, and also any row with a missing value
# in the raw data.
df = df.dropna(axis=0, how='any')

In [22]:
# One-step-ahead target: the wind speed of the following row.
# The final row has no successor, so its target is NaN and the row is dropped.
df = df.assign(target=df['wind_speed'].shift(-1)).dropna()


In [23]:
# Time-ordered hold-out with no shuffling: the first 80% of rows are used for
# training and the remaining 20% for testing.
split_point = int(0.8 * len(df))
train_df, test_df = df.iloc[:split_point], df.iloc[split_point:]


In [24]:
# Split each partition into a feature matrix and a target vector.
# Both 'target' and the current row's raw 'wind_speed' are excluded from the
# features; every other column of the frame is kept.
X_train, y_train = train_df.drop(columns=['target', 'wind_speed']), train_df['target']
X_test, y_test = test_df.drop(columns=['target', 'wind_speed']), test_df['target']

In [25]:
# Preview the first five rows of the training feature matrix.
X_train.head(n=5)

Unnamed: 0_level_0,pressure,temperature,humidity,u,v,hour,sin_h,cos_h,month,season,...,u_lag_5,v_lag_5,wind_speed_lag_6,temp_lag_6,humidity_lag_6,pressure_lag_6,u_lag_6,v_lag_6,wind_speed_3hr_avg,wind_speed_6hr_std
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 06:00:00,759.466667,2.166667,50.0,0.532663,1.072439,6,1.0,6.123234000000001e-17,1,1,...,-0.113825,0.011167,0.1,-1.016667,62.166667,758.466667,-0.022693,-0.04734,1.016667,0.468409
2024-01-01 07:00:00,759.733333,3.566667,43.333333,-0.161001,0.264452,7,0.965926,-0.258819,1,1,...,-0.169286,-0.061781,0.283333,-1.533333,64.0,758.383333,-0.113825,0.011167,0.816667,0.455633
2024-01-01 08:00:00,759.533333,4.033333,46.666667,1.090914,1.377346,8,0.866025,-0.5,1,1,...,0.142045,-0.39769,0.2,-1.15,61.333333,758.383333,-0.169286,-0.061781,1.322222,0.679842
2024-01-01 09:00:00,759.1,4.483333,46.0,1.786727,1.452116,9,0.707107,-0.7071068,1,1,...,-0.51259,-0.609574,0.5,-1.166667,58.666667,758.783333,0.142045,-0.39769,1.611111,0.800515
2024-01-01 10:00:00,758.833333,4.616667,45.666667,2.723018,1.936597,10,0.5,-0.8660254,1,1,...,-0.623646,0.041307,0.966667,-1.483333,62.166667,759.0,-0.51259,-0.609574,2.605556,1.120945
