In [None]:
import glob
import os
from datetime import datetime, timedelta

import cartopy.crs as ccrs
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import netCDF4 as nc
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.stats import weibull_min
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from windrose import WindroseAxes

In [None]:
def read_netcdf(path):
    """Open the netCDF file at ``path`` in read-only mode.

    Returns the open ``netCDF4.Dataset`` handle. The function does not close
    it, so the caller owns the handle for as long as it reads from it.
    """
    return nc.Dataset(path, 'r')

def read_era5(path, start_year, end_year):
    """Load the yearly ERA5 CSV files found in ``path`` for a range of years.

    Only files named ``ERA5_N-9_<year>.csv`` are considered; the year is
    parsed from the last underscore-separated token of the file name.

    Parameters
    ----------
    path : str
        Directory that contains the yearly CSV files.
    start_year, end_year : int
        Inclusive bounds of the years to load.

    Returns
    -------
    pandas.DataFrame
        All selected files concatenated in file-name order with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If no file in ``path`` falls inside the requested range, or if a
        matching file name does not end in an integer year.
    """
    yearly_files = sorted(
        name for name in os.listdir(path)
        if name.startswith('ERA5_N-9_') and name.endswith('.csv')
    )

    frames = []
    for name in yearly_files:
        year = int(name.split('_')[-1].split('.')[0])
        if start_year <= year <= end_year:
            frames.append(pd.read_csv(os.path.join(path, name)))

    # pd.concat([]) fails with an unhelpful "No objects to concatenate";
    # say what was looked for and where instead.
    if not frames:
        raise ValueError(
            f"No ERA5 files named 'ERA5_N-9_<year>.csv' for years "
            f"{start_year}-{end_year} found in {path!r}"
        )
    return pd.concat(frames, ignore_index=True)

def convert_time(time_hours):
    """Convert hour offsets from 1900-01-01 00:00 into ``datetime`` objects.

    Parameters
    ----------
    time_hours : iterable of numbers
        Offsets in hours relative to 1900-01-01 00:00. Fractional hours are
        honoured (e.g. ``1.5`` -> 01:30).
        NOTE(review): assumes the source time axis really is "hours since
        1900-01-01" - confirm against the netCDF variable's ``units``.

    Returns
    -------
    list of datetime.datetime
        One timestamp per input value, in input order.
    """
    base_time = datetime(1900, 1, 1)
    # float() rather than int(): truncating to whole hours would map every
    # sub-hourly sample onto the same timestamp, and those rows would then be
    # discarded as duplicates further down the pipeline.
    return [base_time + timedelta(hours=float(hours)) for hours in time_hours]

def calculate_windspeed_direction(era5_data):
    """Add 100 m wind speed and direction columns to ``era5_data``.

    Reads the ``u100`` and ``v100`` columns and writes two new columns into
    the same frame (the input is modified in place and also returned):

    * ``windspeed_era_100``     - magnitude of the (u, v) vector
    * ``winddirection_era_100`` - direction in degrees within [0, 360),
      computed as ``(270 - atan2(v, u)) mod 360``
    """
    u_component = era5_data['u100']
    v_component = era5_data['v100']

    # Speed is the length of the horizontal wind vector
    era5_data['windspeed_era_100'] = np.sqrt(u_component**2 + v_component**2)

    # Mathematical angle of the vector (counter-clockwise from the +u axis),
    # then rotated/flipped so that North maps to 0/360 degrees
    vector_angle_deg = np.degrees(np.arctan2(v_component, u_component))
    era5_data['winddirection_era_100'] = (270 - vector_angle_deg) % 360

    return era5_data

def prepare_data_for_MCP(df_aligned, reanalysis_col, measurement_col):
    """Extract the complete (predictor, target) pairs for an MCP regression.

    Parameters
    ----------
    df_aligned : pandas.DataFrame
        Frame holding reanalysis and measurement data on the same rows.
    reanalysis_col : str
        Column used as the predictor (reanalysis data).
    measurement_col : str
        Column used as the target (measurement).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``x`` (features) and ``y`` (target), restricted to the rows where
        both are present. Both keep the index labels of ``df_aligned``.
    """
    X = pd.DataFrame(df_aligned[reanalysis_col])  # Reanalysis data
    Y = pd.DataFrame(df_aligned[measurement_col])  # Measurement

    # A row is usable only if neither side is missing; a NaN predictor would
    # otherwise reach the regression and make the fit fail.
    complete_rows = X.notnull().all(axis=1) & Y.notnull().all(axis=1)

    x = X[complete_rows]  # Features
    y = Y[complete_rows]  # Target

    return x, y

def process_meas_data(df_buoy):
    """Clean a buoy measurement frame and average it to hourly resolution.

    Steps: parse ``time`` to datetime, drop rows with a repeated timestamp
    (first occurrence wins), turn the ``-999`` sentinel into NaN, index by
    time and take the mean per hour.

    Parameters
    ----------
    df_buoy : pandas.DataFrame
        Measurements with a ``time`` column. The frame is NOT modified, so
        the calling cell can be re-run safely.

    Returns
    -------
    pandas.DataFrame
        Hourly means indexed by ``time``.
    """
    df_hourly = (
        df_buoy
        # assign() returns a new frame, which keeps the caller's data intact
        .assign(time=pd.to_datetime(df_buoy['time']))
        .drop_duplicates(subset='time')
        .replace(-999, np.nan)  # Replace -999 with NaN
        .set_index('time')
        .resample('h')
        .mean()
    )
    return df_hourly

def merge_data(df_buoy, df_era5, data_name):
    """Align measurement data with ERA5 data on their ``time`` key.

    A right join is used, so every ERA5 row is kept and measurement columns
    are NaN where no measurement exists for that timestamp.

    Parameters
    ----------
    df_buoy : pandas.DataFrame
        Measurement data keyed by ``time`` (column or index level).
    df_era5 : pandas.DataFrame
        ERA5 data keyed by ``time`` (column or index level).
    data_name : str
        Label for the measurement source, used in the printed summary.

    Returns
    -------
    pandas.DataFrame
        The merged frame, exactly as returned by ``pd.merge``.
    """
    df_aligned = pd.merge(df_buoy, df_era5, how='right', on='time')

    # Calculate data period from the timestamps themselves. When 'time' is a
    # column in at least one input, the merged frame carries it as a column
    # with a default integer index, so index.min()/max() would report row
    # numbers rather than dates.
    if 'time' in df_aligned.columns:
        merged_times = df_aligned['time']
    else:
        merged_times = df_aligned.index
    data_start = merged_times.min()
    data_end = merged_times.max()

    print(f'The period of merged data({data_name} & ERA 5) is: {data_start} to {data_end}')

    return df_aligned


In [None]:
# Buoy 2 measurements (netCDF)
buoy2_data_path = 'data/measurements/2023-11-09_Buoy2_BSH_N-9.nc'
buoy2_data = read_netcdf(buoy2_data_path)

# Buoy 6 measurements (netCDF)
buoy6_data_path = 'data/measurements/2023-11-06_Buoy6_BSH_N-9.nc'
buoy6_data = read_netcdf(buoy6_data_path)

# ERA5 reanalysis: yearly CSV files, years 2000 through 2023 inclusive
era5_data_path = 'data/reanalysis'
era5_data = read_era5(era5_data_path, 2000, 2023)

In [None]:
# Position along the height axis of the level used throughout the analysis
height_index = 3
chosen_height = buoy2_data.variables['height'][height_index]
print('height:', chosen_height, 'm')

# NOTE(review): the Buoy 2 file is read through group '..._WLBZ_6_...' and the
# Buoy 6 file through '..._WLBZ_2_...'. That looks crossed - confirm the group
# names against the files before trusting the per-buoy labels.

# --- Buoy 2: time axis, wind speed and direction at the chosen height ---
time_buoy2 = buoy2_data.variables['time'][:]
time_buoy2_converted = convert_time(time_buoy2)
lidar_vars_buoy2 = buoy2_data.groups['ZX_LIDAR_WLBZ_6_MCP'].variables
windspeed_buoy2 = lidar_vars_buoy2['wind_speed'][:, 0, 0, height_index]
winddirection_buoy2 = lidar_vars_buoy2['wind_from_direction'][:, 0, 0, height_index].flatten()

buoy2_df = pd.DataFrame(dict(
    time=time_buoy2_converted,
    windspeed=windspeed_buoy2,
    winddirection=winddirection_buoy2,
))

# --- Buoy 6: same extraction ---
time_buoy6 = buoy6_data.variables['time'][:]
time_buoy6_converted = convert_time(time_buoy6)
lidar_vars_buoy6 = buoy6_data.groups['ZX_LIDAR_WLBZ_2_MCP'].variables
windspeed_buoy6 = lidar_vars_buoy6['wind_speed'][:, 0, 0, height_index]
winddirection_buoy6 = lidar_vars_buoy6['wind_from_direction'][:, 0, 0, height_index].flatten()

buoy6_df = pd.DataFrame(dict(
    time=time_buoy6_converted,
    windspeed=windspeed_buoy6,
    winddirection=winddirection_buoy6,
))

# Side-by-side view of both buoys; columns get the suffixes _2 / _6
buoy_data = pd.merge(buoy2_df, buoy6_df, on='time', how='outer', suffixes=('_2', '_6'))

print(buoy_data.head())

In [None]:
# Derive the 100 m wind speed and direction columns ('windspeed_era_100',
# 'winddirection_era_100') from the u100/v100 components. The helper writes
# into the frame it is given and returns that same frame, so re-running this
# cell just overwrites the two columns.
era5_data = calculate_windspeed_direction(era5_data)
print(era5_data.head())

In [None]:
def correlate_wind_data(df_aligned, data_name, ws_col='windspeed', wd_col='winddirection'):
    """Fit and score linear MCP models for wind speed and wind direction.

    ERA5 columns ``era5_WS100`` / ``era5_WD100`` are the predictors; the
    measurement columns ``ws_col`` / ``wd_col`` are the targets. Each pair is
    split 70/30 into train and test sets (fixed seed), the predictor is
    standardised on the training set, and a linear model is fitted and
    evaluated on the test set.

    Parameters
    ----------
    df_aligned : pandas.DataFrame
        Merged measurement + ERA5 frame.
    data_name : str
        Label of the measurement source, used in the evaluation titles.
    ws_col, wd_col : str, optional
        Names of the measured wind speed / direction columns in
        ``df_aligned``. The defaults match the column names of the buoy
        frames built in this notebook, so the same function serves every
        buoy.

    Returns
    -------
    tuple
        ``(lin_model_ws, lin_model_wd, corr_ws, corr_wd)`` - the two fitted
        models and the third value returned by ``evaluate_mcp_model`` for
        each (presumably a correlation coefficient - verify against that
        helper).

    Notes
    -----
    NOTE(review): ``perform_linear_mcp`` and ``evaluate_mcp_model`` are not
    defined in the part of the notebook visible here; they must exist before
    this function is called.
    NOTE(review): wind direction is circular (359 deg and 1 deg are
    neighbours), which a plain linear fit does not account for.
    """
    # Prepare data for wind speed correlation
    x_ws, y_ws = prepare_data_for_MCP(df_aligned, 'era5_WS100', ws_col)

    # Prepare data for wind direction correlation
    x_wd, y_wd = prepare_data_for_MCP(df_aligned, 'era5_WD100', wd_col)

    # Split data into training and test data
    X_train_ws, X_test_ws, y_train_ws, y_test_ws = train_test_split(x_ws, y_ws, test_size=0.3, random_state=42)
    X_train_wd, X_test_wd, y_train_wd, y_test_wd = train_test_split(x_wd, y_wd, test_size=0.3, random_state=42)

    # Scale the data (scalers are fitted on the training split only)
    scaler_ws = StandardScaler()
    scaler_wd = StandardScaler()
    X_train_ws_scaled = scaler_ws.fit_transform(X_train_ws)
    X_test_ws_scaled = scaler_ws.transform(X_test_ws)
    X_train_wd_scaled = scaler_wd.fit_transform(X_train_wd)
    X_test_wd_scaled = scaler_wd.transform(X_test_wd)

    # Perform MCP for wind speed and wind direction
    lin_model_ws, y_pred_ws = perform_linear_mcp(X_train_ws_scaled, y_train_ws, X_test_ws_scaled)
    lin_model_wd, y_pred_wd = perform_linear_mcp(X_train_wd_scaled, y_train_wd, X_test_wd_scaled)

    # Evaluate the models
    _, _, corr_ws = evaluate_mcp_model(y_test_ws, y_pred_ws, f'Wind Speed - {data_name}')
    _, _, corr_wd = evaluate_mcp_model(y_test_wd, y_pred_wd, f'Wind Direction - {data_name}')

    return lin_model_ws, lin_model_wd, corr_ws, corr_wd

In [None]:
# Process the measurement data
df_buoy2_processed = process_meas_data(buoy2_df)
df_buoy6_processed = process_meas_data(buoy6_df)

era5_selected = era5_data[['windspeed_era_100', 'winddirection_era_100', 'Time [UTC]']].copy()
era5_selected.rename(columns={'windspeed_era_100': 'era5_WS100', 'winddirection_era_100': 'era5_WD100', 'Time [UTC]': 'time'}, inplace=True)
era5_selected['time'] = pd.to_datetime(era5_selected['time'])


# Align measurement data with ERA5 data
aligned_data_buoy2 = merge_data(df_buoy2_processed, era5_selected, 'Buoy2')
aligned_data_buoy6 = merge_data(df_buoy6_processed, era5_selected, 'Buoy6')

# Correlate wind data for Buoy2
lin_model_ws_buoy2, lin_model_wd_buoy2, corr_ws_buoy2, corr_wd_buoy2 = correlate_wind_data(aligned_data_buoy2, 'Buoy2')

# Correlate wind data for Buoy6
lin_model_ws_buoy6, lin_model_wd_buoy6, corr_ws_buoy6, corr_wd_buoy6 = correlate_wind_data(aligned_data_buoy6, 'Buoy6')