In [22]:
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
import time
import networkx as nx
import numpy as np
import pandas as pd

import torch
from torch.utils.data import DataLoader
from torch_geometric.utils import from_networkx

from utils.prep_data import load_data, split_data, mask_data, Experiment
from utils.train import train
from utils.dataset import WindFarmDataset
from GCGRU.GRU import GRU
from GCGRU.GCGRU import GCGRU
from copy import deepcopy

# When False, later cells restrict the data to a fixed subset of turbines.
full_graph = False

# Run on the GPU whenever torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, median_absolute_error, mean_squared_error
import pandas as pd
import numpy as np
import math

cuda


In [23]:
# Load the turbine measurements, keeping only the columns this notebook needs.
data = load_data(columns=["TurbID", "Wspd", "Wdir", "Etmp", "Itmp", "Ndir", "Pab1", "Pab2", "Pab3", "Prtv", "Patv", "datetime", "P_norm"])

# subset of turbines for faster experiments
if full_graph:
    turbines_idx = data.TurbID.unique()
else:
    turbines_idx = [9, 10, 11, 12, 31, 32, 33, 34, 35, 52, 53, 54, 55, 56, 57]
data = data[data["TurbID"].isin(turbines_idx)]

# Order rows by timestamp and then turbine, and renumber the index from 0.
data = data.sort_values(["datetime", "TurbID"]).reset_index(drop=True)
# Running row counter within each turbine (0, 1, 2, ... per TurbID).
data['T'] = data.groupby("TurbID").cumcount()

# True where Patv is present. Taken only after filtering and sorting so that
# the mask lines up row-for-row with `data`; computing it on the freshly
# loaded frame would leave it with the wrong length/order for the subset.
nan_mask = ~data["Patv"].isna().to_numpy()


def _min_max_scale(col):
    """Rescale a column to [0, 1] using its own minimum and maximum.

    A constant column (max == min) is mapped to 0 rather than dividing by
    zero, which would turn every value into NaN. Missing values stay NaN.
    """
    low = col.min()
    span = col.max() - low
    if span == 0:
        span = 1  # avoid 0/0; (col - low) is already all zeros here
    return (col - low) / span


# normalize features
# NOTE(review): min/max are taken over all rows before split_data runs, so the
# scaling statistics also include validation/test rows - confirm this is intended.
features = ["Wspd", "Wdir", "Etmp", "Itmp", "Ndir", "Pab1", "Pab2", "Pab3", "Prtv", "Patv"]
data[features] = data[features].apply(_min_max_scale)

# Split into train / validation / test using the 0.7 / 0.2 / 0.1 proportions.
train_data, val_data, test_data = split_data(data, splits=[0.7, 0.2, 0.1])

In [24]:
# Experiment grids: every entry is a (size, fraction) pair.

# RANDOM experiment: only the first element varies; fraction is left as None.
random_percentages = [(pct, None) for pct in (0.01, 0.02, 0.05, 0.1)]

# BLACKOUT experiment: sizes 30/60/150/300 at a fixed fraction of 0.01.
# A fuller sweep would pair every size with fractions 0.01, 0.02, 0.05 and 0.1.
blackout_periods = [(length, 0.01) for length in (30, 60, 150, 300)]

# MAINTENANCE experiment: sizes 1/2/7/14 at a fixed fraction of 0.01.
# A fuller sweep would pair every size with fractions 0.01, 0.02, 0.05 and 0.1.
maintenance_periods = [(length, 0.01) for length in (1, 2, 7, 14)]

In [25]:
# For each split (train, then val, then test) build a dict keyed by
# (size, fraction) whose values are the RANDOM-experiment masks from mask_data.
train_masks_random, val_masks_random, test_masks_random = (
    {
        (size, fraction): mask_data(
            split,
            base_mask=None,
            experiment=Experiment.RANDOM,
            size=size,
            fraction=fraction,
        )
        for (size, fraction) in random_percentages
    }
    for split in (train_data, val_data, test_data)
)

In [26]:
# For each split (train, then val, then test) build a dict keyed by
# (size, fraction) whose values are the BLACKOUT-experiment masks from mask_data.
train_masks_blackout, val_masks_blackout, test_masks_blackout = (
    {
        (size, fraction): mask_data(
            split,
            base_mask=None,
            experiment=Experiment.BLACKOUT,
            size=size,
            fraction=fraction,
        )
        for (size, fraction) in blackout_periods
    }
    for split in (train_data, val_data, test_data)
)

In [27]:
# For each split (train, then val, then test) build a dict keyed by
# (size, fraction) whose values are the MAINTENANCE-experiment masks from mask_data.
train_masks_maintenance, val_masks_maintenance, test_masks_maintenance = (
    {
        (size, fraction): mask_data(
            split,
            base_mask=None,
            experiment=Experiment.MAINTENANCE,
            size=size,
            fraction=fraction,
        )
        for (size, fraction) in maintenance_periods
    }
    for split in (train_data, val_data, test_data)
)

In [28]:
# Write every mask dictionary to its own CSV under ../data. The list below is
# ordered experiment-major, stage-minor, so position i maps to
# (experiment, stage) through divmod(i, number_of_stages).
experiment_list = [Experiment.RANDOM, Experiment.BLACKOUT, Experiment.MAINTENANCE]
stages = ["train", "val", "test"]
all_mask_sets = [
    train_masks_random, val_masks_random, test_masks_random,
    train_masks_blackout, val_masks_blackout, test_masks_blackout,
    train_masks_maintenance, val_masks_maintenance, test_masks_maintenance,
]
for i, masks in enumerate(all_mask_sets):
    experiment_pos, stage_pos = divmod(i, len(stages))
    # File name carries the experiment, the stage and the full_graph flag.
    out_path = f"../data/masks_{experiment_list[experiment_pos]}_{stages[stage_pos]}_{full_graph}.csv"
    pd.DataFrame(masks).to_csv(out_path)