In [24]:
import numpy as np
import pandas as pd
import pickle, csv
import os

from keras.models import Sequential, load_model, Model
from keras.layers import LSTM, Dense, Dropout, Input, concatenate, Flatten, Reshape
from keras import regularizers
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.sequence import TimeseriesGenerator
import tensorflow.keras.backend as K
from skimage.transform import resize

from matplotlib import pyplot as plt
from matplotlib import style
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import seaborn as sns

In [25]:
# Load the power-price table and keep everything except the "Time" column.
powerprice_raw = pd.read_csv("data/PowerPrice.csv")

# Parse the timestamps purely as a sanity check (raises on malformed values);
# the parsed result is not stored because "Time" is dropped right below.
# `infer_datetime_format` is omitted on purpose: it is deprecated in pandas 2.x,
# where format inference is already the default behaviour.
pd.to_datetime(powerprice_raw["Time"])

# Build the final frame from the raw one instead of mutating it in place, so
# re-running this cell always gives the same result.
df_powerprice = powerprice_raw.drop(columns=["Time"])

In [26]:
# Read every per-source pickle and build one DataFrame per region
# (rows = 15-minute timestamps, columns = energy sources).
#
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only point this at files from a trusted origin.
spatial_dir = "data/Spatial"

# Row labels, one per pickle file, listed in the alphabetical order of the
# file names (biogas, brown_coal, completed_solar, ...).
source_labels = ["Biogas (kW)", "Brown Coal (kW)", "Solar (kW)", "Wind Onshore (kW)", "Demand (kW)",
                 "Hard Coal (kW)", "Nuclear (kW)", "Oil (kW)", "Water Laufwasser (kW)", "Water Pumpspeicher PB (kW)",
                 "Water Pumpspeicher TB (kW)", "Water Speicherwasser (kW)", "Wind Offshore (kW)"]

theArrays = []
# os.listdir makes no ordering guarantee; sort so that the i-th array always
# corresponds to the i-th entry of source_labels.
for filename in sorted(os.listdir(spatial_dir)):
    if filename.endswith(".pickle"):
        with open(os.path.join(spatial_dir, filename), "rb") as file:
            # The first row of each pickled array is discarded
            # (presumably a header/ID row -- TODO confirm).
            theArrays.append(pickle.load(file)[1:,:])
        print(filename)

# All sources stacked vertically: rows are ordered source-major, i.e. every
# region of the first source, then every region of the second source, ...
combined_array = np.concatenate(theArrays, axis=0)

# define regions and calculate number of rows per region
num_regions = 99
rows_per_region = combined_array.shape[0] // num_regions
regions = ["Region{}".format(i) for i in range(1, num_regions + 1)]

if combined_array.shape[0] != len(source_labels) * num_regions:
    raise ValueError(
        "expected {} sources x {} regions = {} rows, got {}".format(
            len(source_labels), num_regions,
            len(source_labels) * num_regions, combined_array.shape[0]))

# View the data as (source, region, time). Because the rows are source-major,
# taking `rows_per_region` consecutive rows would yield ONE source for several
# regions; indexing the region axis instead yields all sources for ONE region.
# reshape on the freshly concatenated array returns a view, so no data is copied.
per_source = combined_array.reshape(len(source_labels), num_regions, -1)

timestamps = pd.date_range('2019-01-01 00:00:00', '2019-12-31 23:45:00', freq='15min')

# create a DataFrame for each region and store in a list
region_dfs = []
for i, region in enumerate(regions):
    region_array = per_source[:, i, :]  # (source, time) for region i
    region_df = pd.DataFrame(region_array, columns=timestamps, index=source_labels).transpose()
    region_dfs.append(region_df)

biogas_kw_2019.pickle
brown_coal_kw_2019.pickle
completed_solar_kw_2019.pickle
completed_wind_onshore_kw_2019.pickle
demand_kw_2019.pickle
hard_coal_kw_2019.pickle
nuclear_kw_2019.pickle
oil_kw_2019.pickle
water_laufwasser_kw_2019.pickle
water_pumpspeicher_PB_kw_2019.pickle
water_pumpspeicher_TB_kw_2019.pickle
water_speicherwasser_kw_2019.pickle
wind_offshore_kw_2019.pickle


In [27]:
# Read the mask grid from CSV; header=None makes pandas treat the very first
# line as data rather than as column names.
mask_csv = "germany/mask.csv"
mask_df = pd.read_csv(mask_csv, header=None)

# Display (rows, columns) as the cell output.
mask_df.shape

(40, 40)

In [28]:
# Turn the list of per-source arrays into a single 3-D array and swap its last
# two axes. Axis 0 (one entry per source file) stays in front, so the result
# is (13, 35040, 99), matching the recorded output of this cell.
# NOTE(review): an earlier comment here said (35040, 13, 99); if a time-major
# layout is what downstream code needs, the axes would have to be (2, 0, 1) --
# confirm against the consumers of `data_array`.
data_array = np.asarray(theArrays).transpose(0, 2, 1)
data_array.shape

(13, 35040, 99)

In [29]:
# convert DataFrame to numpy array
mask_arr = np.array(mask_df)

if mask_arr.ndim != 2:
    raise ValueError("expected a 2-D mask, got shape {}".format(mask_arr.shape))

# Replicate the 2-D mask once per feature channel, channels last.
# Indexing `mask_arr[..., 0]` on a 2-D mask selects only its first column, and
# resizing that 1-D vector to (13, 40, 40) produces channels that are each a
# single constant value instead of the mask. Repeating the full mask along a
# new trailing axis gives the same output shape with the mask in every channel.
# The cast to float keeps the floating-point dtype that the resize-based
# version returned.
# NOTE(review): the spatial axes keep the orientation of mask.csv (rows, cols);
# confirm that this matches the grid orientation of the data it is applied to.
num_channels = 13
mask_resized = np.repeat(mask_arr[:, :, np.newaxis].astype(float), num_channels, axis=2)
mask_resized.shape

(40, 40, 13)