<a href="https://colab.research.google.com/github/ilya-talankin/LSTM-power-forecasting/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Функция для трансформации таблицы с данными в подходящую для LSTM размерность

(кол-во измерений, кол-во временных шагов, кол-во параметров) т. е. -> (n, historyInterval, 50)

In [None]:
def prepareData(dataCsv, historyInterval, forecastHorizon=5):
    """Turn one data table into LSTM input windows and summed-power targets.

    For each usable row index ``i`` the sample is the ``historyInterval`` rows
    of features immediately before ``i`` and the target is the sum of
    ``power_normed`` over rows ``i .. i + forecastHorizon - 1``.

    Args:
        dataCsv: pandas DataFrame containing a ``power_normed`` column; every
            other column is used as an input feature. The frame is not modified.
        historyInterval: number of past rows in each input window.
        forecastHorizon: number of rows, starting at ``i``, whose power is
            summed into the target. Defaults to 5, the value that used to be
            hard-coded.

    Returns:
        Tuple ``(x, y)`` of numpy arrays: ``x`` has shape
        ``(n, historyInterval, n_features)`` and ``y`` has shape ``(n,)``.

    Raises:
        KeyError: if ``dataCsv`` has no ``power_normed`` column.
    """
    power = np.array(dataCsv['power_normed'])
    # drop() returns a new frame, so the caller's DataFrame keeps its target column.
    features = np.array(dataCsv.drop(columns='power_normed'))

    n_rows = len(features)
    # Keep the original upper bound (n_rows - historyInterval) and additionally
    # stop early enough that every target slice holds forecastHorizon rows;
    # otherwise a short historyInterval would sum truncated slices at the tail.
    stop = min(n_rows - historyInterval, n_rows - forecastHorizon + 1)
    starts = range(historyInterval, stop)

    if len(starts) == 0:
        # Too few rows for a single sample: return correctly shaped empty arrays.
        return np.empty((0, historyInterval, features.shape[1])), np.empty((0,))

    x = [features[i - historyInterval : i] for i in starts]
    y = [sum(power[i : i + forecastHorizon]) for i in starts]
    return np.array(x), np.array(y)


Функция для разделения данных на тестовые, тренировочные и валидационные.

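test_ratio — доля данных (с начала ряда), которая идёт в обучение и валидацию; оставшаяся часть (1 − test_ratio, по умолчанию 20 %) используется для тестирования. Из обучающей части последние 10 % выделяются под валидацию.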

In [None]:
from collections import namedtuple
# Holds one array split three ways; fields are, in order: test, train, val.
Splitted = namedtuple("Splitted", "test train val")

In [None]:
def splitData(x, y, test_ratio = 0.8):
    """Split samples chronologically into test, train and validation parts.

    Args:
        x: array of input samples; ``x.shape[0]`` is the number of samples.
        y: targets aligned with ``x`` along the first axis.
        test_ratio: position of the test boundary as a fraction of the samples.
            Despite the name, the first ``test_ratio`` share of the samples is
            kept for training and validation, and everything after it becomes
            the test set.

    Returns:
        Two ``Splitted`` tuples ``(x_parts, y_parts)``, each with fields
        ``test``, ``train`` and ``val``.
    """
    boundary = int(test_ratio * x.shape[0])

    # Everything before the boundary is used for fitting, the tail for testing.
    head_x, tail_x = x[:boundary], x[boundary:]
    head_y, tail_y = y[:boundary], y[boundary:]

    # shuffle=False keeps the order, so validation is the last 10% of the head.
    train_x, val_x, train_y, val_y = train_test_split(
        head_x, head_y, test_size=0.1, shuffle=False
    )

    x_parts = Splitted(test=tail_x, train=train_x, val=val_x)
    y_parts = Splitted(test=tail_y, train=train_y, val=val_y)
    return x_parts, y_parts


0. Необходимые модули

In [None]:
# includes
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras import optimizers
import tensorflow as tf
import os
from sklearn.model_selection import train_test_split

1. Загрузка данных с гугл диска

In [None]:

# Mount Google Drive so the dataset folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Switch into the dataset folder and collect the names of its CSV files.
os.chdir('/content/drive/MyDrive/Solar-Power-Forecasting/GermanSolarFarm/data/')
csv_files_list = list(filter(lambda name: name.endswith('.csv'), os.listdir('.')))

# Read every file as a ';'-separated table and discard its 'Unnamed: 51' column,
# collecting one DataFrame per CSV file.
all_stations_data = []
for csv_file_name in csv_files_list:
    station_data = pd.read_csv(csv_file_name, delimiter=';').drop('Unnamed: 51', axis=1)
    all_stations_data.append(station_data)

2. Создание модели

In [None]:
# Number of past time steps in each input window.
# NOTE(review): the previous comment read "data every 3 hours -> 16 * 3 = 48 hours",
# which does not match the value 10 below — confirm the intended window length.
historyInterval = 10

# Two stacked LSTM layers (the first returns the full sequence so the second can
# consume it) followed by a single-unit dense output.
model = Sequential([
    LSTM(128, activation='relu', input_shape=(historyInterval, 50), return_sequences=True),
    LSTM(64, activation='relu'),
    Dense(1),
])

# Mean-squared-error loss, Adam at learning rate 1e-4, MAE reported as a metric.
model.compile(loss='mse', optimizer=optimizers.Adam(learning_rate=1e-4), metrics=['mae'])
model.summary()

3. Обучение

In [None]:
# Multiply the learning rate by 0.2 when val_loss has not improved for one epoch.
# min_lr must sit below the optimizer's starting rate (1e-4 where the model is
# compiled): ReduceLROnPlateau only lowers the rate while it is above min_lr, so
# the previous floor of 0.001 left the callback unable to ever change anything.
# NOTE(review): Keras resets this callback's patience counter at the start of
# every fit() call, so with epochs=1 per call it still cannot trigger — confirm
# whether multi-epoch fits are intended.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=1, min_lr=1e-6)

In [None]:
import random

# Metric values collected from every fit() call, stored flat (one float per
# entry) so each list can be plotted or inspected directly.
history = {'loss': [], 'mae': [], 'val_loss': [], 'val_mae': []}
for i in range(0, 50):
    print('Epoch number ', i)
    # Visit the stations in a fresh random order on every pass (shuffles in place).
    random.shuffle(all_stations_data)
    for station_data in all_stations_data:
        # Pass a copy so the stored table cannot be altered by prepareData.
        x, y = prepareData(station_data.copy(), historyInterval)
        x, y = splitData(x, y)
        fit_result = model.fit(x.train, y.train, epochs=1, validation_data=(x.val, y.val), callbacks=[reduce_lr])
        # fit_result.history maps each metric name to a list with one value per
        # epoch; extend() keeps `history` flat instead of nesting 1-element lists.
        for metric_name in history:
            history[metric_name].extend(fit_result.history[metric_name])

In [None]:
import matplotlib.pyplot as plt

# Plot the metrics accumulated in `history` by the training loop. Re-reading
# model.history.history here would keep only the most recent fit() call (a
# single epoch), leaving each curve with just one point.
# np.ravel flattens the values whether they were stored flat or as nested lists.
train_loss = np.ravel(history['loss'])
val_loss = np.ravel(history['val_loss'])
train_mae = np.ravel(history['mae'])
val_mae = np.ravel(history['val_mae'])

# Plot the training and validation loss over time
plt.plot(train_loss)
plt.plot(val_loss)
plt.title('Model loss')
plt.ylabel('Loss')
# Each point is one fit() call (one station in one pass), not a whole epoch.
plt.xlabel('Fit call')
plt.ylim(0,1)
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()

# Plot the training and validation mean absolute error over time
plt.plot(train_mae)
plt.plot(val_mae)
plt.title('Model mae')
plt.ylabel('Mae')
plt.xlabel('Fit call')
plt.legend(['Train', 'Validation'], loc='lower right')
plt.show()

In [None]:
# Show the contents of `history` as the cell's output.
history