In [1]:
from sklearn.preprocessing import StandardScaler
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import LSTM, Dense ,Dropout, Bidirectional, Input
from keras.optimizers import Nadam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm

In [7]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [3]:
def create_model(x_train, 
                 n_forecast=4, 
                 units=30, 
                 n_timesteps=30,
                 optimizer='adam',
                 loss='mean_squared_error',
                 metrics='mae'):
    regressor = Sequential()
    
    # Используем Input для явного задания формы входных данных
    regressor.add(Input(shape=(x_train.shape[1], 1)))
    
    regressor.add(Bidirectional(LSTM(units=units, return_sequences=True)))
    regressor.add(Dropout(0.2))
    
    regressor.add(LSTM(units=units, return_sequences=True))
    regressor.add(Dropout(0.2))
    
    regressor.add(LSTM(units=units, return_sequences=True))
    regressor.add(Dropout(0.2))
    
    regressor.add(LSTM(units=units))
    regressor.add(Dropout(0.2))
    
    regressor.add(Dense(units=n_forecast, activation='linear'))
    
    # Компиляция модели
    regressor.compile(optimizer=optimizer, loss=loss, metrics=[metrics])

    return regressor


def create_train_test_data(dataset, percent_train_data):
    size = len(dataset)
    target_idx = int((size / 100) * percent_train_data)

    train = dataset[:target_idx]
    test = dataset[target_idx:]

    return train, test, target_idx


def create_data(data, n_timesteps, n_forecast):
    x_data = []
    y_data = []

    for i in range(len(data)-n_timesteps-n_forecast+1):
        x_data.append(data[i:i+n_timesteps, 0])
        y_data.append(data[i+n_timesteps:i+n_timesteps+n_forecast, 0])

    x_data, y_data = np.array(x_data), np.array(y_data)
    x_data = np.reshape(x_data, (x_data.shape[0], x_data.shape[1], 1))

    return x_data, y_data

In [12]:
# model_params = ParameterGrid({"units": [15, 30, 60, 90, 150],
#                               "n_timesteps": [15, 30, 60],
#                               "optimizer": ['adam', 'rmsprop', 'nadam'],
#                               "loss": ['mean_absolute_error', 'mean_squared_error'],
#                               "epochs" : [250],
#                               "batch_size": [64]})

In [4]:
model_params = ParameterGrid({"units": [15, 30, 60, 90, 150],
                              "n_timesteps": [15, 30, 60],
                              "optimizer": ['adam', 'rmsprop', 'nadam'],
                              "loss": ['mean_absolute_error'],
                              "epochs" : [500],
                              "batch_size": [32]})

In [5]:
scaler = MinMaxScaler(feature_range=(0,1))

In [6]:
df = pd.read_csv('dataset_days.csv', sep=',', index_col="time")
df.head()

Unnamed: 0_level_0,Среднняя t,Минимальная t,Максимальная t,Суточная сумма осадков,Глубина снега,Скорость ветра,Среднее давление hPa,Относительная влажность в %
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-05-24,13.6,8.9,17.5,0.0,0.0,4.7,1012.7,64.125
2019-05-25,15.6,11.8,18.9,0.3,0.0,4.6,1010.2,56.625
2019-05-26,18.1,14.3,23.6,0.0,0.0,2.4,1004.7,62.375
2019-05-27,16.7,12.5,21.2,0.0,0.0,4.7,1008.5,52.5
2019-05-28,22.0,15.9,28.4,0.0,0.0,2.9,1014.1,48.25


In [7]:
temp = df.iloc[:,:1].values
train, test, size = create_train_test_data(temp, 70)

In [8]:
train_scal = scaler.fit_transform(train)
test_scal = scaler.transform(test)

In [18]:
# d = {"params": [],
#      "day1": [],
#      "day2": [],
#      "day3": [],
#      "day4": [],
#      "day5": [],
#      "day6": [],
#      "day7": [],
#      "day8": [],
#      "day9": [],
#      "day10": []}

# df = pd.DataFrame(d)

# df.to_csv('res_test_model.csv')

In [9]:
dataset = pd.read_csv('res_test_model.csv', sep=',')

In [10]:
dataset

Unnamed: 0,params,day1,day2,day3,day4,day5,day6,day7,day8,day9,day10
0,"{'batch_size': 64, 'epochs': 250, 'loss': 'mea...",2.208369,3.012873,3.469074,3.681408,3.790077,3.967572,4.163516,4.344530,4.502006,4.648923
1,"{'batch_size': 64, 'epochs': 500, 'loss': 'mea...",2.306652,3.098385,3.523147,3.719802,3.841857,3.979302,4.117654,4.265179,4.389103,4.537787
2,"{'batch_size': 64, 'epochs': 250, 'loss': 'mea...",2.355383,3.169108,3.577043,3.785415,3.894658,4.068782,4.280042,4.464551,4.625480,4.755267
3,"{'batch_size': 64, 'epochs': 500, 'loss': 'mea...",2.376186,3.120337,3.527182,3.780602,3.925304,4.153919,4.367965,4.543726,4.708529,4.813364
4,"{'batch_size': 64, 'epochs': 250, 'loss': 'mea...",2.394197,3.202948,3.605310,3.816377,3.949121,4.130400,4.312463,4.462433,4.609981,4.769575
...,...,...,...,...,...,...,...,...,...,...,...
175,"{'batch_size': 64, 'epochs': 500, 'loss': 'mea...",4.721040,4.955592,5.058608,5.024088,4.997010,5.164393,5.392358,5.600112,5.866277,6.051541
176,"{'batch_size': 64, 'epochs': 500, 'loss': 'mea...",4.961529,4.820623,4.655858,4.726391,4.869909,5.047732,5.242114,5.502705,5.586183,5.527735
177,"{'batch_size': 64, 'epochs': 500, 'loss': 'mea...",5.249108,5.457809,5.505164,5.519813,5.521354,5.614072,5.765991,5.915891,5.994523,6.010291
178,"{'batch_size': 64, 'epochs': 500, 'loss': 'mea...",5.264601,5.381605,5.280178,5.227171,5.331639,5.568300,5.795400,5.928228,5.937462,5.921612


In [11]:
for item in tqdm(model_params):

    d = {"params": [],
    "day1": [],
    "day2": [],
    "day3": [],
    "day4": [],
    "day5": [],
    "day6": [],
    "day7": [],
    "day8": [],
    "day9": [],
    "day10": []}
    
    x_train, y_train = create_data(train_scal, item["n_timesteps"], 10)
    x_test, y_test = create_data(test_scal, item["n_timesteps"], 10)
    y_test = scaler.inverse_transform(y_test)

    model = create_model(x_train=x_train,
                        n_forecast=10,
                        units=item["units"],
                        n_timesteps=item["n_timesteps"],
                        optimizer=item["optimizer"],
                        loss=item["loss"])
    
    model.fit(x_train, y_train, epochs=item["epochs"], batch_size=item["batch_size"], verbose=0)

    predict = model.predict(x_test)

    predict = scaler.inverse_transform(predict)

    d["params"].append(item)
    for i in range(10):  # 10 это n_forecast
        mae_day = np.mean(np.abs(y_test[:, i] - predict[:, i]))
        d[f"day{i+1}"].append(mae_day)

    df_item = pd.DataFrame(d)
    dataset = pd.concat([dataset, df_item], axis=0)
    dataset_sort = dataset.sort_values(by=["day1", "day2", "day3", "day4", "day5", "day6", "day7", "day8", "day9", "day10"])
    dataset_sort.to_csv('res_test_model.csv', index=False)

  0%|          | 0/45 [00:00<?, ?it/s]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step


  2%|▏         | 1/45 [03:00<2:12:43, 180.99s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step


  4%|▍         | 2/45 [06:17<2:16:07, 189.95s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step


  7%|▋         | 3/45 [10:19<2:29:44, 213.92s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 77ms/step


  9%|▉         | 4/45 [15:13<2:47:37, 245.30s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step


 11%|█         | 5/45 [23:25<3:43:00, 334.51s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53ms/step 


 13%|█▎        | 6/45 [26:31<3:04:36, 284.01s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step


 16%|█▌        | 7/45 [29:54<2:43:09, 257.61s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step


 18%|█▊        | 8/45 [33:47<2:33:52, 249.53s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step


 20%|██        | 9/45 [38:39<2:37:39, 262.77s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step


 22%|██▏       | 10/45 [47:01<3:16:24, 336.71s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step


 24%|██▍       | 11/45 [50:15<2:46:09, 293.22s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step


 27%|██▋       | 12/45 [53:51<2:28:11, 269.45s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step


 29%|██▉       | 13/45 [58:05<2:21:14, 264.83s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53ms/step


 31%|███       | 14/45 [1:03:27<2:25:43, 282.05s/it]

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step


 33%|███▎      | 15/45 [1:12:22<2:59:09, 358.30s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


 36%|███▌      | 16/45 [1:17:46<2:48:10, 347.94s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


 38%|███▊      | 17/45 [1:23:54<2:45:14, 354.09s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


 40%|████      | 18/45 [1:31:30<2:53:10, 384.82s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step


 42%|████▏     | 19/45 [1:41:10<3:12:07, 443.35s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step


 44%|████▍     | 20/45 [1:57:25<4:11:14, 602.97s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


 47%|████▋     | 21/45 [2:02:57<3:28:43, 521.79s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


 49%|████▉     | 22/45 [2:08:52<3:00:49, 471.74s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


 51%|█████     | 23/45 [2:16:05<2:48:40, 460.03s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step


 53%|█████▎    | 24/45 [2:25:52<2:54:20, 498.12s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step


 56%|█████▌    | 25/45 [2:41:23<3:29:17, 627.89s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


 58%|█████▊    | 26/45 [2:47:04<2:51:35, 541.89s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


 60%|██████    | 27/45 [2:53:13<2:27:00, 490.02s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


 62%|██████▏   | 28/45 [3:01:00<2:16:51, 483.04s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step


 64%|██████▍   | 29/45 [3:10:41<2:16:39, 512.46s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step


 67%|██████▋   | 30/45 [3:26:39<2:41:31, 646.09s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step


 69%|██████▉   | 31/45 [3:36:04<2:25:05, 621.82s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step


 71%|███████   | 32/45 [3:46:28<2:14:51, 622.46s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step


 73%|███████▎  | 33/45 [3:59:13<2:13:04, 665.38s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step


 76%|███████▌  | 34/45 [4:15:46<2:19:59, 763.59s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step


 78%|███████▊  | 35/45 [4:44:39<2:55:43, 1054.36s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step


 80%|████████  | 36/45 [4:53:46<2:15:20, 902.23s/it] 

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step


 82%|████████▏ | 37/45 [5:04:54<1:50:54, 831.84s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step


 84%|████████▍ | 38/45 [5:18:18<1:36:05, 823.66s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step


 87%|████████▋ | 39/45 [5:34:54<1:27:30, 875.16s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step


 89%|████████▉ | 40/45 [6:03:47<1:34:23, 1132.64s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step


 91%|█████████ | 41/45 [6:13:12<1:04:09, 962.33s/it] 

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step


 93%|█████████▎| 42/45 [6:23:28<42:54, 858.26s/it]  

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step


 96%|█████████▌| 43/45 [6:36:25<27:47, 834.00s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step


 98%|█████████▊| 44/45 [6:52:59<14:42, 882.15s/it]

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step


100%|██████████| 45/45 [7:22:38<00:00, 590.20s/it] 


In [37]:
df = pd.DataFrame(d)

In [31]:
dataset1 = pd.read_csv('res1_test_model.csv', sep=',')
dataset2 = pd.read_csv('res2_test_model.csv', sep=',')

In [32]:
dataset = pd.concat([dataset1, dataset2], axis=0, ignore_index=True)

In [33]:
dataset_sort = dataset.sort_values(by=["day1", "day2", "day3", "day4", "day5", "day6", "day7", "day8", "day9", "day10"])

In [34]:
dataset_sort

Unnamed: 0,params,day1,day2,day3,day4,day5,day6,day7,day8,day9,day10
225,"{'batch_size': 32, 'epochs': 250, 'loss': 'mean_squared_error', 'n_timesteps': 15, 'optimizer': 'adam', 'units': 150}",2.063217,3.001082,3.503886,3.778379,3.896511,4.078947,4.257945,4.387245,4.549065,4.694083
226,"{'batch_size': 32, 'epochs': 250, 'loss': 'mean_absolute_error', 'n_timesteps': 15, 'optimizer': 'adam', 'units': 150}",2.071695,3.028820,3.521742,3.761844,3.928155,4.131037,4.386280,4.545744,4.718516,4.883468
227,"{'batch_size': 32, 'epochs': 250, 'loss': 'mean_absolute_error', 'n_timesteps': 15, 'optimizer': 'adam', 'units': 90}",2.193776,3.084165,3.611866,3.836648,3.948261,4.107571,4.275013,4.466022,4.582503,4.723937
228,"{'batch_size': 32, 'epochs': 250, 'loss': 'mean_absolute_error', 'n_timesteps': 30, 'optimizer': 'rmsprop', 'units': 150}",2.202038,3.102282,3.579905,3.909900,4.127794,4.433159,4.645190,4.649112,4.773598,4.939092
0,"{'batch_size': 64, 'epochs': 250, 'loss': 'mean_absolute_error', 'n_timesteps': 15, 'optimizer': 'adam', 'units': 150}",2.208369,3.012873,3.469074,3.681408,3.790077,3.967572,4.163516,4.344530,4.502006,4.648923
...,...,...,...,...,...,...,...,...,...,...,...
220,"{'batch_size': 32, 'epochs': 500, 'loss': 'mean_absolute_error', 'n_timesteps': 60, 'optimizer': 'adam', 'units': 60}",5.256114,5.328359,5.352008,5.380953,5.403206,5.453575,5.536617,5.608854,5.621505,5.628371
221,"{'batch_size': 64, 'epochs': 500, 'loss': 'mean_squared_error', 'n_timesteps': 60, 'optimizer': 'nadam', 'units': 90}",5.264601,5.381605,5.280178,5.227171,5.331639,5.568300,5.795400,5.928228,5.937462,5.921612
222,"{'batch_size': 64, 'epochs': 500, 'loss': 'mean_absolute_error', 'n_timesteps': 60, 'optimizer': 'nadam', 'units': 60}",5.272865,5.377047,5.499375,5.563915,5.627801,5.695966,5.793273,5.917023,6.001399,6.097872
223,"{'batch_size': 32, 'epochs': 500, 'loss': 'mean_absolute_error', 'n_timesteps': 60, 'optimizer': 'nadam', 'units': 60}",5.403487,5.457704,5.490883,5.527911,5.628851,5.783785,5.753048,5.799045,5.952174,5.990982


In [35]:
dataset_sort['mean'] = dataset_sort[['day1', 'day2', 'day3', 'day4', 'day5', 'day6', 'day7', 'day8', 'day9',
       'day10']].mean(axis=1)

In [36]:
dataset_sort

Unnamed: 0,params,day1,day2,day3,day4,day5,day6,day7,day8,day9,day10,mean
225,"{'batch_size': 32, 'epochs': 250, 'loss': 'mean_squared_error', 'n_timesteps': 15, 'optimizer': 'adam', 'units': 150}",2.063217,3.001082,3.503886,3.778379,3.896511,4.078947,4.257945,4.387245,4.549065,4.694083,3.821036
226,"{'batch_size': 32, 'epochs': 250, 'loss': 'mean_absolute_error', 'n_timesteps': 15, 'optimizer': 'adam', 'units': 150}",2.071695,3.028820,3.521742,3.761844,3.928155,4.131037,4.386280,4.545744,4.718516,4.883468,3.897730
227,"{'batch_size': 32, 'epochs': 250, 'loss': 'mean_absolute_error', 'n_timesteps': 15, 'optimizer': 'adam', 'units': 90}",2.193776,3.084165,3.611866,3.836648,3.948261,4.107571,4.275013,4.466022,4.582503,4.723937,3.882976
228,"{'batch_size': 32, 'epochs': 250, 'loss': 'mean_absolute_error', 'n_timesteps': 30, 'optimizer': 'rmsprop', 'units': 150}",2.202038,3.102282,3.579905,3.909900,4.127794,4.433159,4.645190,4.649112,4.773598,4.939092,4.036207
0,"{'batch_size': 64, 'epochs': 250, 'loss': 'mean_absolute_error', 'n_timesteps': 15, 'optimizer': 'adam', 'units': 150}",2.208369,3.012873,3.469074,3.681408,3.790077,3.967572,4.163516,4.344530,4.502006,4.648923,3.778835
...,...,...,...,...,...,...,...,...,...,...,...,...
220,"{'batch_size': 32, 'epochs': 500, 'loss': 'mean_absolute_error', 'n_timesteps': 60, 'optimizer': 'adam', 'units': 60}",5.256114,5.328359,5.352008,5.380953,5.403206,5.453575,5.536617,5.608854,5.621505,5.628371,5.456956
221,"{'batch_size': 64, 'epochs': 500, 'loss': 'mean_squared_error', 'n_timesteps': 60, 'optimizer': 'nadam', 'units': 90}",5.264601,5.381605,5.280178,5.227171,5.331639,5.568300,5.795400,5.928228,5.937462,5.921612,5.563620
222,"{'batch_size': 64, 'epochs': 500, 'loss': 'mean_absolute_error', 'n_timesteps': 60, 'optimizer': 'nadam', 'units': 60}",5.272865,5.377047,5.499375,5.563915,5.627801,5.695966,5.793273,5.917023,6.001399,6.097872,5.684653
223,"{'batch_size': 32, 'epochs': 500, 'loss': 'mean_absolute_error', 'n_timesteps': 60, 'optimizer': 'nadam', 'units': 60}",5.403487,5.457704,5.490883,5.527911,5.628851,5.783785,5.753048,5.799045,5.952174,5.990982,5.678787


In [37]:
dataset_sort.to_csv("res_test_model.csv", index=False)