In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!cp -r /content/drive/MyDrive/ML_LSTM/dataset/ /content/
!cp -r /content/drive/MyDrive/ML_LSTM/LSTM_1/ /content/
!cp -r /content/drive/MyDrive/ML_LSTM/LSTM_2/ /content/

In [3]:
# coding: utf-8
import csv
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Activation
from keras.layers.recurrent import LSTM
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras import metrics

In [4]:
# CSVファイルからデータをDataFrame型で取得する
def get_df_data():
    df_data = pd.read_csv('dataset/dataset_test.csv', index_col=0, parse_dates=True, skiprows = 1, encoding = 'utf8', header=None)
    df_data.columns = [
        'sensor1',
        'sensor2',
        'sensor3',
        'sensor4',
        'feed1',
        'feed2',
        'feed3',
        'feed4',
    ]
    return df_data

In [5]:

# モデルに読み込ませるデータを生成する
def generate_data(data, length_per_unit, dimension):
    # DataFrame→array変換
    data_array = data.values
    # 時系列データを入れる箱
    sequences = []
    # 正解データを入れる箱
    target = []
    # 正解データの日付を入れる箱
    target_date = []

    # 一グループごとに時系列データと正解データをセットしていく
    for i in range(0, data_array.shape[0] - length_per_unit):
        sequences.append(data_array[i:i + length_per_unit])
        target.append(data_array[i + length_per_unit])
        target_date.append(data[i + length_per_unit: i + length_per_unit + 1].index.strftime('%Y/%m/%d'))

    # 時系列データを成形
    X = np.array(sequences).reshape(len(sequences), length_per_unit, dimension)
    # 正解データを成形
    Y = np.array(target).reshape(len(sequences), dimension)
    # 正解データの日付データを成形
    Y_date = np.array(target_date).reshape(len(sequences), 1)

    return (X, Y, Y_date)


In [6]:

# モデルリストを取得する
def get_model_list(input_shape):
    # 実行するモデル一覧
    model_list = [
        ['LSTM_1', Sequential([
            LSTM(100, input_shape=input_shape), 
            Dense(4), 
            Activation("linear")])], 
        ['LSTM_2', Sequential([
            LSTM(300, input_shape=input_shape), 
            Dense(4), 
            Activation("linear")])], 
    ]

    return model_list


In [7]:
# 扱う特徴量
FEATURE_VALUE = ['feed1','feed2','feed3','feed4']
# 次元数
DIMENSION = len(FEATURE_VALUE)

In [8]:
# 気象データを取得
dfWeather = get_df_data()
print(dfWeather)
# 学習用データを取得(日付順にソート)
df_data_train = dfWeather.loc['2021-01-01':'2023-12-31', FEATURE_VALUE]
df_data_train = df_data_train.sort_index()
df_data_train = df_data_train.dropna() # 欠損値のある行を取り除く
print(df_data_train)
df_data_label = dfWeather.loc['2021-01-01':'2023-12-31', ['sensor1','sensor2','sensor3','sensor4']]
df_data_label = df_data_label.sort_index()
df_data_label = df_data_label.dropna() # 欠損値のある行を取り除く
print(df_data_label)

            sensor1  sensor2  sensor3  sensor4  feed1  feed2  feed3  feed4
0                                                                         
2021-01-01        0        0        0        0     10      0      0      0
2021-01-02        0        0        0        0     10      0      0      0
2021-01-03        0        0        0        0     10      0      0      0
2021-01-04        0        0        0        0     10      0      0      0
2021-01-05       10        0        0        0      8      2      0      0
...             ...      ...      ...      ...    ...    ...    ...    ...
2023-12-27        3        6        1        0      1      3      6      0
2023-12-28        3        5        2        0      1      2      7      0
2023-12-29        2        5        3        0      1      1      8      0
2023-12-30        2        3        5        0      1      1      2      6
2023-12-31        2        2        6        0      1      1      1      7

[1095 rows x 8 columns]


In [9]:
# 一つの時系列データの長さ
LENGTH_PER_UNIT = 10
X_test, Y_test, Y_test_date = generate_data(df_data_train, LENGTH_PER_UNIT, DIMENSION)
# 正規化
X_test = X_test / np.nanmax(np.abs(X_test))
print(X_test)
label_X_test, label_Y_test, label_Y_test_date = generate_data(df_data_label, LENGTH_PER_UNIT, DIMENSION)

# 最適化手法の設定
opt = optimizers.Adam()

# 入力の形状
input_shape=(LENGTH_PER_UNIT, DIMENSION)

# 結果格納用DataFrameのカラム定義
df_columns = ['predict', 'real', 'diff']

model_list = get_model_list(input_shape)

[[[1.  0.  0.  0. ]
  [1.  0.  0.  0. ]
  [1.  0.  0.  0. ]
  ...
  [0.4 0.6 0.  0. ]
  [0.5 0.6 0.  0. ]
  [0.2 0.3 0.5 0. ]]

 [[1.  0.  0.  0. ]
  [1.  0.  0.  0. ]
  [1.  0.  0.  0. ]
  ...
  [0.5 0.6 0.  0. ]
  [0.2 0.3 0.5 0. ]
  [0.1 0.3 0.6 0. ]]

 [[1.  0.  0.  0. ]
  [1.  0.  0.  0. ]
  [0.8 0.2 0.  0. ]
  ...
  [0.2 0.3 0.5 0. ]
  [0.1 0.3 0.6 0. ]
  [0.1 0.2 0.7 0. ]]

 ...

 [[1.  0.  0.  0. ]
  [1.  0.  0.  0. ]
  [0.8 0.2 0.  0. ]
  ...
  [0.2 0.3 0.5 0. ]
  [0.1 0.3 0.6 0. ]
  [0.1 0.2 0.7 0. ]]

 [[1.  0.  0.  0. ]
  [0.8 0.2 0.  0. ]
  [0.8 0.2 0.  0. ]
  ...
  [0.1 0.3 0.6 0. ]
  [0.1 0.2 0.7 0. ]
  [0.1 0.1 0.8 0. ]]

 [[0.8 0.2 0.  0. ]
  [0.8 0.2 0.  0. ]
  [0.7 0.3 0.  0. ]
  ...
  [0.1 0.2 0.7 0. ]
  [0.1 0.1 0.8 0. ]
  [0.1 0.1 0.2 0.6]]]


In [10]:
# 全てのモデルについて実行する
for save_path, model in model_list:
    # 結果を格納するDataFrame
    dfResultWeather = pd.DataFrame(columns=df_columns)
    # フォルダが存在しなければスキップ
    if not os.path.isdir(save_path):
        continue

    # モデルを読み込む
    model = model_from_json(open(save_path + '/Keras_rnn.json').read())

    # 学習結果を読み込む
    model.load_weights(save_path + '/Keras_rnn_weights.h5')

    # モデルの要約を出力
    model.summary()

    # モデルのコンパイル
    model.compile(optimizer = opt,        # 最適化手法
                loss = 'mean_squared_error',           # 損失関数
                metrics = ['accuracy']) # 評価関数

    df_columns = ['predict', 'real', 'diff']
    
    # CSVファイルのヘッダーを準備
    with open(save_path + '/predict_result.csv', 'a') as f:
        writer = csv.writer(f)
        writer.writerow(['date'] + dfWeather.columns.tolist() + ['predict_' + value for value in FEATURE_VALUE])

    # 予測を行う
    for i in range(0, len(X_test)):
    #for i in range(0, X_test[0].size):
        y_ = model.predict(X_test[i:i+1, :, :])
        y_1 = y_[0]
        Y_test_1 = Y_test[i]
        tmp = np.array([float(y_1[0]), float(Y_test_1[0]), abs(float(y_1[0]) - float(Y_test_1[0]))])
        tmp = np.ravel(tmp)
        # print(Y_test_date[i:i+1][0])
        dfResult = pd.DataFrame([tmp], columns=df_columns, index=Y_test_date[i:i+1][0])
        dfResultWeather = dfResultWeather.append(dfResult)

        # 統計情報を表示
        print('統計情報:')
        print(dfResultWeather.describe())

        # 差(予測-実測値)の最大値
        print(dfResultWeather[dfResultWeather['diff'] >= 5])

        # # グラフ描画
        # dfResultWeather.plot(y=['predict', 'real'], figsize=(16,10), title='', grid=True)
        # plt.savefig(save_path + "/predict.png")

        # dfResultWeather.plot(y=['diff'], figsize=(16,10), title='', grid=True)
        # plt.savefig(save_path + "/predict_diff.png")

        print(Y_test_date[i:i+1][0].tolist() + label_Y_test[i].tolist() + Y_test_1.tolist() + y_1.tolist())
        # CSVに書き込み
        with open(save_path + '/predict_result.csv', 'a') as f:
            writer = csv.writer(f)
            writer.writerow(Y_test_date[i:i+1][0].tolist() + label_Y_test[i].tolist() + Y_test_1.tolist() + y_1.tolist())


plt.close()
            

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
統計情報:
          predict        real        diff
count  886.000000  886.000000  886.000000
mean     4.557911    3.658014    1.405775
std      3.979337    4.094557    2.201646
min     -0.185292    0.000000    0.001616
25%      0.593692    0.000000    0.143322
50%      4.150813    1.000000    0.625091
75%      9.168712    8.000000    1.284248
max     10.183802   10.000000   10.183802
             predict  real      diff
2021/01/18  6.331800   0.0  6.331800
2021/02/05  5.008890   0.0  5.008890
2021/02/06  7.307691   0.0  7.307691
2021/02/07  5.366075   0.0  5.366075
2021/02/08  9.168712   0.0  9.168712
...              ...   ...       ...
2023/05/08  6.331800   0.0  6.331800
2023/05/26  5.008890   0.0  5.008890
2023/05/27  7.307691   0.0  7.307691
2023/05/28  5.366075   0.0  5.366075
2023/05/29  9.168712   0.0  9.168712

[89 rows x 3 columns]
['2023/06/15', 1, 1, 7, 1, 0, 0, 1, 9, -0.18529248237609863, 0.2618579864501953, 1.7

In [12]:
!cp -r /content/LSTM_1/predict_result.csv /content/drive/MyDrive/ML_LSTM/LSTM_1/
!cp -r /content/LSTM_2/predict_result.csv /content/drive/MyDrive/ML_LSTM/LSTM_2/