In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense,Dropout
import tensorflow as tf
import random
from sklearn.model_selection import TimeSeriesSplit
import statsmodels.api as sm

2024-09-08 11:53:23.724186: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-08 11:53:23.724263: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-08 11:53:23.727607: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [9]:
seed_value = 42
np.random.seed(seed_value)
tf.random.set_seed(seed_value)
random.seed(seed_value)

In [35]:
df = pd.read_csv('../input/amirkabirdam/dam12.csv')
df.head(2)

Unnamed: 0,DamId,DateRow,Torbin_out,TakhlieAbgir_out,Sarriz_out,TabKhir_out,Total_out,TotalMasraf_out,TakhlieRosob_out,Cheshme_out,...,NamNesbi6_5,NamNesbi12_5,NamNesbi18_5,TolGeo,ArzGeo,MaxErtefaAzDarya,MinErtefaAzDarya,KafDaryache,GonjayeshKolMakhzan,GonjayeshMofidMakhzan
0,1,1966-05-11,1.63296,0.0,0.0,0.0,1.63296,0.22464,0,0,...,0.0,0.0,0.0,54.25563,35.252621,1765,1600,1595,183,177.3
1,1,1970-10-08,0.78624,0.0,0.0,0.0,0.78624,0.45792,0,0,...,0.0,0.0,0.0,54.25563,35.252621,1765,1600,1595,183,177.3


In [36]:
df['humidity']=df[['NamNesbi6_5', 'NamNesbi12_5','NamNesbi18_5']].mean(axis=1)
df['daraje']= df[['MinDarajehararat_in','MaxDarajehararat_in']].mean(axis=1)

In [37]:
df['Total_in1']=df['Total_in'].shift(1)
#df['Total_in2']= df['Total_in'].shift(2)
#df['Total_out1']=df['Total_out'].shift(1)
#df['Total_out2']=df['Total_out'].shift(2)
df['Barandegi_in1']=df['Barandegi_in'].shift(1)
df['Barandegi_in2']=df['Barandegi_in'].shift(2)

In [38]:
df.dropna(axis=0, inplace=True)

In [39]:
target = 'Total_in'
features = ['Barandegi_in2','Barandegi_in1','Total_in1','Barandegi_in','TotalAbBarfVBaran_in','DebiSeilabm3c','daraje','VazeHava_in','SoratBad_in' ,'humidity']

In [40]:
df['DateRow'] = pd.to_datetime(df['DateRow'])
df = df.sort_values(by= 'DateRow')
df = df[df['DateRow']>'2015']
df = df[features + [target]]

In [41]:
df[df.columns[1:]].corr()['Total_in'][:]

Barandegi_in1           0.041937
Total_in1               0.025842
Barandegi_in            0.203749
TotalAbBarfVBaran_in    0.185436
DebiSeilabm3c           0.328554
daraje                  0.165066
VazeHava_in             0.112074
SoratBad_in             0.001046
humidity               -0.147949
Total_in                1.000000
Name: Total_in, dtype: float64

In [42]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[features + [target]])

In [66]:
def create_dataset_multistep(dataset, time_step=10, forecast_horizon=7):
    X, y = [], []
    for i in range(len(dataset)-time_step-forecast_horizon):
        X.append(dataset[i:(i+time_step), :])
        y.append(dataset[i+time_step+forecast_horizon, -1])
    return np.array(X), np.array(y)

time_step = 60
forecast_horizon = 14
X, y = create_dataset_multistep(scaled_data, time_step, forecast_horizon)

In [70]:
tscv = TimeSeriesSplit(n_splits = 7)
train_rmse = []
test_rmse = []
train_r2 = []
test_r2 = []
i=0
for train_index, test_index in tscv.split(X):
    i+=1
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = y[train_index], y[test_index]
    if i>=3:
        #print('train:',X_train.shape ,  Y_train.shape)
        #print('train:',X_test.shape ,  Y_test.shape)
        model = Sequential()
        model.add(LSTM(60, input_shape=(time_step, len(features+[target]))))
        model.add(Dense(256))
        model.add(Dropout(0.5))
        model.add(Dense(512))
        model.add(Dense(512))
        model.add(Dense(64))
        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mean_squared_error')

        model.fit(X_train, Y_train, batch_size=64, epochs=20, validation_data=(X_test, Y_test))
        train_predict = model.predict(X_train)
        test_predict = model.predict(X_test)

        train_predict = np.clip(train_predict, 0, 1)
        test_predict = np.clip(test_predict, 0, 1)

        train_predict = scaler.inverse_transform(np.concatenate((X_train[:, -1, :-1], train_predict), axis=1))[:, -1]
        test_predict = scaler.inverse_transform(np.concatenate((X_test[:, -1, :-1], test_predict), axis=1))[:, -1]

        orig_train_y = scaler.inverse_transform (np.concatenate ((X_train[:, -1, :-1], Y_train.reshape(-1,1)), axis=1) )[:, -1]
        orig_test_y = scaler.inverse_transform(np.concatenate((X_test[:, -1, :-1], Y_test.reshape(-1,1)), axis=1))[:, -1]


        train_rmse .append (np.sqrt(mean_squared_error(orig_train_y, train_predict)))
        test_rmse.append ( np.sqrt(mean_squared_error(orig_test_y, test_predict)))
        train_r2 .append ( r2_score(orig_train_y, train_predict))
        test_r2 .append ( r2_score(orig_test_y, test_predict))
    
print(f"Train RMSE: {np.mean(train_rmse)}, Test RMSE: {np.mean(test_rmse)}")
print(f"Train R2: {np.mean(train_r2)}, Test R2: {np.mean(test_r2)}")
    
   
        

  super().__init__(**kwargs)


Epoch 1/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 71ms/step - loss: 0.3025 - val_loss: 0.0233
Epoch 2/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - loss: 0.0144 - val_loss: 0.0046
Epoch 3/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - loss: 0.0067 - val_loss: 0.0011
Epoch 4/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - loss: 0.0051 - val_loss: 7.4304e-04
Epoch 5/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - loss: 0.0050 - val_loss: 7.2734e-04
Epoch 6/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step - loss: 0.0046 - val_loss: 7.0379e-04
Epoch 7/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - loss: 0.0045 - val_loss: 6.8657e-04
Epoch 8/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - loss: 0.0022 - val_loss: 0.0014
[1m79/79[0m [32m━━━━━━━━━━━━━