In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Import các modules của sklearn và tensorflow cần thiết
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt  # sử dụng alias kt cho Keras Tuner
from kerastuner.tuners import Hyperband
from keras_tuner import BayesianOptimization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

file_path ='./updated_data_with_time.csv'
df = pd.read_csv(file_path, encoding='latin-1')

In [10]:
df

Unnamed: 0,Country,Continent,Year,Element,Months,TempC,Time
0,Afghanistan,Asia,1961,Temperature change,2,-1.787,1961-02-01
1,Afghanistan,Asia,1962,Temperature change,2,2.390,1962-02-01
2,Afghanistan,Asia,1963,Temperature change,2,3.875,1963-02-01
3,Afghanistan,Asia,1964,Temperature change,2,-0.218,1964-02-01
4,Afghanistan,Asia,1965,Temperature change,2,-0.100,1965-02-01
...,...,...,...,...,...,...,...
191218,Zimbabwe,Africa,2019,Temperature change,9,-0.003,2019-09-01
191219,Zimbabwe,Africa,2020,Temperature change,9,0.431,2020-09-01
191220,Zimbabwe,Africa,2021,Temperature change,9,1.108,2021-09-01
191221,Zimbabwe,Africa,2022,Temperature change,9,0.393,2022-09-01


In [11]:
df = df[df['Country'] == 'Germany']
df = df.sort_values(by='Time')
df['Time'] = pd.to_datetime(df['Time'])
df

Unnamed: 0,Country,Continent,Year,Element,Months,TempC,Time
21562,Germany,Europe,1961,Temperature change,1,-0.204,1961-01-01
5634,Germany,Europe,1961,Temperature change,2,4.187,1961-02-01
69357,Germany,Europe,1961,Temperature change,3,2.758,1961-03-01
53394,Germany,Europe,1961,Temperature change,4,3.224,1961-04-01
85320,Germany,Europe,1961,Temperature change,5,-1.325,1961-05-01
...,...,...,...,...,...,...,...
101339,Germany,Europe,2023,Temperature change,8,2.143,2023-08-01
181013,Germany,Europe,2023,Temperature change,9,4.228,2023-09-01
165110,Germany,Europe,2023,Temperature change,10,3.175,2023-10-01
149207,Germany,Europe,2023,Temperature change,11,1.552,2023-11-01


In [12]:
df = df[['Time','TempC']]
df = df.set_index('Time')
df

Unnamed: 0_level_0,TempC
Time,Unnamed: 1_level_1
1961-01-01,-0.204
1961-02-01,4.187
1961-03-01,2.758
1961-04-01,3.224
1961-05-01,-1.325
...,...
2023-08-01,2.143
2023-09-01,4.228
2023-10-01,3.175
2023-11-01,1.552


In [13]:
data = df['TempC'].values

In [15]:
#Normalize
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data.reshape(-1,1))

In [16]:
# Function to create sequences for LSTM model
def create_sequences(data, sequence_length):
    xs = []
    ys = []
    for i in range(len(data)-sequence_length-1):
        x = data[i:(i+sequence_length)]
        y = data[i+sequence_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

# Prepare the data
sequence_length = 24
X, y = create_sequences(scaled_data, sequence_length)

In [17]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
def build_model(hp):
    model = Sequential()
    
        # Tune the number of LSTM layers
    for i in range(hp.Int('num_layers', min_value=1, max_value=3, step=1)):
        # Tune the number of units in each LSTM layer
        model.add(LSTM(units=hp.Int(f'units_{i}', min_value=32, max_value=128, step=32),
                       activation=hp.Choice('activation', values=['relu', 'tanh']),
                       return_sequences=True if i < hp.get('num_layers') - 1 else False))
    model.add(Dropout(hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)))
    # Output layer
    model.add(Dense(1))
    
    # Compile model with a tunable learning rate
    model.compile(
        optimizer=Adam(
            hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
        ),
        loss='mse'
    )
    
    return model

In [21]:
from kerastuner.tuners import RandomSearch
from tensorflow.keras.callbacks import EarlyStopping

# Định nghĩa tuner (với RandomSearch, có thể thay bằng Hyperband hoặc Bayesian nếu muốn)
tuner = RandomSearch(
    build_model,  # Hàm build_model như bạn đã định nghĩa
    objective='val_loss',  # Tối ưu hóa dựa trên validation loss
    max_trials=10,         # Số lần thử các tổ hợp siêu tham số khác nhau
    executions_per_trial=1, # Số lần thực thi mỗi thử nghiệm để tính trung bình
    directory='tuner_dir',  # Thư mục để lưu kết quả của tuner
    project_name='tempC_prediction'
)

# Bắt đầu tìm kiếm siêu tham số tốt nhất
tuner.search(X_train, y_train, epochs=100, validation_data=(X_test, y_test), 
             callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

# Lấy mô hình tốt nhất
best_model = tuner.get_best_models(num_models=1)[0]

# Huấn luyện mô hình tốt nhất với dữ liệu thực tế (có thể tiếp tục từ kết quả trước đó)
history = best_model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test), 
                         callbacks=[EarlyStopping(monitor='val_loss', patience=5)])

# Sau khi huấn luyện, bạn có thể đánh giá mô hình hoặc dự đoán với dữ liệu test
test_loss = best_model.evaluate(X_test, y_test)
print("Test Loss:", test_loss)


Trial 10 Complete [00h 00m 08s]
val_loss: 0.0171175766736269

Best val_loss So Far: 0.0171175766736269
Total elapsed time: 00h 02m 17s
Epoch 1/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - loss: 0.0283 - val_loss: 0.0213
Epoch 2/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0211 - val_loss: 0.0173
Epoch 3/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0197 - val_loss: 0.0177
Epoch 4/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0217 - val_loss: 0.0171
Epoch 5/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0180 - val_loss: 0.0175
Epoch 6/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0185 - val_loss: 0.0175
Epoch 7/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0218 - val_loss: 0.0171
Epoch 8/50
[1m19/19[0m [32m━━