In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import sklearn.metrics 
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
# Implement Learning rate decay
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau,ModelCheckpoint,LearningRateScheduler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D
from tensorflow.keras.layers import ConvLSTM2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from math import sqrt  
from sklearn.metrics import mean_squared_error, mean_absolute_error
import requests
import os
import h5py
from PIL import Image

2023-04-14 00:41:30.368191: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1


In [2]:
def create_dataset_from_raw(image_dir="target_200", years=range(2009, 2018),
                            n_rows=9, n_cols=10):
    """Load the per-year JPEG tiles into one normalised 5-D array.

    Files are read from ``<image_dir>/<year>_<i>_<j>.jpg`` with ``i`` running
    over ``1..n_rows`` and ``j`` over ``1..n_cols`` (presumably the tile's
    grid row/column -- confirm against how the tiles were produced). Every
    ``(i, j)`` pair becomes one sample whose frames follow the order of
    ``years``. Samples are ordered with ``i`` as the outer index and ``j`` as
    the inner one.

    Args:
        image_dir: Directory containing the images.
        years: Iterable of years that make up one sequence (default 2009-2017).
        n_rows: Number of values taken by the first file-name index.
        n_cols: Number of values taken by the second file-name index.

    Returns:
        ``numpy.ndarray`` of shape ``(n_rows * n_cols, len(years), height,
        width, depth)`` holding the pixel values divided by 255. All images
        must share the same shape.

    Raises:
        ValueError: If ``years`` is empty or the grid has no cells.
    """
    years = list(years)
    if not years or n_rows < 1 or n_cols < 1:
        raise ValueError("years and the tile grid must be non-empty")

    samples = []
    for i in tqdm(range(1, n_rows + 1)):
        for j in range(1, n_cols + 1):
            frames = []
            for year in years:
                file = "{}/{}_{}_{}.jpg".format(image_dir, year, i, j)
                # Image.open is lazy and keeps the file handle open; the
                # context manager releases it once the pixels are copied out.
                with Image.open(file) as img:
                    # Normalise the pixel values by the 8-bit maximum.
                    frames.append(np.array(img) / 255.0)
            # One sample: (year, height, width, depth).
            samples.append(np.stack(frames, axis=0))

    # Stack once at the end instead of re-copying the whole dataset on every
    # iteration: (sample, year, height, width, depth).
    return np.stack(samples, axis=0)

In [3]:
def split_data_xy(data, timesteps):
    """Cut every sequence into sliding (input, target) windows.

    A window spans ``2 * timesteps`` consecutive frames: the first
    ``timesteps`` frames form the input and the remaining ``timesteps`` frames
    form the target. Windows advance one frame at a time and are emitted
    sample by sample, in frame order.

    Args:
        data: 5-D array indexed as ``[sample, frame, ...]`` (three trailing
            image axes).
        timesteps: Number of frames in each input and in each target (>= 1).

    Returns:
        Tuple ``(x, y)`` of arrays, each of shape
        ``(n_samples * n_windows, timesteps, *data.shape[2:])`` where
        ``n_windows = data.shape[1] - 2 * timesteps + 1``. When no window
        fits, both arrays have a leading dimension of 0.

    Raises:
        ValueError: If ``timesteps`` is smaller than 1.
    """
    if timesteps < 1:
        raise ValueError("timesteps must be >= 1")

    window = 2 * timesteps
    # Derived from the data instead of a hard-coded frame count; equals the
    # former ``10 - window`` for 9-frame sequences.
    n_windows = data.shape[1] - window + 1

    x_parts = []
    y_parts = []
    for i in tqdm(range(data.shape[0])):
        for j in range(n_windows):
            # Every window of every sample is kept. The previous version
            # reset the accumulator on each window of sample 0 and so lost
            # all but its last window.
            x_parts.append(data[i, j:j + timesteps])
            y_parts.append(data[i, j + timesteps:j + window])

    if not x_parts:
        empty_shape = (0, timesteps) + tuple(data.shape[2:])
        return (np.empty(empty_shape, dtype=data.dtype),
                np.empty(empty_shape, dtype=data.dtype))

    # Single stack at the end instead of a quadratic chain of concatenations.
    return np.stack(x_parts, axis=0), np.stack(y_parts, axis=0)

In [None]:
# Read every tile/year JPEG from disk and assemble them into one array.
dataset = create_dataset_from_raw()

 67%|██████▋   | 6/9 [00:18<00:12,  4.03s/it]

In [None]:
# Display the dimensions of the loaded array as a sanity check.
dataset.shape

In [None]:
# Number of frames in each model input; split_data_xy pairs every input with
# the same number of target frames.
timesteps = 2
dataset_x, dataset_y = split_data_xy(data=dataset, timesteps=timesteps)

# Hold out 20% of the windows; the fixed seed keeps the partition reproducible.
# NOTE(review): windows cut from the same sequence overlap, so a random split
# over windows can place shared frames in both train and test -- confirm this
# is acceptable for the evaluation.
train_X, test_X, train_y, test_y = train_test_split(
    dataset_x,
    dataset_y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Display the dimensions of the held-out target array.
test_y.shape

In [None]:
# Stacked ConvLSTM network: three recurrent convolutional layers with
# shrinking kernels (7x7 -> 5x5 -> 3x3), then a 3-D convolution that maps the
# 64 feature maps back to 3 output channels.
model = Sequential()
# return_sequences=True keeps the output of every time step, so each layer
# passes a full sequence to the next one.
model.add(ConvLSTM2D(filters=64, kernel_size=(7, 7),
                     input_shape=(timesteps, 200, 200, 3),
                     padding='same', activation=LeakyReLU(alpha=0.01),
                     return_sequences=True))
model.add(BatchNormalization())
model.add(ConvLSTM2D(filters=64, kernel_size=(5, 5),
                     padding='same', activation=LeakyReLU(alpha=0.01),
                     return_sequences=True))
model.add(BatchNormalization())
model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3),
                     padding='same', activation=LeakyReLU(alpha=0.01),
                     return_sequences=True))
# Sigmoid keeps each predicted value in [0, 1].
model.add(Conv3D(filters=3, kernel_size=(3, 3, 3),
                 activation='sigmoid',
                 padding='same', data_format='channels_last'))

model.compile(loss='binary_crossentropy', optimizer='adam')
keras.utils.plot_model(model, to_file="model.png")
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()


In [None]:
# Write the model to disk whenever the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    "200_timesteps2.h5",  # change as needed
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without any validation-loss improvement and roll the
# weights back to the best epoch seen.
earlystop = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=5,
    verbose=2,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.2 after 3 epochs without improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    verbose=1,
    # min_delta=0.00001 (disabled)
)

callbacks = [earlystop, checkpoint, reduce_lr]

In [None]:
# Train the model.
# NOTE(review): the held-out test split doubles as the validation set that
# drives early stopping, checkpointing and LR reduction -- confirm that no
# separate, untouched test set is required.
history = model.fit(train_X, train_y, epochs=100, batch_size=32, validation_data=(test_X, test_y), verbose=2, shuffle=False, callbacks = callbacks)

# Visualise the loss curves.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.title("Model Loss\n")
plt.xlabel('Epoch')
plt.ylabel("Loss")
# Without legend() the label= values above are never rendered and the two
# curves cannot be told apart.
plt.legend()
plt.grid()
plt.show()



In [None]:
# Pull the per-epoch training and validation losses out of the Keras History
# object as NumPy arrays.
loss, val_loss = (np.array(history.history[key]) for key in ('loss', 'val_loss'))

In [None]:
# Turn each loss vector into a single-column 2-D array (one row per epoch).
loss = loss.reshape(-1, 1)
val_loss = val_loss.reshape(-1, 1)

In [None]:
# Place the two single-column arrays side by side: column 0 is the training
# loss, column 1 the validation loss.
loss_all = np.concatenate((loss, val_loss), axis=1)

In [None]:
# Wrap the two-column array in a DataFrame (default integer column labels)
# and write it to disk without the row index.
loss_all = pd.DataFrame(data=loss_all)
loss_all.to_csv(path_or_buf=r'loss.csv', index=False)