In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


def one_hot_encoding(arr):
    s = set()
    _sd = dict()
    for e in arr:
        s.add(e)

    sl = list(s)
    for i in range(len(sl)):
        encode = np.zeros(len(sl))
        encode[i] = 1
        _sd[sl[i]] = encode

    encoded_data = []
    for k in range(len(arr)):
        encoded_data.append(_sd[arr[k]])

    return np.array(encoded_data)


def data_preprocess(filepath):
    df = pd.read_excel(filepath)
    data = df.to_numpy()

    ip = data[:, 2]

    encoded_ip = one_hot_encoding(ip)

    # print(encoded_ip)

    data[:, 2] = encoded_ip.tolist()

    # print(data)
    expanded_data = []
    for row in data:
        expanded_data.append(np.concatenate([row[:2], row[2], row[3:]]).tolist())

    expanded_data = np.array(expanded_data)

    for i in range(len(expanded_data)):
        expanded_data[i][0] = expanded_data[i][0]

    expanded_data = expanded_data[:]

    training_x = expanded_data[:, 1:]
    training_y = expanded_data[:, 0]

    return training_x, training_y


# Data preprocessing
file_path = "D:\\model_fit\\training\\training_data\\output2.xlsx"
training_x, training_y = data_preprocess(file_path)

In [2]:
# Define the split points based on time
train_size = int(len(training_x) * 0.8)
val_size = int(len(training_x) * 0.1)

# Split the data
x_train = training_x[:train_size]
y_train = training_y[:train_size]

x_val = training_x[train_size + val_size :]
y_val = training_y[train_size + val_size :]


# test data from other file
x_test = training_x[: train_size + val_size]
y_test = training_y[: train_size + val_size]

In [3]:
from keras.callbacks import Callback, EarlyStopping # type: ignore

class LossHistory(Callback):
    def __init__(self, x_train, y_train, x_val, y_val):
        super().__init__()
        self.train_data = (x_train, y_train)
        self.validation_data = (x_val, y_val)
        self.losses = []
        self.val_losses = []
        self.train_errors = []
        self.val_errors = []

    def on_epoch_end(self, epoch, logs=None):
        self.losses.append(logs["loss"])
        self.val_losses.append(logs.get("val_loss"))
        self.train_errors.append(logs.get("mean_absolute_error"))
        self.val_errors.append(logs.get("val_mean_absolute_error"))

In [4]:
from keras.callbacks import TensorBoard
import os
import datetime

# log
log_dir = os.path.join("logs", "fit", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [8]:
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.optimizers import Adam, Adagrad
from keras.regularizers import l1, l2, l1_l2
import numpy as np

history = LossHistory(
    x_train, y_train, x_val, y_val
)

model = Sequential()
model.add(Input(shape=(x_train.shape[1],x_train.shape[2])))
model.add(Dense(128, activation="relu", kernel_regularizer=l2(0.01), activity_regularizer=l1_l2(0.03)))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(1, activation="linear"))

model.compile(
    loss="mean_squared_error",
    optimizer=Adagrad(learning_rate=0.001),
    metrics=["accuracy"],
)

history = model.fit(
    x_train,
    y_train,
    epochs=500,
    batch_size=64,
    validation_data=(x_val, y_val),
    verbose=1,
)

loss, accuracy = model.evaluate(x_test, y_test)

print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Epoch 1/500
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 32142286.0000 - val_accuracy: 0.0000e+00 - val_loss: 28084214.0000
Epoch 2/500
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 902us/step - accuracy: 0.0000e+00 - loss: 28304972.0000 - val_accuracy: 0.0000e+00 - val_loss: 27094480.0000
Epoch 3/500
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 944us/step - accuracy: 0.0000e+00 - loss: 27053106.0000 - val_accuracy: 0.0000e+00 - val_loss: 26235826.0000
Epoch 4/500
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 905us/step - accuracy: 0.0000e+00 - loss: 26236084.0000 - val_accuracy: 0.0000e+00 - val_loss: 25460938.0000
Epoch 5/500
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 902us/step - accuracy: 0.0000e+00 - loss: 25697624.0000 - val_accuracy: 0.0000e+00 - val_loss: 24748652.0000
Epoch 6/500
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [7]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# 使用模型对测试集进行预测
predicted_prob = model.predict(x_test)

# 计算均方误差和平均绝对误差
mse = mean_squared_error(y_test, predicted_prob)
mae = mean_absolute_error(y_test, predicted_prob)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

for i in range(20):
    print(f"pred: {predicted_prob[i][0]} | real: {y_test[i]}")

[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 567us/step
Mean Squared Error: 16.789721615568766
Mean Absolute Error: 2.2292431772787946
pred: 0.3151185214519501 | real: 0.1475279331207275
pred: 0.6749817728996277 | real: 1.15568208694458
pred: 0.3702943027019501 | real: 0.7066545486450195
pred: 0.5089661478996277 | real: 1.612224340438843
pred: 0.9093567728996277 | real: 1.999531507492065
pred: 1.1603333950042725 | real: 5.916099071502686
pred: 1.0431458950042725 | real: 3.237768888473511
pred: 1.3273255825042725 | real: 8.06132698059082
pred: 1.0470521450042725 | real: 4.099016427993774
pred: 0.7472473978996277 | real: 1.507791042327881
pred: 0.4679505527019501 | real: 1.872663974761963
pred: 1.2238099575042725 | real: 6.798724412918091
pred: 1.4503724575042725 | real: 22.33745050430298
pred: 0.03728647902607918 | real: 0.1079268455505371
pred: 1.2843568325042725 | real: 16.69954204559326
pred: 1.0792787075042725 | real: 2.33093523979187
pred: 1.2863099575042725 | 