In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# 读取数据
data = pd.read_csv('student_data.csv', encoding="utf-8")

X = data[['Study_Hours', 'Study_Regularity', 'Study_Method', 'Student_Background', 'Study_Start', 'Exam_Score']]
y = data['Math_Score']

# 标签编码
label_encoders = {}
categorical_features = ['Study_Regularity', 'Study_Method', 'Student_Background', 'Study_Start']
for feature in categorical_features:
    le = LabelEncoder()
    X[feature] = le.fit_transform(X[feature])
    label_encoders[feature] = le

# 特征标准化
scaler = StandardScaler()
X[['Study_Hours', 'Exam_Score']] = scaler.fit_transform(X[['Study_Hours', 'Exam_Score']])

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 创建深度学习模型
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    layers.BatchNormalization(),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(32, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(1)  # 输出层
])

# 编译模型
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error')

# 训练模型
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=2, validation_split=0.1)

# 评估模型
mse = model.evaluate(X_test, y_test, verbose=0)
print(f"均方误差(MSE): {mse}")

# 保存模型
model.save('model.h5')




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature] = le.fit_transform(X[feature])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[['Study_Hours', 'Exam_Score']] = scaler.fit_transform(X[['Study_Hours', 'Exam_Score']])


Epoch 1/100
68/68 - 2s - loss: 44694.0273 - val_loss: 44422.6250 - 2s/epoch - 24ms/step
Epoch 2/100
68/68 - 0s - loss: 44150.9023 - val_loss: 43151.4922 - 141ms/epoch - 2ms/step
Epoch 3/100
68/68 - 0s - loss: 43416.9609 - val_loss: 41983.3984 - 131ms/epoch - 2ms/step
Epoch 4/100
68/68 - 0s - loss: 42463.3125 - val_loss: 41243.1172 - 134ms/epoch - 2ms/step
Epoch 5/100
68/68 - 0s - loss: 41270.6172 - val_loss: 40103.7891 - 135ms/epoch - 2ms/step
Epoch 6/100
68/68 - 0s - loss: 39850.4570 - val_loss: 40009.6523 - 146ms/epoch - 2ms/step
Epoch 7/100
68/68 - 0s - loss: 38179.9648 - val_loss: 38419.5703 - 138ms/epoch - 2ms/step
Epoch 8/100
68/68 - 0s - loss: 36289.0469 - val_loss: 35897.8398 - 134ms/epoch - 2ms/step
Epoch 9/100
68/68 - 0s - loss: 34194.3516 - val_loss: 33771.1680 - 135ms/epoch - 2ms/step
Epoch 10/100
68/68 - 0s - loss: 31966.4414 - val_loss: 31340.3555 - 140ms/epoch - 2ms/step
Epoch 11/100
68/68 - 0s - loss: 29634.6855 - val_loss: 28815.6152 - 138ms/epoch - 2ms/step
Epoch 12/1

测试集验证

In [32]:
import pandas as pd
import h5py
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
import tensorflow as tf
from tensorflow import keras

# 加载模型
file=h5py.File('model.h5')
model=keras.models.load_model(file)

test_data = pd.DataFrame({
    'Study_Hours': [8, 8],'Study_Regularity': [1, 1],'Study_Method': [1, 1],'Student_Background': [1, 1],'Study_Start': [1, 0],'Exam_Score': [221, 225]
})

# 对分类特征进行编码
label_encoders = {}
categorical_features = ['Study_Regularity', 'Study_Method', 'Student_Background', 'Study_Start']
for feature in categorical_features:
    le = LabelEncoder()
    test_data[feature] = le.fit_transform(test_data[feature])
    label_encoders[feature] = le

# 数据标准化
scaler = StandardScaler()
test_data[['Study_Hours', 'Exam_Score']] = scaler.fit_transform(test_data[['Study_Hours', 'Exam_Score']])

# 进行预测
predictions = model.predict(test_data)
all_predictions = []

# 进行预测并存储结果
for i, prediction in enumerate(predictions):
    # 将预测值加上10
    adjusted_prediction = prediction[0] + 10
    all_predictions.append(adjusted_prediction)

# 计算平均值
average_prediction = np.mean(all_predictions)

# 打印平均值
print(f"两个测试样本的平均预测分数为: {average_prediction}")

三个测试样本的平均预测分数为: 223.8022918701172
