In [27]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import torch
import torch.nn as nn

# 加载CSV文件
data = pd.read_csv('/Users/curryyang/code/2024_MCM_C_UESTC/processed_datasets/male_processed.csv')  # 确保这里的路径是正确的

# 提取指定行的数据
rows_to_use = list(range(1, 302)) + list(range(3790, 4015)) + list(range(5709, 5898)) + list(range(6591, 6750))
train_data = data.iloc[rows_to_use]

# 测试集 - 最后一场比赛的数据
test_data = data.iloc[6952:7286]

# 预处理p1_score，将"AD"替换为40
train_data['p1_score'] = train_data['p1_score'].replace('AD', 40).astype(int)
test_data['p1_score'] = test_data['p1_score'].replace('AD', 40).astype(int)

# 提取p1_score作为训练和测试数据
train_scores = train_data['p1_score'].values
test_scores = test_data['p1_score'].values

# 数据归一化
scaler = MinMaxScaler(feature_range=(-1, 1))
train_scores_normalized = scaler.fit_transform(train_scores.reshape(-1, 1))
test_scores_normalized = scaler.transform(test_scores.reshape(-1, 1))

# 转换为PyTorch张量
train_scores_normalized = torch.FloatTensor(train_scores_normalized).view(-1)
test_scores_normalized = torch.FloatTensor(test_scores_normalized).view(-1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['p1_score'] = train_data['p1_score'].replace('AD', 40).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['p1_score'] = test_data['p1_score'].replace('AD', 40).astype(int)


In [28]:
class CNNLSTMModel(nn.Module):
    def __init__(self, hidden_layer_size=50):
        super(CNNLSTMModel, self).__init__()
        self.hidden_layer_size = hidden_layer_size  # 添加hidden_layer_size属性
        
        # 定义模型层
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
        self.lstm = nn.LSTM(input_size=64, hidden_size=self.hidden_layer_size, num_layers=1, batch_first=True)
        self.linear = nn.Linear(in_features=self.hidden_layer_size, out_features=1)

    def forward(self, x):
        #x = x.unsqueeze(1)  # 添加channel维度
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.transpose(1, 2)  # 调整维度以适应LSTM的输入要求
        lstm_out, _ = self.lstm(x)
        predictions = self.linear(lstm_out[:, -1, :])  # 只使用序列的最后一个输出
        return predictions


In [29]:
# 定义训练数据加载器
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq, train_label))
    return inout_seq

# 设定时间窗口大小
train_window = 5
test_window = 5

# 创建训练序列
train_inout_seq = create_inout_sequences(train_scores_normalized, train_window)
test_inout_seq = create_inout_sequences(test_scores_normalized, test_window)



In [30]:
def train_model(model, train_inout_seq, epochs=10, lr=0.001):
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    losses = []  # 用于收集每个epoch的损失值

    for epoch in range(epochs):
        total_loss = 0
        for seq, labels in train_inout_seq:
            optimizer.zero_grad()
            seq = seq.view(-1, 1, train_window)
            y_pred = model(seq)
            single_loss = loss_function(y_pred, labels)
            single_loss.backward()
            optimizer.step()
            total_loss += single_loss.item()
        
        avg_loss = total_loss / len(train_inout_seq)
        losses.append(avg_loss)
        print(f'epoch: {epoch+1} loss: {avg_loss:.8f}')
    
    return losses


In [33]:
import matplotlib.pyplot as plt

# 实例化并训练模型
model = CNNLSTMModel()
losses = train_model(model, train_inout_seq, epochs=10, lr=0.001)

# 绘制训练损失曲线
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(losses)+1), losses, marker='o', label='Training Loss')
plt.title('Epoch vs Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


ModuleNotFoundError: No module named 'matplotlib.pyplot'

In [None]:
model.eval()
with torch.no_grad():
    for seq, true_val in test_inout_seq:  # 假设test_inout_seq已准备好，与train_inout_seq类似
        seq = seq.view(-1, 1, train_window)  # 调整形状以符合模型的输入要求
        prediction = model(seq)
        predicted_score = scaler.inverse_transform(prediction.numpy().reshape(-1, 1))  # 确保predicted_score是2D的
        true_score = scaler.inverse_transform(true_val.numpy().reshape(-1, 1))  # 将true_val调整为2D数组
        print(f'Predicted Score: {predicted_score[0][0]}, Actual Score: {true_score[0][0]}')


Predicted Score: 23.574504852294922, Actual Score: 40.0
Predicted Score: 24.31423568725586, Actual Score: 40.0
Predicted Score: 24.47286605834961, Actual Score: 40.0
Predicted Score: 24.523101806640625, Actual Score: 0.0
Predicted Score: 10.070013999938965, Actual Score: 0.0
Predicted Score: 10.304067611694336, Actual Score: 0.0
Predicted Score: 3.77752423286438, Actual Score: 0.0
Predicted Score: 3.5113728046417236, Actual Score: 15.0
Predicted Score: 18.247617721557617, Actual Score: 30.0
Predicted Score: 30.31653594970703, Actual Score: 0.0
Predicted Score: 10.764852523803711, Actual Score: 0.0
Predicted Score: 6.613131523132324, Actual Score: 15.0
Predicted Score: 22.730182647705078, Actual Score: 30.0
Predicted Score: 31.458425521850586, Actual Score: 30.0
Predicted Score: 21.537639617919922, Actual Score: 30.0
Predicted Score: 22.411388397216797, Actual Score: 0.0
Predicted Score: 10.56336498260498, Actual Score: 0.0
Predicted Score: 9.804952621459961, Actual Score: 15.0
Predicte