This code is used to process a data set we found online. The data set covers women's tennis matches, and we have also processed it.

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Load the data.
data = pd.read_csv('path_to_your_data.csv')

# Select the model input columns.
features = data[['p1_ace', 'p2_ace', 'p1_break_pt_won', 'p2_break_pt_won', 'p1_double_fault', 'p2_double_fault', 'p1_unf_err', 'p2_unf_err','PointServer','Speed_KMH','p1_net_pt','p2_net_pt','p1_distance_run','p2_distance_run']]

# Binary target: 1 where the `SetWinner` column equals 1, 0 for every other
# value. The label is read straight from the data; nothing is derived from
# point counts here.
set_winner = (data['SetWinner'] == 1).astype(int)

# Split by row position: the first `train_rows` rows are used for training and
# the remainder for testing.
# NOTE(review): 4391 is specific to the original CSV - confirm it still marks
# the intended boundary whenever the input file changes.
train_rows = 4391

# Fit the scaler on the training rows only, then apply it to every row.
# Fitting on the full data set would let test-set mean/variance leak into the
# features the model is trained on.
scaler = StandardScaler()
scaler.fit(features.iloc[:train_rows])
features_scaled = scaler.transform(features)

X_train, X_test = features_scaled[:train_rows], features_scaled[train_rows:]
y_train, y_test = set_winner.iloc[:train_rows], set_winner.iloc[train_rows:]

# Convert the NumPy arrays / Series to float32 tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Neural network definition
class SetPredictor(nn.Module):
    """Three-layer feed-forward binary classifier.

    Layer widths are ``input_dim -> 2 * input_dim -> input_dim -> 1``. The
    first hidden layer uses ReLU, the second uses a sigmoid, and the output
    passes through a sigmoid so it can be fed to ``nn.BCELoss``.

    Args:
        input_dim: Number of input features per sample. Defaults to 14, which
            gives the 14 -> 28 -> 14 -> 1 layout.
    """

    def __init__(self, input_dim: int = 14):
        super().__init__()
        hidden_dim = 2 * input_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, input_dim)
        self.fc3 = nn.Linear(input_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid outputs with a trailing dimension of size 1.

        Args:
            x: Input whose last dimension equals ``input_dim``.
        """
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return self.sigmoid(self.fc3(x))

# Instantiate the model, the loss function and the optimizer.
model = SetPredictor()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model: one full-batch gradient step per epoch.
num_epochs = 250
epoch_loss = []
# Training mode never changes inside the loop, so set it once up front.
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    # squeeze(1) drops only the trailing size-1 output dimension; a bare
    # squeeze() would also drop the batch dimension for a single-row batch and
    # no longer match the target's shape.
    loss = criterion(outputs.squeeze(1), y_train_tensor)
    loss.backward()
    optimizer.step()
    loss_value = loss.item()
    epoch_loss.append(loss_value)

    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss_value:.4f}')

# rcParams is global: this font applies to every figure drawn afterwards.
plt.rcParams['font.family'] = 'Times New Roman'

# Training loss per epoch.
plt.figure(figsize=(10, 5))
plt.plot(epoch_loss, label='Training Loss')
plt.title('Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

# Distribution of the per-epoch losses.
plt.figure(figsize=(5, 5))
plt.boxplot(epoch_loss, vert=True, patch_artist=True)
plt.title('Box Plot of Losses')
plt.ylabel('Loss')
plt.show()

# Simple evaluation of the model.
model.eval()
with torch.no_grad():
    # squeeze(1) removes only the size-1 output dimension, so the results stay
    # 1-D even when a split holds a single row (np.append with axis=0 below
    # rejects 0-d arrays).
    outputs0 = model(X_train_tensor).squeeze(1)
    outputs = model(X_test_tensor).squeeze(1)
    # Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions.
    predictions = (outputs >= 0.5).float()
    accuracy = (predictions == y_test_tensor).float().mean()
    print(f'Accuracy: {accuracy:.4f}')

# Stack train rows followed by test rows, for both inputs and model outputs.
features_array = np.append(X_train_tensor.numpy(), X_test_tensor.numpy(), axis=0)
predictions_array = np.append(outputs0.numpy(), outputs.numpy(), axis=0)
# Scaled features with the model output appended as the last column.
export_data = np.hstack((features_array, predictions_array.reshape(-1, 1)))

# Undo the scaling so the exported features are in their original units.
features_original = scaler.inverse_transform(features_array)
# Reuse the column names of the selected feature frame instead of repeating
# the list, so the export cannot drift from the model inputs.
export_df = pd.DataFrame(features_original, columns=features.columns)
# The raw sigmoid outputs are exported, not the thresholded predictions.
export_df['Predicted_Value'] = predictions_array

# Export the DataFrame as a CSV file.
export_df.to_csv('path_to', index=False)


# Path the model is saved to.
model_path = 'path to your set_predictor_model.pth'

# Save the model's state dict.
torch.save(model.state_dict(), model_path)

print(f'Model parameters saved to {model_path}')

import torch
import torchvision.models as models

# Read the saved state dict back from disk and copy it into the model.
saved_state = torch.load('path to your set_predictor_model.pth')
model.load_state_dict(saved_state)

# Put the model in evaluation mode, then print its layer summary.
model.eval()
print(model)

import matplotlib.pyplot as plt

# Pull the weight matrix of each linear layer out as a NumPy array.
weights1, weights2, weights3 = (
    layer.weight.data.numpy() for layer in (model.fc1, model.fc2, model.fc3)
)

# Select the leading rows of each matrix for visualisation.
# NOTE(review): nn.Linear stores its weight as (out_features, in_features), so
# for fc1 = nn.Linear(14, 28) the slice [:14] keeps only the first 14 of 28
# rows, while the slices on fc2 and fc3 keep every row. Confirm that showing
# half of fc1 is intended.
weights_subset1 = weights1[:14]
weights_subset2 = weights2[:28]
weights_subset3 = weights3[:14]

# Print all three subsets before any figure is created.
for subset in (weights_subset1, weights_subset2, weights_subset3):
    print(subset)

# One heat-map figure per layer, each with its own colour bar and title.
titled_subsets = (
    ('fc1 weight matrix', weights_subset1),
    ('fc2 weight matrix', weights_subset2),
    ('fc3 weight matrix', weights_subset3),
)
for title, subset in titled_subsets:
    plt.matshow(subset, cmap='viridis')
    plt.colorbar()
    plt.title(title)

# Show the figures.
plt.show()

