## 序列数据加载

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [36]:


# 1. Load the athlete-level table.
file_path = 'data/summerOly_athletes.csv'
data = pd.read_csv(file_path)

# 2. Preprocessing
# Drop every row whose 'Medal' column says no medal was won.
data = data[data['Medal'] != 'No medal']

# Distinct sports, Olympic years and delegations; sorting makes the axis
# order (and therefore every index below) reproducible between runs.
sports = sorted(data['Sport'].unique())  # sports, axis S
years = sorted(data['Year'].unique())    # Olympic years, axis T
nocs = sorted(data['NOC'].unique())      # national delegations, axis N

# Label -> axis-index lookup tables.
sport_to_idx = {sport: i for i, sport in enumerate(sports)}
year_to_idx = {year: i for i, year in enumerate(years)}
noc_to_idx = {noc: i for i, noc in enumerate(nocs)}
print(sport_to_idx)
print(year_to_idx)

# Zero-initialised indicator tensor of shape (S, T, N).
S, T, N = len(sports), len(years), len(nocs)
result_tensor = np.zeros((S, T, N), dtype=int)

# 3. Fill the tensor
# Translate the three label columns to index arrays and set every
# (sport, year, NOC) cell that occurs in the medal rows in one vectorised
# assignment instead of iterating over the DataFrame row by row.
# Duplicate triples are harmless because the assigned value is always 1.
# The explicit integer dtype keeps the indexing valid even when no rows
# survive the filter above.
sport_ids = data['Sport'].map(sport_to_idx).to_numpy(dtype=np.intp)
year_ids = data['Year'].map(year_to_idx).to_numpy(dtype=np.intp)
noc_ids = data['NOC'].map(noc_to_idx).to_numpy(dtype=np.intp)
result_tensor[sport_ids, year_ids, noc_ids] = 1  # this NOC won a medal in this sport that year

# 4. Persist the result
output_path = 'data/olympic_medals.npy'
np.save(output_path, result_tensor)
print(f"结果张量已保存到 {output_path}")
print(f"张量形状: {result_tensor.shape}")  # prints the (S, T, N) shape


{'3x3 Basketball': 0, 'Aeronautics': 1, 'Alpinism': 2, 'Archery': 3, 'Art Competitions': 4, 'Artistic Gymnastics': 5, 'Artistic Swimming': 6, 'Athletics': 7, 'Badminton': 8, 'Baseball': 9, 'Baseball/Softball': 10, 'Basketball': 11, 'Basque Pelota': 12, 'Beach Volleyball': 13, 'Boxing': 14, 'Breaking': 15, 'Canoe Slalom': 16, 'Canoe Sprint': 17, 'Canoeing': 18, 'Cricket': 19, 'Croquet': 20, 'Cycling': 21, 'Cycling BMX Freestyle': 22, 'Cycling BMX Racing': 23, 'Cycling Mountain Bike': 24, 'Cycling Road': 25, 'Cycling Track': 26, 'Diving': 27, 'Equestrian': 28, 'Equestrianism': 29, 'Fencing': 30, 'Figure Skating': 31, 'Football': 32, 'Golf': 33, 'Gymnastics': 34, 'Handball': 35, 'Hockey': 36, 'Ice Hockey': 37, 'Jeu De Paume': 38, 'Judo': 39, 'Karate': 40, 'Lacrosse': 41, 'Marathon Swimming': 42, 'Modern Pentathlon': 43, 'Motorboating': 44, 'Polo': 45, 'Racquets': 46, 'Rhythmic Gymnastics': 47, 'Roque': 48, 'Rowing': 49, 'Rugby': 50, 'Rugby Sevens': 51, 'Sailing': 52, 'Shooting': 53, 'Skat

In [43]:
# Year-by-NOC indicator matrix for sport index 8
# ('Badminton' in the mapping printed when the tensor was built).
t = result_tensor[8]
print(t)
# Number of (year, NOC) cells with a medal for this sport.
print(t.sum())
# List the sports whose total number of medal cells is below 10.
sport_to_idx_inv = {v: k for k, v in sport_to_idx.items()}
# One reduction over the year and NOC axes gives the per-sport totals,
# instead of re-summing each slice with the Python built-in sum().
medal_cells_per_sport = result_tensor.sum(axis=(1, 2))
selected_sports = [
    sport_to_idx_inv[i] for i in range(S) if medal_cells_per_sport[i] < 10
]
print(selected_sports)


[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
57
['Aeronautics', 'Alpinism', 'Artistic Swimming', 'Baseball/Softball', 'Basque Pelota', 'Breaking', 'Cricket', 'Croquet', 'Cycling BMX Racing', 'Cycling Mountain Bike', 'Figure Skating', 'Ice Hockey', 'Jeu De Paume', 'Lacrosse', 'Marathon Swimming', 'Motorboating', 'Racquets', 'Roque', 'Surfing', 'Trampoline Gymnastics']


## 建立模型 （针对某一项运动）

## TODO:
### 1. 对于每个 **年份-国家** 的二维 01 数据，使用 **运动员数量-国家** 的统计数据作为平滑01分布的参考，即对于每个国家，其运动员数量越多，越可能获得奖牌。对于每个年份，已经统计了 **运动员数量-国家** 的分布（国家为横坐标）

### 2. 现在建模的是每个运动的大类

In [41]:
# Load the saved medal tensor and keep the slice at sport index 8 as the
# time series used for training (float32 so it can be fed to the model).
x = torch.tensor(
    np.load('data/olympic_medals.npy')[8],
    dtype=torch.float32,
)

# Reweighting step: not implemented yet, so x_reweight is simply the same
# tensor object as x.
x_reweight = x

# Sequence length (rows) and vector dimension (columns) of the series.
T, N = x.shape

# Weight-optimisation model
class WeightOptimizer(nn.Module):
    """Learn one global weight per time step for predicting a series from its past.

    The trainable vector is mapped through softplus (positive values) and a
    cumulative sum, so the effective weights are positive and non-decreasing
    along the time axis.
    """

    def __init__(self, T):
        """Create the trainable raw weight vector.

        Args:
            T: Number of raw weights, i.e. the length of the weight sequence.
        """
        super().__init__()
        # Raw (unconstrained) weights, initialised to one.
        self.weights = nn.Parameter(torch.ones(T))

    def forward(self, x):
        """Return the prediction loss of the weighted-history predictor on ``x``.

        For every step ``k = 1 .. T-1`` the prediction is the mean over
        ``j < k`` of ``w_j * x[j]``; the loss is the sum over ``k`` of the
        squared Euclidean distance between that prediction and ``x[k]``,
        divided by ``T``.

        Args:
            x: 2-D tensor with ``T`` rows (time steps) and ``N`` columns.
                The model needs at least ``T - 1`` raw weights.

        Returns:
            A scalar tensor attached to the autograd graph. For fewer than
            two rows there is nothing to predict and the result is a zero
            that still supports ``backward()``.
        """
        num_steps, _ = x.size()

        # Softplus keeps each increment positive; the cumulative sum makes the
        # sequence non-decreasing. Computed once because it does not depend on k.
        effective = torch.cumsum(nn.functional.softplus(self.weights), dim=0)

        if num_steps < 2:
            # No step to predict: return a graph-connected zero rather than a
            # plain Python number, so callers can still call backward().
            return effective.sum() * 0.0

        # running[k - 1] = sum_{j < k} w_j * x[j]; the last row of x is only
        # ever a target, never part of a history, so it is left out here.
        weighted = effective[:num_steps - 1].unsqueeze(1) * x[:-1]
        running = torch.cumsum(weighted, dim=0)  # (T - 1, N)

        # Dividing the running sum by the history length k gives the mean
        # used as the prediction for step k.
        history_len = torch.arange(
            1, num_steps, dtype=running.dtype, device=running.device
        )
        predictions = running / history_len.unsqueeze(1)  # (T - 1, N)

        residual = predictions - x[1:]
        total_loss = residual.pow(2).sum()

        # NOTE(review): T - 1 terms are accumulated but the sum is divided by
        # T; kept as is so reported loss values stay comparable — confirm
        # whether T - 1 was intended.
        return total_loss / num_steps

# Build the model with one raw weight per time step of the series.
model = WeightOptimizer(T)

# Adam over the model's parameters.
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Full-batch training: each epoch evaluates the loss on the whole series.
num_epochs = 1000
for epoch in range(num_epochs):
    optimizer.zero_grad()  # reset gradients from the previous step
    loss = model(x)        # forward pass returns the loss directly
    loss.backward()        # back-propagate
    optimizer.step()       # update the raw weights
    if (epoch + 1) % 50:
        continue           # only report every 50th epoch
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}")

# Show the learned weights after the same softplus + cumulative-sum
# transform the model applies internally.
raw_weights = model.weights
weights = torch.nn.functional.softplus(raw_weights).cumsum(dim=0)
print("Learned weights:", weights)


Epoch 50/1000, Loss: 69.1623
Epoch 100/1000, Loss: 40.3209
Epoch 150/1000, Loss: 25.5777
Epoch 200/1000, Loss: 17.4934
Epoch 250/1000, Loss: 12.7328
Epoch 300/1000, Loss: 9.7497
Epoch 350/1000, Loss: 7.7812
Epoch 400/1000, Loss: 6.4257
Epoch 450/1000, Loss: 5.4589
Epoch 500/1000, Loss: 4.7490
Epoch 550/1000, Loss: 4.2149
Epoch 600/1000, Loss: 3.8047
Epoch 650/1000, Loss: 3.4841
Epoch 700/1000, Loss: 3.2296
Epoch 750/1000, Loss: 3.0250
Epoch 800/1000, Loss: 2.8585
Epoch 850/1000, Loss: 2.7218
Epoch 900/1000, Loss: 2.6085
Epoch 950/1000, Loss: 2.5139
Epoch 1000/1000, Loss: 2.4343
Learned weights: tensor([0.1714, 0.3427, 0.5141, 0.6854, 0.8568, 1.0281, 1.1995, 1.3709, 1.5422,
        1.7136, 1.8849, 2.0550, 2.2231, 2.3891, 2.5541, 2.7177, 2.8809, 3.0432,
        3.2060, 3.3697, 3.5376, 3.7059, 3.8812, 4.0602, 4.2395, 4.4204, 4.5992,
        4.7773, 4.9562, 5.1602, 6.4735], grad_fn=<CumsumBackward0>)
