In [1]:
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

# 导入自己的数据
df = pd.read_csv(r'C:\Users\admin\Desktop\Filtered_Data2.csv', encoding='utf-8', header=None)
df.columns = ['time','id','gl']

In [2]:
ID = df['id'].value_counts().index
pd.set_option('display.max_rows',100)
print(ID)

Index([263,  77, 193, 277, 229, 251, 245, 155, 111, 228,
       ...
       264, 167, 186, 249, 162,  39,  52, 266, 223, 289],
      dtype='int64', name='id', length=226)


In [3]:
all_data = []
for i in ID:
    #获取数据
    print("开始获取id为{}的被试的数据：---------------------------------".format(i))
    data = df[df['id']== i].copy()

    #将time列设成date数据类型
    data['time'] = pd.to_datetime(data['time'])
    
    #按照时间排序
    data = data.sort_values(by='time') 
    
    #提取月为单独的一列
    data['month'] = data['time'].dt.month
    
    #提取日为单独的一列
    data['day'] = data['time'].dt.day
    
    #提取小时为单独的一列
    data['hour'] = data['time'].dt.hour
    
    #提取分钟为单独的一列
    data['minute'] = data['time'].dt.minute
    
    # 提取周几（0表示星期一，1表示星期二，依此类推）
    data['weekday'] = data['time'].dt.dayofweek
    
    #以time为索引
    data = data.set_index('time')
                      
    data = data[['id', 'month','day','hour','minute','weekday','gl']].astype(np.float32)  # 修改数据类型
    all_data.append(data)

开始获取id为263的被试的数据：---------------------------------
开始获取id为77的被试的数据：---------------------------------
开始获取id为193的被试的数据：---------------------------------
开始获取id为277的被试的数据：---------------------------------
开始获取id为229的被试的数据：---------------------------------
开始获取id为251的被试的数据：---------------------------------
开始获取id为245的被试的数据：---------------------------------
开始获取id为155的被试的数据：---------------------------------
开始获取id为111的被试的数据：---------------------------------
开始获取id为228的被试的数据：---------------------------------
开始获取id为130的被试的数据：---------------------------------
开始获取id为232的被试的数据：---------------------------------
开始获取id为68的被试的数据：---------------------------------
开始获取id为23的被试的数据：---------------------------------
开始获取id为7的被试的数据：---------------------------------
开始获取id为164的被试的数据：---------------------------------
开始获取id为70的被试的数据：---------------------------------
开始获取id为102的被试的数据：---------------------------------
开始获取id为227的被试的数据：---------------------------------
开始获取id为95的被试的数据：---------------------

开始获取id为217的被试的数据：---------------------------------
开始获取id为141的被试的数据：---------------------------------
开始获取id为41的被试的数据：---------------------------------
开始获取id为50的被试的数据：---------------------------------
开始获取id为291的被试的数据：---------------------------------
开始获取id为200的被试的数据：---------------------------------
开始获取id为293的被试的数据：---------------------------------
开始获取id为57的被试的数据：---------------------------------
开始获取id为98的被试的数据：---------------------------------
开始获取id为257的被试的数据：---------------------------------
开始获取id为11的被试的数据：---------------------------------
开始获取id为96的被试的数据：---------------------------------
开始获取id为246的被试的数据：---------------------------------
开始获取id为131的被试的数据：---------------------------------
开始获取id为115的被试的数据：---------------------------------
开始获取id为148的被试的数据：---------------------------------
开始获取id为145的被试的数据：---------------------------------
开始获取id为89的被试的数据：---------------------------------
开始获取id为19的被试的数据：---------------------------------
开始获取id为235的被试的数据：----------------------

In [4]:
all_data = pd.concat(all_data, axis=0)

In [5]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))  
sel_col = ['id','month','day','hour','minute','weekday','gl']
for col in sel_col:                           
    all_data[col] = scaler.fit_transform(all_data[col].values.reshape(-1,1))

In [6]:
all_data['gl'] = all_data['gl'].replace(0, np.nan)

In [7]:
mask = np.isnan(all_data)
mask = mask.to_numpy()
all_data['gl'] = np.nan_to_num(all_data['gl'])
np_data = all_data.values

In [8]:
import torch
tensor_data = torch.from_numpy(np_data.reshape(-1,7)).to(torch.float32)
mask = torch.tensor(mask, dtype=torch.float32).view(-1,7)

In [9]:
from torch.utils.data import DataLoader, TensorDataset
trainSet = TensorDataset(tensor_data, mask)
batch_size = 64
trainLoader = DataLoader(dataset=trainSet, batch_size=batch_size, shuffle=True, drop_last=False)

In [10]:
for a,b in trainLoader:
    print(a.shape)
    print(b.shape)
    break

torch.Size([64, 7])
torch.Size([64, 7])


In [11]:
# GAIN模型 
import torch.nn as nn
class Generator(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(Generator, self).__init__()
        self.fc1 = nn.Linear(input_dim * 2, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, input_dim)

    def forward(self, input_data):
        x = torch.relu(self.fc1(input_data))
        x = torch.relu(self.fc2(x))
        generated_data = torch.sigmoid(self.fc3(x))
        return generated_data

class Discriminator(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, input_dim)

    def forward(self, input_data):
        x = torch.relu(self.fc1(input_data))
        x = torch.relu(self.fc2(x))
        validity = torch.sigmoid(self.fc3(x))
        return validity

In [12]:
import torch.optim as optim
# 模型参数
input_dim = 7  # 输入维度
hidden_dim = 64  # 隐层维度

# 创建生成器和判别器实例
generator = Generator(input_dim, hidden_dim)

discriminator = Discriminator(input_dim, hidden_dim)

# 设置优化器
gen_optimizer = optim.Adam(generator.parameters(), lr=0.001)
dis_optimizer = optim.Adam(discriminator.parameters(), lr=0.001)

In [13]:
# 定义损失函数（均方误差损失）
loss_function = nn.BCELoss()

# 创建均方误差损失函数
mse_loss = nn.MSELoss()

In [17]:
# 训练参数
num_epochs = 100
# 训练循环
for epoch in range(num_epochs):
    
    for data, m in trainLoader:
            
        x_hat = generator(torch.cat([data, 1-m], dim=1))
        
        x_imputed = data + m * x_hat
        
        torch.autograd.set_detect_anomaly(True)
        
        #表示输入数据被判别为真实数据的概率
        D_output = discriminator(x_imputed)
        
        # 计算判别器的损失
        discriminator_loss = loss_function(D_output, 1-m)

        torch.nn.utils.clip_grad_norm_(generator.parameters(), max_norm=1)
        torch.nn.utils.clip_grad_norm_(discriminator.parameters(), max_norm=1)

        # 更新判别器
        dis_optimizer.zero_grad()
        discriminator_loss.backward(retain_graph=True)
        dis_optimizer.step()

        # 计算生成损失
        G_loss_temp = -torch.mean( m * torch.log(D_output + 1e-8))
        med_original_data = data * (1 - m)
        med_generated_data = x_hat * (1 - m)
        Mse_loss = mse_loss(med_generated_data, med_original_data)

        generator_loss = Mse_loss * 100 + G_loss_temp

        # 更新生成器
        gen_optimizer.zero_grad()
        generator_loss.backward(retain_graph=True)
        gen_optimizer.step()

    # 打印每个epoch的损失
    print(f"Epoch [{epoch+1}/{num_epochs}] | Generator Loss: {generator_loss.item()} | Discriminator Loss: {discriminator_loss.item()}")
    torch.save(generator,r'./model/allen_{}.pth'.format(epoch + 1))

  File "D:\AnacondaEnv\envs\pytorch02\lib\runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "D:\AnacondaEnv\envs\pytorch02\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "D:\AnacondaEnv\envs\pytorch02\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "D:\AnacondaEnv\envs\pytorch02\lib\site-packages\traitlets\config\application.py", line 1043, in launch_instance
    app.start()
  File "D:\AnacondaEnv\envs\pytorch02\lib\site-packages\ipykernel\kernelapp.py", line 728, in start
    self.io_loop.start()
  File "D:\AnacondaEnv\envs\pytorch02\lib\site-packages\tornado\platform\asyncio.py", line 195, in start
    self.asyncio_loop.run_forever()
  File "D:\AnacondaEnv\envs\pytorch02\lib\asyncio\base_events.py", line 570, in run_forever
    self._run_once()
  File "D:\AnacondaEnv\envs\pytorch02\lib\asyncio\base_events.py", line 1859, in _run_once
    handle._run()
  File "D

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 7]], which is output 0 of AsStridedBackward0, is at version 5; expected version 4 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!