In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import feather
from tqdm import tqdm
from visdom import Visdom
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
df = feather.read_dataframe('feature/final_feature.feather')

In [3]:
# 提取特征和标签
X = df.drop(['user_id', 'item_id', 'pred_date','label'], axis=1).values
y = df['label'].values


# 数据预处理
scaler = StandardScaler()
X = scaler.fit_transform(X)


# 处理样本不平衡问题，这里使用 SMOTE 进行过采样
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)


# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)


In [4]:
# 转换为 PyTorch 张量
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# 传入GPU
X_train_tensor = X_train_tensor.to(device)
y_train_tensor = y_train_tensor.to(device)
X_test_tensor = X_test_tensor.to(device)
y_test_tensor = y_test_tensor.to(device)

In [5]:
# 定义神经网络模型
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.layer1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.sigmoid(self.layer3(x))
        return x

In [6]:
# 初始化模型、损失函数和优化器
input_size = X_train.shape[1]
model = NeuralNetwork(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 传入GPU
model.to(device)
criterion.to(device)

BCELoss()

In [7]:
# 可视化
vis = Visdom()
win = None
losses = []

# 训练模型
num_epochs = 50
batch_size = 128
for epoch in tqdm(range(num_epochs)):
    running_loss = 0.0
    for i in range(0, len(X_train_tensor), batch_size):
        inputs = X_train_tensor[i:i+batch_size]
        labels = y_train_tensor[i:i+batch_size]

        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    losses.append(running_loss)
    if win is None:
        win = vis.line(X=np.array([epoch]), Y=np.array([running_loss]), opts=dict(title='Training Loss', xlabel='Epoch', ylabel='Loss'))
    else:
        vis.line(X=np.array([epoch]), Y=np.array([running_loss]), win=win, update='append')

100%|██████████| 50/50 [3:02:20<00:00, 218.80s/it]  


In [10]:
# 评估模型（仅评估正值），计算F1值
with torch.no_grad():
    outputs = model(X_test_tensor)
    predicted = (outputs >= 0.5).float()
    TP = (predicted * y_test_tensor).sum().item()
    print(f'True Positive: {TP}')
    precision = TP / predicted.sum().item()
    recall = TP / y_test_tensor.sum().item()
    f1 = 2 * precision * recall / (precision + recall)
    print(f'F1 Score: {f1:.4f}')

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.76 GiB (GPU 0; 16.00 GiB total capacity; 3.34 GiB already allocated; 11.49 GiB free; 3.34 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [11]:
from datetime import datetime
# 保存模型
torch.save(model.state_dict(), 'model/NN/model_{:}_epoch50.pth'.format(datetime.now().strftime('%m%d%H%M')))