In [34]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [1]:
import torch.nn as nn

class MultiLayerTMPredictor(nn.Module):
    def __init__(self, input_size=1280 * 3):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.model(x)


def create_dataset(features, labels, batch_size=32):
    dataset = torch.utils.data.TensorDataset(features, labels)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True
    )
    return dataloader


def train_model(model, optimizer, train_loader, loss_fn, epochs=100):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for batch_features, batch_labels in train_loader:
            batch_features = batch_features.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = loss_fn(outputs, batch_labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        avg_loss = epoch_loss / len(train_loader)
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Average Loss: {avg_loss:.4f}")


In [57]:
# Load the features and labels
import pandas as pd

train_data = pd.read_csv("../data/train_features_1000.csv")
train_label = train_data["tm"].values
train_features = train_data["features"].values
train_features = [eval(i) for i in train_features]

train_features = torch.tensor(train_features, dtype=torch.float32)
train_label = torch.tensor(train_label, dtype=torch.float32)

print(train_features.shape, train_label.shape)

train_loader = create_dataset(train_features, train_label)

# Create the model
model = MultiLayerTMPredictor(input_size=480*2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Train the model
train_model(model, optimizer, train_loader, loss_fn, epochs=100)


torch.Size([1000, 960]) torch.Size([1000])


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0, Average Loss: 1405.2165
Epoch 10, Average Loss: 237.7694
Epoch 20, Average Loss: 218.2690
Epoch 30, Average Loss: 236.2615
Epoch 40, Average Loss: 231.2914
Epoch 50, Average Loss: 217.1115
Epoch 60, Average Loss: 210.2934
Epoch 70, Average Loss: 245.4939
Epoch 80, Average Loss: 213.8843
Epoch 90, Average Loss: 201.7368


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
import torch

# 加载特征和标签数据
train_data = pd.read_csv("../data/train_features_1000.csv")
train_label = train_data["tm"].values
train_features = train_data["features"].values
train_features = [eval(i) for i in train_features]

train_features = torch.tensor(train_features, dtype=torch.float32)
train_label = torch.tensor(train_label, dtype=torch.float32)
y = pd.read_csv("../data/train.csv")["tm"].values

# 定义模型
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(alpha=1.0),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
    "Support Vector Regression": SVR(kernel="rbf", C=1.0, epsilon=0.1)
}

# val
for name, model in models.items():
    scores = cross_val_score(model, train_features, train_label, cv=5, scoring="neg_mean_squared_error")
    print(f"{name} CV MSE: {scores.mean():.4f}")

Linear Regression CV MSE: -11788.2109
Ridge Regression CV MSE: -137.5139
Random Forest CV MSE: -72.0502
Gradient Boosting CV MSE: -74.7600
Support Vector Regression CV MSE: -108.2614




Neural Network CV MSE: -118.7773


