In [1]:
import json
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import resnet50
from PIL import Image
from gensim.models import KeyedVectors
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.metrics import mean_squared_error, accuracy_score
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
from gensim.models import KeyedVectors

# Load pretrained ResNet
resnet = resnet50(pretrained=False)
resnet.load_state_dict(torch.load('/data1/dxw_data/llm/resnet/resnet50-19c8e357.pth'))
resnet = nn.Sequential(*list(resnet.children())[:-1])  # Remove the classification layer
resnet.eval()

word2vec_path = '/data1/dxw_data/llm/word2vec/GoogleNews-vectors-negative300.bin.gz'
word2vec_model  = KeyedVectors.load_word2vec_format(word2vec_path, binary=True)




In [2]:
# Preprocess transforms for images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Ensure the cover directory exists
cover_path = '/data1/dxw_data/llm/redbook/1000/cover'
os.makedirs(cover_path, exist_ok=True)

# Function to apply mask, save the merged image, and extract image features
def extract_imagemask_features(image_path, mask_path, cover_path):
    image = Image.open(image_path).convert('RGB')
    mask = Image.open(mask_path).convert('L')

    # Apply mask to the image
    image_np = np.array(image)
    mask_np = np.array(mask)
    masked_image_np = np.multiply(image_np, mask_np[:, :, None] / 255.0)
    masked_image = Image.fromarray(masked_image_np.astype(np.uint8))

    # Save the masked image
    masked_image_save_path = os.path.join(cover_path, os.path.basename(image_path))
    masked_image.save(masked_image_save_path)

    # Apply transformations
    masked_image = transform(masked_image).unsqueeze(0)

    # Extract features using ResNet
    with torch.no_grad():
        features = resnet(masked_image).squeeze().numpy()
    return features

# Function to extract image features
def extract_image_features(image_path):
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)
    with torch.no_grad():
        features = resnet(image).squeeze().numpy()
    return features

# Function to extract text features
def extract_text_features(caption):
    words = caption.split()
    word_vectors = []
    for word in words:
        if word in word2vec_model:
            vector = word2vec_model[word]
            word_vectors.append(vector)
    if not word_vectors:
        return np.zeros(word2vec_model.vector_size)
    return np.mean(word_vectors, axis=0)

# Define the dataset class
class HotnessDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Define the LSTM model class
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]  # Take the output of the last time step
        out = self.fc(lstm_out)
        out = self.sigmoid(out)
        return out

# Prepare dataset
image_features = []
text_features = []
mask_features = []
labels = []
times = []

# Load JSON file
with open('/data1/dxw_data/llm/MKT_data_mining/Multimodal/time_sequence/captions_with_hotness_and_time.json', 'r') as f:
    data = json.load(f)

for item in data:
    image_path = os.path.join('/data1/dxw_data/llm/redbook/1000/data2', item['image'])
    mask_path = os.path.join('/data1/dxw_data/llm/redbook/1000/processed2', item['image'])
    if os.path.exists(image_path) and os.path.exists(mask_path):
        mask_feat = extract_imagemask_features(image_path, mask_path, cover_path)
        img_feat = extract_image_features(image_path)
        txt_feat = extract_text_features(item['caption'])
        mask_features.append(mask_feat)
        image_features.append(img_feat)
        text_features.append(txt_feat)
        labels.append(item['hotness'])
        times.append(item['time'])

# Convert to numpy arrays
mask_features = np.array(mask_features)
image_features = np.array(image_features)
text_features = np.array(text_features)
labels = np.array(labels)
times = np.array(times)

In [3]:
# ----------- mask,image和text ----------- #
combined_features = np.hstack((mask_features, image_features))
combined_embeddings = np.hstack((combined_features, text_features))

In [4]:
# Encode time information
def encode_time(times, max_time):
    times = np.array(times)
    sin_time = np.sin(2 * np.pi * times / max_time)
    cos_time = np.cos(2 * np.pi * times / max_time)
    return np.vstack((sin_time, cos_time)).T

encoded_times = encode_time(times, max_time=1000)

def create_time_based_splits(features, labels, times):
    # Split based on time
    train_indices = np.where(times <= 800)[0]
    test_indices = np.where(times > 800)[0]
    
    return train_indices, test_indices

train_indices, test_indices = create_time_based_splits(combined_embeddings, labels, times)

# Add encoded time information as features
time_train = encoded_times[train_indices]
time_test = encoded_times[test_indices]

# Concatenate encoded time information with features
X_train = np.hstack((combined_embeddings[train_indices], time_train))
X_test = np.hstack((combined_embeddings[test_indices], time_test))
y_train = labels[train_indices]
y_test = labels[test_indices]

# Function to create sliding windows
def create_sliding_windows(X, y, window_size):
    features = []
    labels = []
    for i in range(len(X) - window_size):
        window = X[i:i + window_size]
        label = y[i + window_size - 1]
        features.append(window)
        labels.append(label)
    return np.array(features), np.array(labels)

window_size = 10

# Create sliding windows for training and testing
X_train, y_train = create_sliding_windows(X_train, y_train, window_size)
X_test, y_test = create_sliding_windows(X_test, y_test, window_size)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and dataloaders
train_dataset = HotnessDataset(X_train, y_train)
test_dataset = HotnessDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = X_train.shape[2]
hidden_dim = 64
num_layers = 2
output_dim = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch [1/20], Loss: 0.5799
Epoch [2/20], Loss: 0.7525
Epoch [3/20], Loss: 0.6591
Epoch [4/20], Loss: 0.7004
Epoch [5/20], Loss: 0.7611
Epoch [6/20], Loss: 0.5413
Epoch [7/20], Loss: 0.7093
Epoch [8/20], Loss: 0.4921
Epoch [9/20], Loss: 0.7335
Epoch [10/20], Loss: 0.5302
Epoch [11/20], Loss: 0.2917
Epoch [12/20], Loss: 0.3811
Epoch [13/20], Loss: 0.4030
Epoch [14/20], Loss: 0.2300
Epoch [15/20], Loss: 0.3212
Epoch [16/20], Loss: 0.1982
Epoch [17/20], Loss: 0.2385
Epoch [18/20], Loss: 0.1866
Epoch [19/20], Loss: 0.1993
Epoch [20/20], Loss: 0.1034
Test Accuracy: 62.11%


In [4]:
# ----------- image和text ----------- #
combined_embeddings = np.hstack((image_features, text_features))

In [5]:
# Encode time information
def encode_time(times, max_time):
    times = np.array(times)
    sin_time = np.sin(2 * np.pi * times / max_time)
    cos_time = np.cos(2 * np.pi * times / max_time)
    return np.vstack((sin_time, cos_time)).T

encoded_times = encode_time(times, max_time=1000)

def create_time_based_splits(features, labels, times):
    # Split based on time
    train_indices = np.where(times <= 800)[0]
    test_indices = np.where(times > 800)[0]
    
    return train_indices, test_indices

train_indices, test_indices = create_time_based_splits(combined_embeddings, labels, times)

# Add encoded time information as features
time_train = encoded_times[train_indices]
time_test = encoded_times[test_indices]

# Concatenate encoded time information with features
X_train = np.hstack((combined_embeddings[train_indices], time_train))
X_test = np.hstack((combined_embeddings[test_indices], time_test))
y_train = labels[train_indices]
y_test = labels[test_indices]

# Function to create sliding windows
def create_sliding_windows(X, y, window_size):
    features = []
    labels = []
    for i in range(len(X) - window_size):
        window = X[i:i + window_size]
        label = y[i + window_size - 1]
        features.append(window)
        labels.append(label)
    return np.array(features), np.array(labels)

window_size = 10

# Create sliding windows for training and testing
X_train, y_train = create_sliding_windows(X_train, y_train, window_size)
X_test, y_test = create_sliding_windows(X_test, y_test, window_size)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and dataloaders
train_dataset = HotnessDataset(X_train, y_train)
test_dataset = HotnessDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = X_train.shape[2]
hidden_dim = 64
num_layers = 2
output_dim = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch [1/20], Loss: 0.6930
Epoch [2/20], Loss: 0.6902
Epoch [3/20], Loss: 0.6962
Epoch [4/20], Loss: 0.6259
Epoch [5/20], Loss: 0.5584
Epoch [6/20], Loss: 0.6114
Epoch [7/20], Loss: 0.5218
Epoch [8/20], Loss: 0.5905
Epoch [9/20], Loss: 0.7218
Epoch [10/20], Loss: 0.6299
Epoch [11/20], Loss: 0.7531
Epoch [12/20], Loss: 0.4809
Epoch [13/20], Loss: 0.6481
Epoch [14/20], Loss: 0.3111
Epoch [15/20], Loss: 0.4301
Epoch [16/20], Loss: 0.3318
Epoch [17/20], Loss: 0.4152
Epoch [18/20], Loss: 0.2182
Epoch [19/20], Loss: 0.3316
Epoch [20/20], Loss: 0.3444
Test Accuracy: 62.11%


In [3]:
# ----------- mask和text ----------- #
combined_embeddings = np.hstack((mask_features, text_features))

In [5]:
# Encode time information
def encode_time(times, max_time):
    times = np.array(times)
    sin_time = np.sin(2 * np.pi * times / max_time)
    cos_time = np.cos(2 * np.pi * times / max_time)
    return np.vstack((sin_time, cos_time)).T

encoded_times = encode_time(times, max_time=1000)

def create_time_based_splits(features, labels, times):
    # Split based on time
    train_indices = np.where(times <= 800)[0]
    test_indices = np.where(times > 800)[0]
    
    return train_indices, test_indices

train_indices, test_indices = create_time_based_splits(combined_embeddings, labels, times)

# Add encoded time information as features
time_train = encoded_times[train_indices]
time_test = encoded_times[test_indices]

# Concatenate encoded time information with features
X_train = np.hstack((combined_embeddings[train_indices], time_train))
X_test = np.hstack((combined_embeddings[test_indices], time_test))
y_train = labels[train_indices]
y_test = labels[test_indices]

# Function to create sliding windows
def create_sliding_windows(X, y, window_size):
    features = []
    labels = []
    for i in range(len(X) - window_size):
        window = X[i:i + window_size]
        label = y[i + window_size - 1]
        features.append(window)
        labels.append(label)
    return np.array(features), np.array(labels)

window_size = 10

# Create sliding windows for training and testing
X_train, y_train = create_sliding_windows(X_train, y_train, window_size)
X_test, y_test = create_sliding_windows(X_test, y_test, window_size)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and dataloaders
train_dataset = HotnessDataset(X_train, y_train)
test_dataset = HotnessDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = X_train.shape[2]
hidden_dim = 64
num_layers = 2
output_dim = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch [1/20], Loss: 0.6935
Epoch [2/20], Loss: 0.6683
Epoch [3/20], Loss: 0.6798
Epoch [4/20], Loss: 0.5586
Epoch [5/20], Loss: 0.6500
Epoch [6/20], Loss: 0.5156
Epoch [7/20], Loss: 0.5728
Epoch [8/20], Loss: 0.5757
Epoch [9/20], Loss: 0.3031
Epoch [10/20], Loss: 0.4675
Epoch [11/20], Loss: 0.3687
Epoch [12/20], Loss: 0.4503
Epoch [13/20], Loss: 0.3549
Epoch [14/20], Loss: 0.3321
Epoch [15/20], Loss: 0.1138
Epoch [16/20], Loss: 0.1789
Epoch [17/20], Loss: 0.1145
Epoch [18/20], Loss: 0.1607
Epoch [19/20], Loss: 0.0601
Epoch [20/20], Loss: 0.0538
Test Accuracy: 60.00%


In [3]:
# ----------- mask和image ----------- #
combined_embeddings = np.hstack((image_features, mask_features))

In [4]:
# Encode time information
def encode_time(times, max_time):
    times = np.array(times)
    sin_time = np.sin(2 * np.pi * times / max_time)
    cos_time = np.cos(2 * np.pi * times / max_time)
    return np.vstack((sin_time, cos_time)).T

encoded_times = encode_time(times, max_time=1000)

def create_time_based_splits(features, labels, times):
    # Split based on time
    train_indices = np.where(times <= 800)[0]
    test_indices = np.where(times > 800)[0]
    
    return train_indices, test_indices

train_indices, test_indices = create_time_based_splits(combined_embeddings, labels, times)

# Add encoded time information as features
time_train = encoded_times[train_indices]
time_test = encoded_times[test_indices]

# Concatenate encoded time information with features
X_train = np.hstack((combined_embeddings[train_indices], time_train))
X_test = np.hstack((combined_embeddings[test_indices], time_test))
y_train = labels[train_indices]
y_test = labels[test_indices]

# Function to create sliding windows
def create_sliding_windows(X, y, window_size):
    features = []
    labels = []
    for i in range(len(X) - window_size):
        window = X[i:i + window_size]
        label = y[i + window_size - 1]
        features.append(window)
        labels.append(label)
    return np.array(features), np.array(labels)

window_size = 10

# Create sliding windows for training and testing
X_train, y_train = create_sliding_windows(X_train, y_train, window_size)
X_test, y_test = create_sliding_windows(X_test, y_test, window_size)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and dataloaders
train_dataset = HotnessDataset(X_train, y_train)
test_dataset = HotnessDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = X_train.shape[2]
hidden_dim = 64
num_layers = 2
output_dim = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch [1/20], Loss: 0.6765
Epoch [2/20], Loss: 0.7166
Epoch [3/20], Loss: 0.6372
Epoch [4/20], Loss: 0.5522
Epoch [5/20], Loss: 0.5397
Epoch [6/20], Loss: 0.5192
Epoch [7/20], Loss: 0.4338
Epoch [8/20], Loss: 0.4533
Epoch [9/20], Loss: 0.5280
Epoch [10/20], Loss: 0.3811
Epoch [11/20], Loss: 0.5274
Epoch [12/20], Loss: 0.5125
Epoch [13/20], Loss: 0.2537
Epoch [14/20], Loss: 0.3334
Epoch [15/20], Loss: 0.1623
Epoch [16/20], Loss: 0.5161
Epoch [17/20], Loss: 0.1528
Epoch [18/20], Loss: 0.1303
Epoch [19/20], Loss: 0.1737
Epoch [20/20], Loss: 0.0664
Test Accuracy: 61.58%


In [3]:
# ----------- 纯mask ----------- #
combined_embeddings = mask_features

In [4]:
# Encode time information
def encode_time(times, max_time):
    times = np.array(times)
    sin_time = np.sin(2 * np.pi * times / max_time)
    cos_time = np.cos(2 * np.pi * times / max_time)
    return np.vstack((sin_time, cos_time)).T

encoded_times = encode_time(times, max_time=1000)

def create_time_based_splits(features, labels, times):
    # Split based on time
    train_indices = np.where(times <= 800)[0]
    test_indices = np.where(times > 800)[0]
    
    return train_indices, test_indices

train_indices, test_indices = create_time_based_splits(combined_embeddings, labels, times)

# Add encoded time information as features
time_train = encoded_times[train_indices]
time_test = encoded_times[test_indices]

# Concatenate encoded time information with features
X_train = np.hstack((combined_embeddings[train_indices], time_train))
X_test = np.hstack((combined_embeddings[test_indices], time_test))
y_train = labels[train_indices]
y_test = labels[test_indices]

# Function to create sliding windows
def create_sliding_windows(X, y, window_size):
    features = []
    labels = []
    for i in range(len(X) - window_size):
        window = X[i:i + window_size]
        label = y[i + window_size - 1]
        features.append(window)
        labels.append(label)
    return np.array(features), np.array(labels)

window_size = 10

# Create sliding windows for training and testing
X_train, y_train = create_sliding_windows(X_train, y_train, window_size)
X_test, y_test = create_sliding_windows(X_test, y_test, window_size)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and dataloaders
train_dataset = HotnessDataset(X_train, y_train)
test_dataset = HotnessDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = X_train.shape[2]
hidden_dim = 64
num_layers = 2
output_dim = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch [1/20], Loss: 0.6913
Epoch [2/20], Loss: 0.6699
Epoch [3/20], Loss: 0.6093
Epoch [4/20], Loss: 0.6555
Epoch [5/20], Loss: 0.6268
Epoch [6/20], Loss: 0.5277
Epoch [7/20], Loss: 0.5549
Epoch [8/20], Loss: 0.5526
Epoch [9/20], Loss: 0.4514
Epoch [10/20], Loss: 0.4663
Epoch [11/20], Loss: 0.2434
Epoch [12/20], Loss: 0.3563
Epoch [13/20], Loss: 0.1781
Epoch [14/20], Loss: 0.2557
Epoch [15/20], Loss: 0.1805
Epoch [16/20], Loss: 0.2709
Epoch [17/20], Loss: 0.1905
Epoch [18/20], Loss: 0.1633
Epoch [19/20], Loss: 0.1506
Epoch [20/20], Loss: 0.1101
Test Accuracy: 60.00%


In [3]:
# ----------- 纯image ----------- #
combined_embeddings = image_features

In [4]:
# Encode time information
def encode_time(times, max_time):
    times = np.array(times)
    sin_time = np.sin(2 * np.pi * times / max_time)
    cos_time = np.cos(2 * np.pi * times / max_time)
    return np.vstack((sin_time, cos_time)).T

encoded_times = encode_time(times, max_time=1000)

def create_time_based_splits(features, labels, times):
    # Split based on time
    train_indices = np.where(times <= 800)[0]
    test_indices = np.where(times > 800)[0]
    
    return train_indices, test_indices

train_indices, test_indices = create_time_based_splits(combined_embeddings, labels, times)

# Add encoded time information as features
time_train = encoded_times[train_indices]
time_test = encoded_times[test_indices]

# Concatenate encoded time information with features
X_train = np.hstack((combined_embeddings[train_indices], time_train))
X_test = np.hstack((combined_embeddings[test_indices], time_test))
y_train = labels[train_indices]
y_test = labels[test_indices]

# Function to create sliding windows
def create_sliding_windows(X, y, window_size):
    features = []
    labels = []
    for i in range(len(X) - window_size):
        window = X[i:i + window_size]
        label = y[i + window_size - 1]
        features.append(window)
        labels.append(label)
    return np.array(features), np.array(labels)

window_size = 10

# Create sliding windows for training and testing
X_train, y_train = create_sliding_windows(X_train, y_train, window_size)
X_test, y_test = create_sliding_windows(X_test, y_test, window_size)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and dataloaders
train_dataset = HotnessDataset(X_train, y_train)
test_dataset = HotnessDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = X_train.shape[2]
hidden_dim = 64
num_layers = 2
output_dim = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch [1/20], Loss: 0.7702
Epoch [2/20], Loss: 0.6695
Epoch [3/20], Loss: 0.6739
Epoch [4/20], Loss: 0.6483
Epoch [5/20], Loss: 0.5888
Epoch [6/20], Loss: 0.5445
Epoch [7/20], Loss: 0.4699
Epoch [8/20], Loss: 0.5057
Epoch [9/20], Loss: 0.5140
Epoch [10/20], Loss: 0.6774
Epoch [11/20], Loss: 0.5432
Epoch [12/20], Loss: 0.4086
Epoch [13/20], Loss: 0.3639
Epoch [14/20], Loss: 0.4115
Epoch [15/20], Loss: 0.4007
Epoch [16/20], Loss: 0.2967
Epoch [17/20], Loss: 0.2177
Epoch [18/20], Loss: 0.3002
Epoch [19/20], Loss: 0.3622
Epoch [20/20], Loss: 0.1498
Test Accuracy: 63.16%


In [4]:
# ----------- 纯text ----------- #
combined_embeddings = text_features

In [5]:
# Encode time information
def encode_time(times, max_time):
    times = np.array(times)
    sin_time = np.sin(2 * np.pi * times / max_time)
    cos_time = np.cos(2 * np.pi * times / max_time)
    return np.vstack((sin_time, cos_time)).T

encoded_times = encode_time(times, max_time=1000)

def create_time_based_splits(features, labels, times):
    # Split based on time
    train_indices = np.where(times <= 800)[0]
    test_indices = np.where(times > 800)[0]
    
    return train_indices, test_indices

train_indices, test_indices = create_time_based_splits(combined_embeddings, labels, times)

# Add encoded time information as features
time_train = encoded_times[train_indices]
time_test = encoded_times[test_indices]

# Concatenate encoded time information with features
X_train = np.hstack((combined_embeddings[train_indices], time_train))
X_test = np.hstack((combined_embeddings[test_indices], time_test))
y_train = labels[train_indices]
y_test = labels[test_indices]

# Function to create sliding windows
def create_sliding_windows(X, y, window_size):
    features = []
    labels = []
    for i in range(len(X) - window_size):
        window = X[i:i + window_size]
        label = y[i + window_size - 1]
        features.append(window)
        labels.append(label)
    return np.array(features), np.array(labels)

window_size = 10

# Create sliding windows for training and testing
X_train, y_train = create_sliding_windows(X_train, y_train, window_size)
X_test, y_test = create_sliding_windows(X_test, y_test, window_size)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and dataloaders
train_dataset = HotnessDataset(X_train, y_train)
test_dataset = HotnessDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = X_train.shape[2]
hidden_dim = 64
num_layers = 2
output_dim = 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).to(device)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch [1/20], Loss: 0.6767
Epoch [2/20], Loss: 0.6688
Epoch [3/20], Loss: 0.6668
Epoch [4/20], Loss: 0.6080
Epoch [5/20], Loss: 0.6730
Epoch [6/20], Loss: 0.6498
Epoch [7/20], Loss: 0.7265
Epoch [8/20], Loss: 0.6195
Epoch [9/20], Loss: 0.6387
Epoch [10/20], Loss: 0.5712
Epoch [11/20], Loss: 0.5360
Epoch [12/20], Loss: 0.5355
Epoch [13/20], Loss: 0.5676
Epoch [14/20], Loss: 0.4224
Epoch [15/20], Loss: 0.5931
Epoch [16/20], Loss: 0.4248
Epoch [17/20], Loss: 0.4004
Epoch [18/20], Loss: 0.4173
Epoch [19/20], Loss: 0.3380
Epoch [20/20], Loss: 0.1998
Test Accuracy: 66.84%
