# Building FF Scoring Model

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

Using device: cuda


# Data Loading

In [3]:
caption_path = "./caption_data"
relevant_columns = ["cap_feat", "img_feat", "mean"]
df_list = []
for pkl in os.listdir(caption_path):
    pkl_path = os.path.join(caption_path, pkl)
    img_df = pd.read_pickle(pkl_path)[relevant_columns]
    df_list.append(img_df)


In [4]:
data_df = pd.concat(df_list)
# print(data_df.head(5))
# print(len(data_df.index))

In [5]:
# Assuming df is your DataFrame

# Combine the embeddings into a single feature array
# Note: This assumes 'cap_feat' and 'img_feat' are each a list or array of 512 floats.
X = np.hstack([np.vstack(data_df['img_feat'].values), np.vstack(data_df['cap_feat'].values)])

# Your target variable
y = data_df['mean'].values

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
class HumorRatingNN(nn.Module):
    def __init__(self):
        super(HumorRatingNN, self).__init__()
        
        self.fc1 = nn.Linear(1024, 512) # 1024 inputs (512 from image + 512 from caption), to 512 outputs
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)  # 512 inputs to 256 outputs
        self.fc3 = nn.Linear(256, 1)    # 256 inputs to 1 output (your mean humor rating)
        
        self.to(DEVICE)
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)  # No activation here, as we're predicting a continuous value
        return x

In [7]:
# Convert data to PyTorch tensors
train_tensor_x = torch.Tensor(X_train).to(DEVICE)
train_tensor_y = torch.Tensor(y_train).to(DEVICE)
test_tensor_x = torch.Tensor(X_test).to(DEVICE)
test_tensor_y = torch.Tensor(y_test).to(DEVICE)

# Create TensorDatasets
train_data = TensorDataset(train_tensor_x, train_tensor_y)
test_data = TensorDataset(test_tensor_x, test_tensor_y)

# Create data loaders
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

# Initialize the model, loss function, and optimizer
model = HumorRatingNN()
criterion = nn.MSELoss()  # Mean Squared Error Loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [8]:
# Training loop
num_epochs = 10
for epoch in tqdm(range(num_epochs)):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

 10%|█         | 1/10 [00:37<05:33, 37.08s/it]

Epoch 1, Loss: 0.02077396772801876


 20%|██        | 2/10 [01:04<04:12, 31.62s/it]

Epoch 2, Loss: 0.022775467485189438


 30%|███       | 3/10 [01:32<03:27, 29.68s/it]

Epoch 3, Loss: 0.011568046174943447


 40%|████      | 4/10 [01:59<02:52, 28.71s/it]

Epoch 4, Loss: 0.02611869014799595


 50%|█████     | 5/10 [02:26<02:20, 28.00s/it]

Epoch 5, Loss: 0.012907912023365498


 60%|██████    | 6/10 [02:53<01:50, 27.63s/it]

Epoch 6, Loss: 0.018103918060660362


 70%|███████   | 7/10 [03:20<01:22, 27.40s/it]

Epoch 7, Loss: 0.010551112703979015


 80%|████████  | 8/10 [03:47<00:54, 27.28s/it]

Epoch 8, Loss: 0.01554368156939745


 90%|█████████ | 9/10 [04:13<00:27, 27.06s/it]

Epoch 9, Loss: 0.015414169989526272


100%|██████████| 10/10 [04:40<00:00, 28.05s/it]

Epoch 10, Loss: 0.008528538979589939





In [9]:
# Function to compute the root mean squared error
def rmse(predictions, targets):
    return torch.sqrt(((predictions - targets) ** 2).mean())

# Model evaluation
model.eval()  # Set the model to evaluation mode
test_loss = 0
total_rmse = 0

with torch.no_grad():  # Inference mode, gradient is not computed
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        test_loss += loss.item()  # Sum up batch loss
        total_rmse += rmse(outputs.squeeze(), targets).item()

# Calculate average losses
test_loss /= len(test_loader)
total_rmse /= len(test_loader)

print(f'Test Loss: {test_loss:.4f}')
print(f'Test RMSE: {total_rmse:.4f}')

Test Loss: 0.0132
Test RMSE: 0.1137


In [42]:
row = data_df.sample().iloc[0]
img_feat = row['img_feat']
cap_feat = row['cap_feat']
mean = row['mean']

model_in = torch.Tensor(np.concatenate((img_feat, cap_feat), axis=None)).to(DEVICE)

model.eval()  # Set the model to evaluation mode

with torch.no_grad():  # Inference mode, gradient is not computed
    model_out = model(model_in).item()
    
print("Model Output:", model_out)
print("Ground Truth:", mean)

Model Output: 1.1956510543823242
Ground Truth: 1.334975369458128


In [11]:
# Save the model parameters
model_path = 'humor_rating_model.pth'
torch.save(model.state_dict(), model_path)