Initial model based on Impressions (main label)

Libraries

In [2]:
import os
import re
import glob

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


Set up device (CUDA) and Preprocessing (Transformations)

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Preprocessing applied to every image before it is fed to the network:
#   1. resize to 224x224,
#   2. convert the PIL image to a tensor,
#   3. normalize each channel with the given mean / std
#      (NOTE(review): these look like the usual ImageNet statistics -- confirm).
img_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
print(device)


cuda


In [4]:
# Sanity check of GPU visibility. `torch` is already imported in the
# libraries cell at the top of the notebook, so it is not re-imported here.
print(torch.cuda.is_available())  # True when PyTorch can use a CUDA device
print(torch.cuda.device_count())  # Number of GPUs recognized


True
1


Loading a Pretrained ResNet and Extracting Embeddings

In [5]:
# Build the feature extractor: a ResNet-18 initialised with the default
# pretrained weights shipped with torchvision.
resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Swap the final fully-connected layer (resnet.fc) for a pass-through so the
# forward pass returns the raw features instead of class scores.
resnet.fc = nn.Identity()

# Move the network to the selected device and switch it to evaluation mode;
# both calls return the module itself, so they can be chained.
resnet = resnet.to(device).eval()

# Display the architecture as the cell output.
resnet


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
@torch.no_grad()
def get_image_embedding(img_path):
    """
    Load an image from disk and return its ResNet embedding.

    The image is converted to RGB, passed through the notebook-level
    ``img_transform`` pipeline, and run through the notebook-level ``resnet``
    on ``device``. Gradient tracking is disabled by the decorator.

    Parameters
    ----------
    img_path : str or path-like
        Path of the image file to embed.

    Returns
    -------
    numpy.ndarray
        1-D feature vector (shape [512] for the ResNet-18 backbone configured
        in this notebook).

    Raises
    ------
    Whatever ``PIL.Image.open`` raises for a missing or unreadable file
    (e.g. ``FileNotFoundError``); such errors are not swallowed here.
    """
    # Open inside a context manager so the underlying file handle is released
    # deterministically; this function is called once per image in a long loop.
    # convert() fully loads the pixel data and returns a new image object, so
    # `image` stays usable after the file is closed.
    with Image.open(img_path) as img_file:
        image = img_file.convert('RGB')

    batch = img_transform(image).unsqueeze(0).to(device)  # shape [1, 3, 224, 224]

    embedding = resnet(batch)  # shape [1, 512]
    # Drop the batch dimension and hand back a CPU numpy array.
    return embedding.squeeze(0).cpu().numpy()


Parsing Filenames and Building the Dataset (X, y)

In [7]:
# Collect every .jpg in the image folder. glob returns files in arbitrary,
# OS-dependent order, so sort to keep the dataset order (and therefore the
# seeded train/test split performed later) reproducible between runs.
image_folder = r"C:\Bakalauras\downloaded_images"
image_paths = sorted(glob.glob(os.path.join(image_folder, "*.jpg")))

X = []
y = []
skipped_paths = []  # files whose name does not yield a usable label

pattern = re.compile(r"([^\\/_]+)_\d+\.jpg$")
# Explanation of regex:
#  - `([^\\/_]+)` captures one or more characters that are NOT a backslash,
#    a forward slash, or an underscore; this chunk is the impressions label.
#  - `_\d+\.jpg$` matches an underscore, then digits, then ".jpg" at the end
#    of the string.

for img_path in image_paths:
    filename = os.path.basename(img_path)  # e.g. "1545660_0.jpg"
    match = pattern.search(filename)
    if not match:
        # Filename does not follow "<impressions>_<index>.jpg"; leave it out.
        skipped_paths.append(img_path)
        continue

    # The capturing group is the impressions number, e.g. "1545660"
    impressions_str = match.group(1)

    # Skip files whose label is not a finite number. Substituting 0.0 here
    # would inject fabricated targets into the training data.
    try:
        impressions_value = float(impressions_str)
    except ValueError:
        skipped_paths.append(img_path)
        continue
    if not np.isfinite(impressions_value):
        skipped_paths.append(img_path)
        continue

    # Label is valid: compute the ResNet embedding and add the sample.
    X.append(get_image_embedding(img_path))
    y.append(impressions_value)

print(f"Loaded {len(X)} images, skipped {len(skipped_paths)} with unusable filenames.")

# Fail here with a clear message rather than later inside train_test_split.
if not X:
    raise RuntimeError(
        f"No usable images found in {image_folder!r}; "
        "expected files named '<impressions>_<index>.jpg'."
    )

X = np.array(X)  # shape [num_images, 512]
y = np.array(y)  # shape [num_images]


Training a Random Forest Regressor

In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical between runs (given the same ordering of X and y).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest on the image embeddings to predict impressions.
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

# Evaluate on the held-out set. All metric functions, including r2_score,
# come from the import cell at the top of the notebook.
y_pred = regressor.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse**0.5  # root of the MSE, in the same units as the target
r2 = r2_score(y_test, y_pred)

print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²:   {r2:.4f}")


MAE: 18635.01
MSE: 3638225831.62
RMSE: 60317.71
R²:   0.5668


: 