# Image Memorability Analysis

This notebook analyzes image memorability using two different pre-trained models:
1. **ResMem**: A deep learning model specifically designed for memorability prediction
2. **MemNet**: A Caffe-based model for image memorability analysis

The analysis will provide memorability scores for each image, allowing for comparison between the two models and identification of highly memorable images.


## 1. Setup and Dependencies

First, let's install and import all necessary packages.


In [None]:
# Install the third-party packages this notebook relies on (run once per environment).
# NOTE: `%pip` is an IPython magic, so this line only works inside Jupyter/IPython,
# not in a plain Python script.
%pip install resmem pandas pillow tqdm opencv-python numpy requests matplotlib seaborn


In [None]:
# Import required libraries
import os
import torch
import numpy as np
import pandas as pd
from PIL import Image
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns


## 2. Configuration

Set up paths and parameters for the analysis.


In [None]:
# Configuration
class Config:
    """Static settings shared by every cell of this notebook.

    All values are plain class attributes. Executing the class body also
    creates ``OUTPUT_DIR`` on disk (see the last statement).
    """

    # --- Input ---------------------------------------------------------
    # Folder scanned for images; point this at your own image folder.
    IMAGE_FOLDER = r'.\psychophysics\stimuli2\Animal'

    # --- Output --------------------------------------------------------
    OUTPUT_DIR = 'results'
    MEMORABILITY_SCORES_FILE = os.path.join(OUTPUT_DIR, 'memorability_scores.csv')

    # --- MemNet (Caffe) model files -------------------------------------
    MEMNET_PROTOTXT = 'memnet/deploy.prototxt'
    MEMNET_MODEL = 'memnet/memnet.caffemodel'

    # --- Preprocessing ---------------------------------------------------
    # One of 'center_crop', 'resize_pad' or 'stretch'.
    RESIZE_METHOD = 'stretch'
    # True: both models share the same resizing step (fair comparison).
    USE_CONSISTENT_PREPROCESSING = True

    # Side effect at class-definition time: make sure the results folder exists.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

config = Config()

# Echo the active settings so every run documents its own configuration.
print(
    f"Image folder: {config.IMAGE_FOLDER}\n"
    f"Output directory: {config.OUTPUT_DIR}\n"
    f"Resize method: {config.RESIZE_METHOD}\n"
    f"Consistent preprocessing: {config.USE_CONSISTENT_PREPROCESSING}"
)
print()

# Quick reference for the values accepted by RESIZE_METHOD.
print("\n".join([
    "Resize method options:",
    "• 'center_crop': Scale and center crop (recommended, preserves aspect ratio)",
    "• 'resize_pad': Scale and pad with gray borders (preserves all content)",
    "• 'stretch': Simple stretch to 227x227 (may distort aspect ratio)",
]))
print()

# Quick reference for USE_CONSISTENT_PREPROCESSING.
print("\n".join([
    "Preprocessing consistency:",
    "• True: Both models use same resizing method (fair comparison)",
    "• False: Each model uses its own built-in preprocessing",
]))


## 3. ResMem Implementation

Load and set up the ResMem model for memorability prediction.


In [None]:
from resmem import ResMem, transformer
from torchvision import transforms

# Instantiate ResMem with its pretrained weights and switch it to inference mode.
resmem_model = ResMem(pretrained=True)
resmem_model.eval()

# Explain the two preprocessing modes offered by preprocess_image_resmem().
print("\n".join([
    "=" * 60,
    "RESMEM PREPROCESSING OPTIONS",
    "=" * 60,
    "ResMem typically uses its own preprocessing, but for fair comparison",
    "with MemNet, we can use consistent preprocessing for both models.",
    "",
    "Options:",
    "1. use_builtin=True: Use ResMem's built-in transformer (default)",
    "2. use_builtin=False: Use same preprocessing as MemNet for fair comparison",
    "=" * 60,
]))

def preprocess_image_resmem(image_path, resize_method='center_crop', use_builtin=True):
    """
    Turn an image file into a single-item batch tensor for ResMem.

    Args:
        image_path (str): Path to input image
        resize_method (str): Resizing strategy passed to resize_image_smart
                             ('center_crop', 'resize_pad', 'stretch'); only
                             used when use_builtin is False
        use_builtin (bool): If True, run the image through resmem's own
                            `transformer`; if False, resize to 227x227 with
                            resize_image_smart and apply ToTensor plus
                            ImageNet mean/std normalization

    Returns:
        torch.Tensor: Image tensor with a leading batch dimension added
    """
    # Both modes start from the same RGB-decoded image.
    rgb_image = Image.open(image_path).convert('RGB')

    if use_builtin:
        # ResMem's packaged preprocessing, plus a batch axis.
        return transformer(rgb_image).unsqueeze(0)

    # Manual path: same 227x227 resizing step that the MemNet pipeline uses.
    resized = resize_image_smart(rgb_image, target_size=227, method=resize_method)

    # PyTorch-style conversion followed by ImageNet RGB mean/std normalization.
    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    return to_normalized_tensor(resized).unsqueeze(0)

def predict_resmem(image_path, resize_method='center_crop', use_consistent_preprocessing=True):
    """
    Predict a memorability score for one image with the ResMem model.

    Args:
        image_path (str): Path to input image
        resize_method (str): Resizing strategy passed to resize_image_smart
                             ('center_crop', 'resize_pad', 'stretch'); only
                             used when use_consistent_preprocessing is True
        use_consistent_preprocessing (bool): If True, first resize the image to
                             227x227 with resize_image_smart (the same step the
                             MemNet pipeline uses) and then apply resmem's
                             `transformer`; if False, apply `transformer` to
                             the original image directly

    Returns:
        float or None: Memorability score, or None if anything went wrong
                       (the error is printed so a batch run can continue)
    """
    try:
        img = Image.open(image_path).convert('RGB')

        if use_consistent_preprocessing:
            # Share MemNet's resizing step so both models see the same framing.
            img = resize_image_smart(img, target_size=227, method=resize_method)

        # NOTE(review): resmem's `transformer` may itself resize/crop its input,
        # in which case the 227x227 image above is resampled once more - confirm
        # against the resmem package before relying on "identical" preprocessing.
        image_x = transformer(img)

        # Run the forward pass itself without autograd bookkeeping. Inference
        # needs no gradients, and wrapping only `.item()` (as before) did not
        # stop the graph from being built for every image.
        with torch.no_grad():
            # Reshape to the (N, 3, 227, 227) batch layout used in the ResMem docs.
            prediction = resmem_model(image_x.view(-1, 3, 227, 227))

        return prediction.item()
    except Exception as e:
        # Deliberate best-effort: one unreadable image must not abort the batch.
        print(f"Error processing {image_path}: {str(e)}")
        return None

# Status banner for the ResMem cell.
print(
    "ResMem model loaded successfully!\n"
    "Default: Using consistent preprocessing for fair comparison with MemNet"
)


## 4. MemNet Implementation

Load and set up the MemNet model for memorability prediction.


In [None]:
# Summarize the input format this notebook prepares for MemNet.
print("\n".join([
    "=" * 60,
    "MEMNET PREPROCESSING REQUIREMENTS",
    "=" * 60,
    "MemNet is a Caffe-based model from MIT for image memorability prediction.",
    "Requirements for proper preprocessing:",
    "",
    "1. INPUT SIZE: 227x227 pixels (fixed size)",
    "2. COLOR FORMAT: RGB -> BGR conversion required",
    "3. NORMALIZATION: Mean subtraction with ImageNet means",
    "4. DATA TYPE: Float32",
    "5. FORMAT: NCHW (Batch, Channels, Height, Width)",
    "",
    "Preprocessing steps:",
    "- Load image as RGB",
    "- Resize to 227x227",
    "- Convert RGB to BGR (reverse channel order)",
    "- Subtract ImageNet BGR means: [104.0, 117.0, 123.0]",
    "- Convert to blob format for OpenCV DNN",
    "=" * 60,
]))

# Load MemNet through OpenCV's DNN module. Both Caffe files must be present;
# `memnet` is left as None when they are missing or loading fails, which
# predict_memnet() treats as "model unavailable".
try:
    if os.path.exists(config.MEMNET_PROTOTXT) and os.path.exists(config.MEMNET_MODEL):
        memnet = cv2.dnn.readNetFromCaffe(config.MEMNET_PROTOTXT, config.MEMNET_MODEL)
        print("MemNet model loaded successfully!")
    else:
        # Tell the user how to obtain the missing model files.
        print("\n".join([
            "Warning: MemNet model files not found.",
            "To download MemNet model files:",
            "1. Visit: http://memorability.csail.mit.edu/",
            "2. Download deploy.prototxt and memnet.caffemodel",
            "3. Place them in the 'memnet/' directory",
            f"Expected files: {config.MEMNET_PROTOTXT}, {config.MEMNET_MODEL}",
        ]))
        memnet = None
except Exception as e:
    print(f"Error loading MemNet: {str(e)}")
    memnet = None

def _scaled_size(width, height, target_size, cover):
    """Return the (width, height) that scales an image onto a square target.

    With ``cover=True`` the shorter side lands exactly on ``target_size`` (the
    image covers the square); with ``cover=False`` the longer side does (the
    image fits inside the square). Integer arithmetic is used on purpose:
    ``int(width * (target_size / width))`` can come out one pixel short
    because of floating-point round-off, and the other side can truncate to 0
    for very elongated images.
    """
    if width <= 0 or height <= 0:
        raise ValueError(f"Cannot resize an empty image ({width}x{height})")

    # Which side determines the scale factor?
    if cover:
        width_drives = width <= height
    else:
        width_drives = width >= height

    if width_drives:
        return target_size, max(1, height * target_size // width)
    return max(1, width * target_size // height), target_size


def resize_image_smart(img, target_size=227, method='center_crop'):
    """
    Resize an image to a target_size x target_size square.

    Args:
        img (PIL.Image): Input image
        target_size (int): Side length of the square output (227 for MemNet)
        method (str): Resizing method:
            'center_crop' - scale (aspect ratio kept) until the shorter side
                            equals target_size, then crop the center square
            'resize_pad'  - scale (aspect ratio kept) until the longer side
                            equals target_size, then center on a gray square
            'stretch'     - resize both sides to target_size, ignoring the
                            aspect ratio

    Returns:
        PIL.Image: Resized image

    Raises:
        ValueError: If method is not one of the names above, or if an
                    aspect-preserving method is given an image with a zero side
    """
    width, height = img.size

    if method == 'center_crop':
        # Scale so the image fully covers the target square...
        new_width, new_height = _scaled_size(width, height, target_size, cover=True)
        img = img.resize((new_width, new_height), Image.LANCZOS)

        # ...then keep only the central target_size x target_size window.
        left = (new_width - target_size) // 2
        top = (new_height - target_size) // 2
        img = img.crop((left, top, left + target_size, top + target_size))

    elif method == 'resize_pad':
        # Scale so the whole image fits inside the target square...
        new_width, new_height = _scaled_size(width, height, target_size, cover=False)
        img = img.resize((new_width, new_height), Image.LANCZOS)

        # ...then center it on a neutral gray canvas.
        canvas = Image.new('RGB', (target_size, target_size), (128, 128, 128))
        paste_x = (target_size - new_width) // 2
        paste_y = (target_size - new_height) // 2
        canvas.paste(img, (paste_x, paste_y))
        img = canvas

    elif method == 'stretch':
        # Simple stretch to target size (can distort aspect ratio)
        img = img.resize((target_size, target_size), Image.LANCZOS)

    else:
        raise ValueError(f"Unknown resize method: {method}")

    return img

def preprocess_image_memnet(image_path, resize_method='center_crop'):
    """
    Build the OpenCV DNN input blob for MemNet from an image file.

    Pipeline:
    1. Load the file and convert it to RGB
    2. Resize to 227x227 with resize_image_smart
    3. Convert the pixels to float32
    4. Reverse the channel axis (RGB -> BGR)
    5. Subtract the per-channel means [104.0, 117.0, 123.0] (BGR order)
    6. Wrap the result with cv2.dnn.blobFromImage

    Args:
        image_path (str): Path to input image
        resize_method (str): Method for resizing ('center_crop', 'resize_pad', 'stretch')

    Returns:
        np.ndarray: Blob produced by cv2.dnn.blobFromImage
    """
    rgb_image = Image.open(image_path).convert('RGB')
    resized = resize_image_smart(rgb_image, target_size=227, method=resize_method)

    # Height x width x channel array, promoted to float32 for the arithmetic below.
    pixels = np.array(resized).astype(np.float32)

    # Reversing the last axis turns RGB into the BGR order Caffe/OpenCV expect.
    bgr_pixels = pixels[:, :, ::-1]

    # In-place mean subtraction; the means are listed in BGR order.
    bgr_pixels -= np.array([104.0, 117.0, 123.0])

    # blobFromImage adds the batch dimension and reorders the data to NCHW.
    return cv2.dnn.blobFromImage(bgr_pixels)

def predict_memnet(image_path, resize_method='center_crop'):
    """
    Predict a memorability score for one image with the MemNet model.

    Args:
        image_path (str): Path to input image
        resize_method (str): Method for resizing ('center_crop', 'resize_pad', 'stretch')

    Returns:
        float or None: First value of the network output as a float, or None
                       when the model is not loaded or processing fails
    """
    # Model files were missing or failed to load: nothing to predict with.
    if memnet is None:
        return None

    try:
        # Feed the preprocessed blob to the network and run a forward pass.
        memnet.setInput(preprocess_image_memnet(image_path, resize_method=resize_method))
        output = memnet.forward()

        # The score is the first element of the first output row.
        return float(output[0][0])
    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        return None


## 5. Image Processing and Analysis

Process images and calculate memorability scores using both models.


In [None]:
def process_images(image_folder):
    """Score every image in a folder with both ResMem and MemNet.

    Only regular files ending in .jpg, .jpeg or .png (case-insensitive) are
    considered. Files are processed in sorted name order so that the resulting
    table - and the CSV written from it - is reproducible between runs
    (os.listdir returns entries in arbitrary order).

    Args:
        image_folder (str): Directory containing the images

    Returns:
        pd.DataFrame: One row per image with the columns 'filename',
            'resmem_score' and 'memnet_score' (a score is None when that model
            failed or is unavailable). An empty DataFrame is returned when the
            folder is missing, is not a directory, or holds no images.
    """
    # isdir (rather than exists) also rejects a path that points at a plain
    # file, which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(image_folder):
        print(f"Warning: Image folder '{image_folder}' not found.")
        return pd.DataFrame()

    image_files = sorted(
        f for f in os.listdir(image_folder)
        if f.lower().endswith(('.jpg', '.png', '.jpeg'))
        # Skip sub-directories that merely happen to be named like an image.
        and os.path.isfile(os.path.join(image_folder, f))
    )

    if not image_files:
        print(f"No image files found in {image_folder}")
        return pd.DataFrame()

    print(f"Found {len(image_files)} images to process")

    results = []
    for fname in tqdm(image_files, desc="Processing images"):
        image_path = os.path.join(image_folder, fname)

        # Both models use the resize method chosen in the notebook configuration.
        resmem_score = predict_resmem(
            image_path,
            resize_method=config.RESIZE_METHOD,
            use_consistent_preprocessing=config.USE_CONSISTENT_PREPROCESSING
        )
        memnet_score = predict_memnet(image_path, resize_method=config.RESIZE_METHOD)

        results.append({
            'filename': fname,
            'resmem_score': resmem_score,
            'memnet_score': memnet_score
        })

    return pd.DataFrame(results)

# Run the batch only when the configured folder exists; otherwise explain
# where to change the setting.
if os.path.exists(config.IMAGE_FOLDER):
    print(f"Processing images from: {config.IMAGE_FOLDER}")
    df = process_images(config.IMAGE_FOLDER)

    # Save results
    if not df.empty:
        df.to_csv(config.MEMORABILITY_SCORES_FILE, index=False)
        print(f"Results saved to: {config.MEMORABILITY_SCORES_FILE}")
        print(f"Processed {len(df)} images")
        # A bare `df.head()` inside an if-block is a discarded expression: only
        # the last top-level expression of a cell is auto-displayed. Print the
        # preview explicitly so it actually shows up.
        print(df.head())
    else:
        print("No results to display")
else:
    print(f"Image folder not found: {config.IMAGE_FOLDER}")
    print("Please update the IMAGE_FOLDER path in the Configuration section")


## 6. Results Analysis and Visualization

Analyze and visualize the memorability scores from both models.


In [None]:
def _rank_strength_label(rho):
    """Translate a Spearman coefficient into a verbal strength label."""
    if pd.isna(rho):
        # Happens when one model gives the same score to every image.
        return "Undefined"

    # Strength is about magnitude; a strong *negative* agreement is still strong.
    magnitude = abs(rho)
    if magnitude >= 0.7:
        label = "Strong"
    elif magnitude >= 0.4:
        label = "Moderate"
    elif magnitude >= 0.2:
        label = "Weak"
    else:
        return "Very weak"
    return f"{label} (negative)" if rho < 0 else label


def _report_model_agreement(df_both):
    """Print correlations and the best/worst rank agreements of the two models."""
    resmem_rank = df_both['resmem_score'].rank(ascending=False)
    memnet_rank = df_both['memnet_score'].rank(ascending=False)

    pearson_corr = df_both['resmem_score'].corr(df_both['memnet_score'], method='pearson')
    # Spearman's rho is Pearson's r computed on the ranks. Doing it this way
    # avoids pandas' method='spearman', which needs SciPy to be installed.
    spearman_corr = resmem_rank.corr(memnet_rank, method='pearson')

    print(f"Pearson correlation (linear): {pearson_corr:.3f}")
    print(f"Spearman rank correlation: {spearman_corr:.3f}")
    print()

    print(f"Rank correlation interpretation: {_rank_strength_label(spearman_corr)}")
    print("→ This measures how well the models agree on relative ordering of images")

    # Build a new frame instead of adding columns to a filtered one, which can
    # trigger pandas' chained-assignment warning.
    print("\nRanking Analysis:")
    ranked = df_both.assign(
        resmem_rank=resmem_rank,
        memnet_rank=memnet_rank,
        rank_diff=(resmem_rank - memnet_rank).abs(),
    )

    sections = (
        ("Top 3 images with highest ranking agreement:", ranked.nsmallest(3, 'rank_diff')),
        ("Top 3 images with highest ranking disagreement:", ranked.nlargest(3, 'rank_diff')),
    )
    for heading, subset in sections:
        print(heading)
        for _, row in subset.iterrows():
            print(f"  {row['filename']}: ResMem rank {row['resmem_rank']:.0f}, MemNet rank {row['memnet_rank']:.0f} (diff: {row['rank_diff']:.0f})")


def _plot_score_overview(df_clean, df_both):
    """Draw histogram, box plot and ResMem-vs-MemNet scatter side by side."""
    # Only models that produced at least one score are plotted.
    scored = [
        (label, df_clean[column].dropna())
        for label, column in (('ResMem', 'resmem_score'), ('MemNet', 'memnet_score'))
    ]
    scored = [(label, scores) for label, scores in scored if len(scores) > 0]

    plt.figure(figsize=(15, 5))

    # Distribution of scores
    plt.subplot(1, 3, 1)
    for label, scores in scored:
        plt.hist(scores, alpha=0.7, label=label, bins=20)
    plt.title('Distribution of Memorability Scores')
    plt.xlabel('Score')
    plt.ylabel('Frequency')
    if scored:
        plt.legend()

    # Box plot
    plt.subplot(1, 3, 2)
    if scored:
        plt.boxplot([scores for _, scores in scored])
        # Label the boxes through xticks: boxplot's `labels` keyword is
        # deprecated in recent Matplotlib releases (renamed `tick_labels`),
        # while this works on old and new versions alike. Boxes sit at 1..N.
        plt.xticks(range(1, len(scored) + 1), [label for label, _ in scored])
        plt.title('Score Distribution Comparison')
        plt.ylabel('Score')

    # Scatter plot (needs images scored by both models)
    plt.subplot(1, 3, 3)
    if len(df_both) > 0:
        plt.scatter(df_both['resmem_score'], df_both['memnet_score'], alpha=0.6)
        plt.xlabel('ResMem Score')
        plt.ylabel('MemNet Score')
        plt.title('ResMem vs MemNet Scores')
    else:
        plt.text(0.5, 0.5, 'Need both models\nfor comparison',
                 ha='center', va='center', transform=plt.gca().transAxes)
        plt.title('Model Comparison')

    plt.tight_layout()
    plt.show()


def analyze_results(df):
    """Analyze and visualize memorability scores.

    Prints summary statistics, the Pearson and Spearman correlations between
    the two models (when at least two images were scored by both), the images
    the models agree/disagree on most, and shows three plots.

    A row is kept as long as at least ONE model scored it. Dropping every row
    with any missing value would discard all data whenever one model is
    unavailable (e.g. the MemNet files are not installed), making the
    single-model statistics and plots unreachable.

    Args:
        df (pd.DataFrame): Table with the columns 'filename', 'resmem_score'
            and 'memnet_score'

    Returns:
        pd.DataFrame: The rows that have at least one score, or `df` itself
            when there is nothing to analyze
    """
    if df.empty:
        print("No data to analyze")
        return df

    score_columns = ['resmem_score', 'memnet_score']

    # Remove only the rows where both scores are missing
    df_clean = df.dropna(subset=score_columns, how='all')

    if df_clean.empty:
        print("No valid data to analyze")
        return df

    # Basic statistics
    print("Basic Statistics:")
    print(df_clean[score_columns].describe())

    resmem_valid = df_clean['resmem_score'].notna().sum()
    memnet_valid = df_clean['memnet_score'].notna().sum()
    print(f"\nValid scores - ResMem: {resmem_valid}, MemNet: {memnet_valid}")

    # Model-vs-model comparisons use only images scored by both models.
    df_both = df_clean.dropna(subset=score_columns)
    if len(df_both) > 1:
        _report_model_agreement(df_both)

    _plot_score_overview(df_clean, df_both)

    return df_clean

# Run the analysis only when the processing cell has produced a non-empty `df`.
if 'df' not in globals() or df.empty:
    print("No data to analyze. Please run the image processing cell first.")
else:
    analyzed_df = analyze_results(df)
