# Shortcut Learning Analysis: Texture vs. Shape Bias in CNNs

## 1. Project Overview
**Author:** Humza Gohar Kabir


**Objective:** To investigate the "Shortcut Learning" phenomenon in Deep Convolutional Neural Networks (CNNs), specifically the hypothesis that standard CNNs rely on superficial texture statistics rather than high-level semantic shape features.

## 2. Problem Statement
Deep Learning models often achieve high performance on benchmark datasets but fail to generalize to out-of-distribution samples. This project quantifies the **"Texture Bias"** of ResNet architectures by evaluating their performance on datasets where texture cues are rigorously removed.

## 3. Methodology
We conduct a comparative analysis between **ResNet-18** (Low Capacity) and **ResNet-101** (High Capacity) using the **Imagenette** dataset (10 classes).

The experiment follows a three-phase protocol:
1.  **Data Preprocessing:** Generating deterministic variations of the dataset:
    * *Edge-Extracted:* Using Canny Edge Detection to isolate shape.
    * *Segmented:* Using DeepLabV3 to remove background context.
2.  **Bias Quantification:** Training models on standard RGB images and evaluating the **"Degradation Gap"** when tested on Edge/Segmented data.
3.  **Control Experiment:** Training models specifically on Edge data to verify if shape-based learning is feasible ("Domain Mastery").

## 4. Key Hypotheses
* **H1:** Standard CNNs will suffer a catastrophic performance drop (>50%) when texture is removed, proving reliance on shortcuts.
* **H2:** Increasing model depth (ResNet-101) will not significantly mitigate this bias without changes to the training objective.

Computations were performed on the TRUBA High-Performance Computing cluster, which is why the code below consists of `.py` scripts and `sbatch` Slurm scripts; each cell is identified by the commented file name on its first line. The files are collected in a single `.ipynb` notebook for ease of reading, not to be run as a notebook. To run them, use your preferred TRUBA console (e.g., Termius) and create the files there with `nano`, for example `nano ~/CNN_Shortcut_Project/src/preprocess_data.py` or `nano ~/CNN_Shortcut_Project/slurm_scripts/run_train.sh`.

Dataset used: https://github.com/fastai/imagenette


In [None]:
# preprocess_data.py

import os
import cv2
import torch
import numpy as np
from torchvision import models, transforms
from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights
from PIL import Image
from tqdm import tqdm
import argparse

# COMMAND-LINE INTERFACE
# All three options are mandatory; --mode selects which variant is generated.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--input_dir',
    type=str,
    required=True,
    help="Path to raw imagenette folder",
)
parser.add_argument(
    '--output_dir',
    type=str,
    required=True,
    help="Base path for processed data",
)
parser.add_argument(
    '--mode',
    type=str,
    choices=['edges', 'segmentation'],
    required=True,
)
args = parser.parse_args()

# DEVICE SELECTION (GPU when one is visible, CPU otherwise)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Running mode: {args.mode} on {device}")

# SEGMENTATION MODEL
# Both names stay None in 'edges' mode, where no network is needed.
seg_model, seg_transform = None, None
if args.mode == 'segmentation':
    weights = DeepLabV3_ResNet50_Weights.DEFAULT
    # Pre-processing preset bundled with the chosen weights
    seg_transform = weights.transforms()
    seg_model = deeplabv3_resnet50(weights=weights).to(device)
    seg_model.eval()

def process_edges(img_path, save_path):
    """Write a 3-channel Canny edge map of ``img_path`` to ``save_path``.

    Files that OpenCV cannot decode are skipped silently: nothing is written
    for them and the function returns ``None``.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:
        return

    # Canny works on a single-channel image (thresholds: 100 low, 200 high).
    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grayscale, 100, 200)

    # Replicate the edge map over three channels so standard CNNs accept it.
    three_channel = np.stack([edge_map, edge_map, edge_map], axis=-1)
    cv2.imwrite(save_path, three_channel)

def process_segmentation(img_path, save_path):
    """Save a copy of ``img_path`` with its background blacked out.

    The module-level ``seg_model`` / ``seg_transform`` (initialised only when
    ``--mode segmentation`` is selected) predict a per-pixel class map. Every
    pixel predicted as class 0 (background) is set to black and the result is
    written to ``save_path`` with OpenCV.

    Images that cannot be opened or decoded are reported on stdout and
    skipped; no output file is written for them.
    """
    try:
        # The context manager releases the file handle once convert() has
        # produced an in-memory RGB copy.
        with Image.open(img_path) as handle:
            img_pil = handle.convert("RGB")
        original_np = np.array(img_pil)
    except Exception as err:
        # Best-effort batch job: skip unreadable files, but say which ones,
        # and no longer swallow KeyboardInterrupt/SystemExit the way the
        # previous bare ``except`` did.
        print(f"Skipping {img_path}: {err}")
        return

    # The weights' preset transform prepares the image for the network
    # (the original author notes it scales the image up to ~520px).
    input_tensor = seg_transform(img_pil).unsqueeze(0).to(device)

    with torch.no_grad():
        output = seg_model(input_tensor)['out'][0]

    # Per-pixel predicted class index (class 0 is background)
    output_predictions = output.argmax(0).byte().cpu().numpy()

    # Resize the mask back to the original image size.
    # cv2.resize expects (Width, Height), but numpy shape is (Height, Width);
    # nearest-neighbour keeps the class indices intact.
    h, w = original_np.shape[:2]
    mask_resized = cv2.resize(output_predictions, (w, h), interpolation=cv2.INTER_NEAREST)

    # Binary mask (0=Background, 1=Object)
    mask = (mask_resized > 0).astype(np.uint8)

    # Zero out background pixels in all three channels
    mask_3ch = np.stack([mask] * 3, axis=-1)
    foreground = original_np * mask_3ch

    # PIL delivers RGB; OpenCV writes BGR
    foreground_bgr = cv2.cvtColor(foreground, cv2.COLOR_RGB2BGR)
    cv2.imwrite(save_path, foreground_bgr)

# MAIN LOOP
# Walk <input_dir>/<split>/<class>/ and write each processed image to the
# same relative location under <output_dir>/<mode>/<split>/<class>/.
splits = ['train', 'val']
processors = {'edges': process_edges, 'segmentation': process_segmentation}
handler = processors.get(args.mode)

for split in splits:
    split_path = os.path.join(args.input_dir, split)
    if not os.path.exists(split_path):
        continue  # this split is absent from the input

    # One sub-folder per class
    for cls in os.listdir(split_path):
        cls_dir = os.path.join(split_path, cls)
        if not os.path.isdir(cls_dir):
            continue  # ignore stray files next to the class folders

        # Destination folder for this class, created on demand
        out_cls_dir = os.path.join(args.output_dir, args.mode, split, cls)
        os.makedirs(out_cls_dir, exist_ok=True)

        images = os.listdir(cls_dir)
        print(f"Processing {split}/{cls}...")

        # Output files keep the name of their source image
        for img_name in images:
            src = os.path.join(cls_dir, img_name)
            dst = os.path.join(out_cls_dir, img_name)
            if handler is not None:
                handler(src, dst)

print("Processing Complete.")

In [None]:
# slurm_scripts/run_preprocess.sh

#!/bin/bash
#SBATCH -J preprocess_data
#SBATCH -p barbun
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 40
#SBATCH --time=04:00:00
#SBATCH --output=preprocess_%j.log

# Slurm batch job that runs preprocess_data.py on the raw Imagenette data.
# The job exits non-zero when the Python step fails, so the scheduler does
# not report a broken run as successful.

echo "Job started on $(hostname)"

# 1. Load Environment
module purge
# We need to ensure python 3 is available
export PATH="$HOME/.local/bin:$PATH"

# 2. Define Paths
USER_NAME=$(whoami)
SRC_DIR="/arf/scratch/$USER_NAME/shortcut_data/raw/imagenette2-160"
DEST_DIR="/arf/scratch/$USER_NAME/shortcut_data/processed"
SCRIPT_PATH="$HOME/CNN_Shortcut_Project/src/preprocess_data.py"

# 3. Run Edge Detection
# NOTE(review): the edge-detection command is currently disabled, so the
# message below is printed without any work being done. Uncomment the command
# to regenerate the edge dataset.
echo "Starting Edge Detection..."
# python "$SCRIPT_PATH" --input_dir "$SRC_DIR" --output_dir "$DEST_DIR" --mode edges

# 4. Run Segmentation (Background Removal)
echo "Starting Segmentation..."
# Paths are quoted so they survive spaces or glob characters.
if ! python "$SCRIPT_PATH" --input_dir "$SRC_DIR" --output_dir "$DEST_DIR" --mode segmentation; then
    echo "Segmentation failed." >&2
    exit 1
fi

echo "Job finished."

In [None]:
# train.py

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score
import argparse
import copy
import sys

# ARGUMENTS
# The same set of data variants is valid for training and for testing.
DATA_MODES = ['original', 'edges', 'segmentation', 'grayscale', 'occlusion']

parser = argparse.ArgumentParser()
parser.add_argument(
    '--data_root', type=str, required=True, help="Root of dataset"
)
parser.add_argument(
    '--model', type=str, choices=['resnet18', 'resnet101'], default='resnet18'
)
parser.add_argument(
    '--train_mode', type=str, choices=list(DATA_MODES), required=True
)
parser.add_argument(
    '--test_mode', type=str, choices=list(DATA_MODES), required=True
)
parser.add_argument('--epochs', type=int, default=15)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--output_dir', type=str, default='./results')
args = parser.parse_args()

# SETUP DEVICE (GPU when one is visible, CPU otherwise)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# This header line is what parse_results.py looks for in the job log.
print(f"Exp: Train on {args.train_mode} -> Test on {args.test_mode} | Model: {args.model}")

# 1. DEFINE TRANSFORMS (Dynamic Augmentation)
def get_transforms(mode, is_training=False):
    """Build the torchvision pipeline for one data variant.

    Every pipeline resizes to 224x224, converts to a tensor and normalises
    with the standard ImageNet channel statistics. ``'grayscale'`` adds a
    3-channel grayscale conversion before the tensor step; ``'occlusion'``
    adds an always-on random erase after normalisation.

    ``is_training`` is accepted but currently has no effect on the result.
    """
    steps = [transforms.Resize((224, 224))]

    # Grayscale conversion happens on the PIL image, before ToTensor
    if mode == 'grayscale':
        steps += [transforms.Grayscale(num_output_channels=3)]

    steps += [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]

    # Cutout-style occlusion works on tensors, hence after normalisation.
    # p=1.0: a patch is erased from every image (robustness test).
    if mode == 'occlusion':
        steps += [transforms.RandomErasing(p=1.0, scale=(0.02, 0.2))]

    return transforms.Compose(steps)

# 2. DATA LOADER SETUP
def get_data_dir(root, mode):
    """Return the dataset base folder (without split) for ``mode``.

    'original', 'grayscale' and 'occlusion' read the raw Imagenette images
    (the latter two are produced on the fly by transforms); 'edges' and
    'segmentation' read the pre-generated copies under ``processed/<mode>``.

    Raises:
        ValueError: if ``mode`` is not one of the five known variants.
    """
    raw_modes = ('original', 'grayscale', 'occlusion')
    processed_modes = ('edges', 'segmentation')

    if mode in raw_modes:
        return os.path.join(root, 'raw', 'imagenette2-160')
    if mode in processed_modes:
        return os.path.join(root, 'processed', mode)
    raise ValueError(f"Unknown mode: {mode}")

# Train Loader (reads the 'train' split of the training variant)
train_dir = os.path.join(get_data_dir(args.data_root, args.train_mode), 'train')
train_transform = get_transforms(args.train_mode, is_training=True)
train_dataset = datasets.ImageFolder(train_dir, train_transform)
train_loader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=4,
)

# Test Loader (reads the 'val' split of the test variant, in fixed order)
test_dir = os.path.join(get_data_dir(args.data_root, args.test_mode), 'val')
test_transform = get_transforms(args.test_mode, is_training=False)
test_dataset = datasets.ImageFolder(test_dir, test_transform)
test_loader = DataLoader(
    test_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=4,
)

# The class list is taken from the training folder structure
class_names = train_dataset.classes
num_classes = len(class_names)
print(f"Data Loaded. Classes: {num_classes}")

# 3. MODEL SETUP
# Start from ImageNet-pretrained weights for the requested architecture.
model_builders = {
    'resnet18': models.resnet18,
    'resnet101': models.resnet101,
}
if args.model in model_builders:
    model = model_builders[args.model](weights='IMAGENET1K_V1')

# Swap the classification head for one sized to this dataset
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)
model = model.to(device)

# SGD with momentum; the learning rate is multiplied by 0.1 every 7 epochs
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# 4. TRAINING LOOP
# Each epoch: one optimisation pass over the training set, one LR-scheduler
# step, then a full evaluation on the test loader. The weights of the epoch
# with the highest test accuracy so far are written to disk.
best_acc = 0.0

for epoch in range(args.epochs):
    print(f'Epoch {epoch+1}/{args.epochs}')

    # --- Train phase ---
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch figure is a
        # per-sample average.
        running_loss += loss.item() * inputs.size(0)
    scheduler.step()

    # --- Test phase ---
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Predicted class = index of the largest logit
            preds = torch.max(model(inputs), 1).indices
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = accuracy_score(all_labels, all_preds)
    epoch_f1 = f1_score(all_labels, all_preds, average='macro')

    print(f"Train Loss: {epoch_loss:.4f} | Test Acc: {epoch_acc:.4f} | Test F1: {epoch_f1:.4f}")

    # Keep the checkpoint only when this epoch beats the best so far
    if epoch_acc > best_acc:
        best_acc = epoch_acc
        os.makedirs(args.output_dir, exist_ok=True)
        save_path = os.path.join(
            args.output_dir,
            f"{args.model}_Tr-{args.train_mode}_Te-{args.test_mode}.pth",
        )
        torch.save(model.state_dict(), save_path)

# This summary line is what parse_results.py extracts from the job log.
print(f"Best Accuracy: {best_acc:.4f}")

In [None]:
# slurm_scripts/run_train.sh


#!/bin/bash
#SBATCH -J cnn_exp_full
#SBATCH -p akya-cuda         # Primary GPU partition
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 10                # CPU cores for data loading
#SBATCH --gres=gpu:1         # Request 1 GPU
#SBATCH --time=24:00:00      # 24 hours to cover all experiments
#SBATCH --output=/arf/scratch/%u/shortcut_data/results/train_%j.log

# Runs the full experiment grid (every model x train/test variant) by calling
# train.py once per combination.
# NOTE: the directory named in --output above must already exist when the job
# is submitted; the mkdir further down runs too late to create it for the log.

echo "Job started on $(hostname)"

# 1. Load Environment
module purge
export PATH="$HOME/.local/bin:$PATH"

# 2. Define Variables
USER_NAME=$(whoami)
# Data and Results in Scratch
DATA_ROOT="/arf/scratch/$USER_NAME/shortcut_data"
OUTPUT_DIR="/arf/scratch/$USER_NAME/shortcut_data/results"
SCRIPT_PATH="$HOME/CNN_Shortcut_Project/src/train.py"

mkdir -p "$OUTPUT_DIR"

# Define Lists
MODELS=("resnet18" "resnet101")
MODIFICATIONS=("edges" "segmentation" "grayscale" "occlusion")

# Number of experiments whose python process exited non-zero
FAILED=0

# run_experiment MODEL TRAIN_MODE TEST_MODE
# Trains and evaluates one configuration for 15 epochs. A failure is reported
# and counted, but the remaining experiments still run.
run_experiment() {
    local model="$1" train_mode="$2" test_mode="$3"
    if ! python "$SCRIPT_PATH" \
        --data_root "$DATA_ROOT" \
        --output_dir "$OUTPUT_DIR" \
        --model "$model" \
        --epochs 15 \
        --train_mode "$train_mode" \
        --test_mode "$test_mode"; then
        echo "FAILED: model=$model train=$train_mode test=$test_mode" >&2
        FAILED=$((FAILED + 1))
    fi
}

# MAIN EXPERIMENT LOOP

for MODEL in "${MODELS[@]}"; do
    echo "STARTING BATCH FOR MODEL: $MODEL"

    # BLOCK A: BASELINE & EXPERIMENT 1 (Bias Check)
    # Goal: Train on Original, Test on Everything (Original + Modified)

    echo "[Exp 1] Training on ORIGINAL Data "

    # 1. Baseline: Train Original -> Test Original
    echo "Running: Train Original -> Test Original"
    run_experiment "$MODEL" original original

    # 2. Bias Checks: Train Original -> Test Modified
    for MOD in "${MODIFICATIONS[@]}"; do
        echo "Running: Train Original -> Test $MOD"
        run_experiment "$MODEL" original "$MOD"
    done

    # BLOCK B: EXPERIMENT 2 & 3 (Shape Adaptation & Domain Mastery)
    # Goal: Train on Modified, Test on Original & Same Modified

    echo "[Exp 2 & 3] Training on MODIFIED Data "

    for MOD in "${MODIFICATIONS[@]}"; do

        # Exp 2: Train Modified -> Test Original (Does the model learn shape?)
        echo "Running: Train $MOD -> Test Original"
        run_experiment "$MODEL" "$MOD" original

        # Exp 3: Train Modified -> Test Modified (Theoretical limit)
        echo "Running: Train $MOD -> Test $MOD"
        run_experiment "$MODEL" "$MOD" "$MOD"
    done

done

echo "All training experiments finished."

# Surface failures in the job's exit status so the scheduler does not report
# a partially failed grid as a clean run.
if [ "$FAILED" -gt 0 ]; then
    echo "$FAILED experiment(s) failed." >&2
    exit 1
fi

In [None]:
# parse_results.py

import re
import pandas as pd
import sys

# Log line formats emitted by train.py:
#   "Exp: Train on X -> Test on Y | Model: Z"
#   "Best Accuracy: 0.9958"
EXP_PATTERN = re.compile(r"Exp: Train on (.*?) -> Test on (.*?) \| Model: (.*)")
ACC_PATTERN = re.compile(r"Best Accuracy: ([\d\.]+)")


def parse_log_lines(lines):
    """Extract one result row per completed experiment from log lines.

    An experiment starts at an "Exp: ..." header and is completed by the next
    "Best Accuracy: ..." line. A header that is followed by another header
    before any accuracy line (e.g. a crashed run) is dropped, and an accuracy
    line with no pending header is ignored.

    Args:
        lines: iterable of strings (an open text file works).

    Returns:
        A list of dicts with keys 'Model', 'Train', 'Test' and 'Accuracy'.
    """
    results = []
    current_exp = {}

    for line in lines:
        # Detect experiment start
        match_exp = EXP_PATTERN.search(line)
        if match_exp:
            current_exp = {
                'Model': match_exp.group(3).strip(),
                'Train': match_exp.group(1).strip(),
                'Test': match_exp.group(2).strip()
            }

        # Detect final accuracy; only meaningful once a header has been seen
        match_acc = ACC_PATTERN.search(line)
        if match_acc and current_exp:
            current_exp['Accuracy'] = float(match_acc.group(1))
            results.append(current_exp)
            current_exp = {}

    return results


def main(argv=None):
    """Parse the log named on the command line, print and save a summary.

    Returns the process exit code: 0 on success, 2 when no log file is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Previously this surfaced as a bare IndexError on sys.argv[1].
        print("Usage: python parse_results.py <train_log_file>", file=sys.stderr)
        return 2

    # Path to your log file
    log_file = argv[1]
    # errors="replace": a stray undecodable byte in a job log must not abort
    # the whole summary.
    with open(log_file, 'r', errors='replace') as f:
        results = parse_log_lines(f)

    # Create DataFrame
    df = pd.DataFrame(results)
    print(df.to_string(index=False))

    # Save to CSV
    df.to_csv("final_results_summary.csv", index=False)
    print("\nSaved to final_results_summary.csv")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

In [None]:
# visualize.py

import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from torchvision import datasets, transforms, models
import torch.nn as nn
from torch.utils.data import DataLoader
import os
import argparse

# ARGS
# Three mandatory string options, plus the architecture of the checkpoint.
parser = argparse.ArgumentParser()
for flag in ('--model_path', '--data_root', '--test_mode'):
    parser.add_argument(flag, type=str, required=True)
parser.add_argument('--model_arch', type=str, default='resnet18')
args = parser.parse_args()

# GPU when one is visible, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# 1. Setup Data
def get_transforms(mode):
    """Build the evaluation pipeline for ``mode``.

    All modes resize to 224x224, convert to a tensor and normalise with the
    standard ImageNet channel statistics. ``'grayscale'`` inserts a 3-channel
    grayscale conversion; ``'occlusion'`` appends an always-on random erase,
    matching the pipeline train.py uses for that test mode. Without the erase,
    an "occlusion" confusion matrix would describe un-occluded images.
    """
    trans = [transforms.Resize((224, 224))]
    if mode == 'grayscale':
        trans.append(transforms.Grayscale(num_output_channels=3))
    trans.append(transforms.ToTensor())
    trans.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
    if mode == 'occlusion':
        # Same settings as train.py; RandomErasing works on tensors, so it
        # comes after normalisation.
        trans.append(transforms.RandomErasing(p=1.0, scale=(0.02, 0.2)))
    return transforms.Compose(trans)

# Locate the 'val' split: raw images for variants produced by transforms,
# pre-generated images for everything else.
raw_modes = ('original', 'grayscale', 'occlusion')
base_dir = (
    os.path.join(args.data_root, 'raw', 'imagenette2-160')
    if args.test_mode in raw_modes
    else os.path.join(args.data_root, 'processed', args.test_mode)
)
data_dir = os.path.join(base_dir, 'val')

eval_transform = get_transforms(args.test_mode)
dataset = datasets.ImageFolder(data_dir, eval_transform)
loader = DataLoader(dataset, batch_size=32, shuffle=False)

print(f"Loaded {len(dataset.classes)} classes from {data_dir}")

# 2. Load Model
# Any --model_arch other than 'resnet18' is treated as ResNet-101.
if args.model_arch == 'resnet18':
    model = models.resnet18()
else:
    model = models.resnet101()
# Rebuild the classification head at the dataset's class count before the
# saved weights are loaded into it.
model.fc = nn.Linear(model.fc.in_features, len(dataset.classes))
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load(args.model_path, map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

# 3. Get Predictions
# Collect ground-truth and predicted class indices over the whole split.
y_true, y_pred = [], []
with torch.no_grad():
    for inputs, labels in loader:
        logits = model(inputs.to(device))
        # Predicted class = index of the largest logit
        preds = torch.max(logits, 1).indices
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# 4. Plot
# Heatmap of the confusion matrix with class names on both axes.
cm = confusion_matrix(y_true, y_pred)
tick_labels = dataset.classes
out_png = f'confusion_matrix_{args.test_mode}.png'

plt.figure(figsize=(10,8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=tick_labels,
    yticklabels=tick_labels,
    cmap='Blues',
)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title(f'Confusion Matrix: {args.test_mode}')
plt.savefig(out_png)
print(f"Saved {out_png}")

In [None]:
# analyze_failures.py

import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms, models
import torch.nn as nn
from torch.utils.data import DataLoader
import os
import argparse
import numpy as np

# ARGS
parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str, required=True)
parser.add_argument('--data_root', type=str, required=True)
parser.add_argument('--test_mode', type=str, required=True)
# Architecture of the checkpoint. Defaults to ResNet-18, which used to be
# hard-coded, so existing invocations keep working.
parser.add_argument('--model_arch', type=str, choices=['resnet18', 'resnet101'], default='resnet18')
args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Transforms
# Mirror the per-mode pipeline used by train.py so that failures are searched
# on the same inputs the model was evaluated on: 'grayscale' and 'occlusion'
# are produced on the fly from the raw images.
transform_steps = [transforms.Resize((224, 224))]
if args.test_mode == 'grayscale':
    transform_steps.append(transforms.Grayscale(num_output_channels=3))
transform_steps.append(transforms.ToTensor())
transform_steps.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
if args.test_mode == 'occlusion':
    # RandomErasing works on tensors, so it comes after normalisation
    transform_steps.append(transforms.RandomErasing(p=1.0, scale=(0.02, 0.2)))
trans = transforms.Compose(transform_steps)

#  Point directly to 'val' folder
if args.test_mode in ['original', 'grayscale', 'occlusion']:
    base_dir = os.path.join(args.data_root, 'raw', 'imagenette2-160')
else:
    base_dir = os.path.join(args.data_root, 'processed', args.test_mode)

data_dir = os.path.join(base_dir, 'val')


dataset = datasets.ImageFolder(data_dir, trans)
# shuffle=True: the handful of failures shown is a random sample, not simply
# the first images of the first class.
loader = DataLoader(dataset, batch_size=32, shuffle=True)

print(f"Loaded {len(dataset.classes)} classes from {data_dir}")

# Load Model (architecture selected by --model_arch)
if args.model_arch == 'resnet101':
    model = models.resnet101()
else:
    model = models.resnet18()
# Rebuild the classification head at the dataset's class count before the
# saved weights are loaded into it.
model.fc = nn.Linear(model.fc.in_features, len(dataset.classes))
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load(args.model_path, map_location=device))
model.to(device)
model.eval()

# Find Failures
# Scan batches until five misclassified samples have been collected. Each
# entry is (image tensor on CPU, true class index, predicted class index).
failures = []

print("Searching for failures...")
with torch.no_grad():
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = torch.max(model(inputs), 1).indices

        # Positions within the batch where prediction and label disagree
        mismatched = torch.nonzero(preds != labels, as_tuple=True)[0]

        # Take only as many as are still needed to reach five
        still_needed = 5 - len(failures)
        for idx in mismatched[:still_needed]:
            failures.append((inputs[idx].cpu(), labels[idx].item(), preds[idx].item()))

        if len(failures) >= 5:
            break

# Plot
# One row of five panels; each shows a misclassified image with its true and
# predicted class names.
print(f"Found {len(failures)} failures. Generating image...")
fig, axes = plt.subplots(1, 5, figsize=(20, 5))
class_names = dataset.classes

# Hide every panel's frame up front so that panels left unused (fewer than
# five failures found) stay blank instead of showing empty axes.
for ax in axes:
    ax.axis('off')

for i, (img_tensor, true_idx, pred_idx) in enumerate(failures):
    # Un-normalize for display: CHW -> HWC, then invert the mean/std scaling
    img = img_tensor.permute(1, 2, 0).numpy()
    img = img * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
    # imshow expects floats in [0, 1]
    img = np.clip(img, 0, 1)

    axes[i].imshow(img)
    axes[i].set_title(f"True: {class_names[true_idx]}\nPred: {class_names[pred_idx]}", color='red')

plt.suptitle(f'Failure Analysis: {args.test_mode}', fontsize=16)
plt.savefig('failure_examples.png')
print("Saved failure_examples.png")