In [43]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
from PIL import Image
import cv2
import copy
import time
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim
import pandas as pd
# Per-frame experiment records: the logging main loop appends one dict per
# processed webcam frame and writes the whole list to a CSV file when it ends.
experimental_data_log = []


In [13]:

# Report whether PyTorch can see a CUDA device before anything is built on it.
if not torch.cuda.is_available():
    print("CUDA is not available. You may need to install the CUDA toolkit and drivers.")
else:
    print("CUDA is available!")
    print("Device name:", torch.cuda.get_device_name(0))  # name of device index 0
    print("CUDA version:", torch.version.cuda)  # version string exposed by torch

CUDA is available!
Device name: NVIDIA GeForce RTX 4050 Laptop GPU
CUDA version: 12.8


In [44]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Working resolution (pixels per side): larger with CUDA, smaller on CPU.
imsize = 256 if torch.cuda.is_available() else 128

# Preprocessing applied to every PIL image: square resize, then to tensor.
loader = transforms.Compose(
    [transforms.Resize((imsize, imsize)), transforms.ToTensor()]
)

def image_loader(image_path):
    """Load an image file as a batched float tensor on the active device.

    The file is opened with PIL, converted to RGB, run through the
    module-level ``loader`` transform and given a leading batch axis.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (typically a path).

    Returns:
        A ``torch.float`` tensor located on ``device`` whose first axis has
        size 1.
    """
    # Close the file handle deterministically instead of waiting for garbage
    # collection; convert() returns a separate, fully loaded image, so it
    # remains usable after the ``with`` block ends.
    with Image.open(image_path) as source:
        rgb_image = source.convert("RGB")
    batched = loader(rgb_image).unsqueeze(0)
    return batched.to(device, torch.float)

def tensor_to_cv2(tensor):
    """Turn a batched image tensor into a BGR array that OpenCV can display.

    The tensor is copied to the CPU and detached, its leading axis is squeezed
    away, and the result is rendered through ``ToPILImage`` before the
    RGB -> BGR channel swap.
    """
    host_copy = tensor.cpu().clone().detach()
    pil_image = transforms.ToPILImage()(host_copy.squeeze(0))
    rgb_array = np.array(pil_image)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)


In [45]:
class ContentLoss(nn.Module):
    """Pass-through layer that records the MSE between its input and a fixed target."""

    def __init__(self, target):
        super().__init__()
        # Most recent loss value; stays None until forward() has run once.
        self.loss = None
        # The target is a constant reference, so it is detached from the
        # autograd graph rather than treated as something to differentiate.
        self.target = target.detach()

    def forward(self, input_tensor):
        """Store the content loss on ``self.loss`` and return the input unchanged."""
        mse = nn.functional.mse_loss
        self.loss = mse(input_tensor, self.target)
        return input_tensor

def gram_matrix(input):
    """Return the size-normalised Gram matrix of a 4-D feature tensor.

    Args:
        input: Tensor whose ``size()`` unpacks into four values
            ``(b, c, h, w)``.

    Returns:
        A ``(b * c, b * c)`` tensor: the product of the flattened feature
        rows with their transpose, divided by ``b * c * h * w``.
    """
    b, c, h, w = input.size()
    # reshape() (unlike view()) also accepts non-contiguous inputs such as
    # transposed or channels-last feature maps, copying only when required.
    features = input.reshape(b * c, h * w)
    G = torch.mm(features, features.t())
    # Dividing by the element count keeps values comparable across layer sizes.
    return G.div(b * c * h * w)

class StyleLoss(nn.Module):
    """Pass-through layer that records the MSE between Gram matrices.

    The Gram matrix of the target feature map is computed once at construction
    time; every forward pass compares the input's Gram matrix against it.
    """

    def __init__(self, target_feature):
        super().__init__()
        # Detached: the target Gram matrix is a constant reference value.
        self.target = gram_matrix(target_feature).detach()
        # Initialised like ContentLoss so reading ``loss`` before the first
        # forward pass yields None instead of raising AttributeError.
        self.loss = None

    def forward(self, x):
        """Store the style loss on ``self.loss`` and return ``x`` unchanged."""
        G = gram_matrix(x)
        self.loss = nn.functional.mse_loss(G, self.target)
        return x

In [46]:
# Feature-extraction part of a pretrained VGG-19, on the target device, in eval mode.
cnn = (
    models.vgg19(weights=models.VGG19_Weights.DEFAULT)
    .features
    .to(device)
    .eval()
)

# Per-channel mean / std consumed by the Normalization module
# (presumably the statistics the pretrained weights expect -- confirm).
normalization_mean = torch.tensor([0.485, 0.456, 0.406], device=device)
normalization_std = torch.tensor([0.229, 0.224, 0.225], device=device)

class Normalization(nn.Module):
    """Channel-wise ``(img - mean) / std`` normalisation as a module.

    Args:
        mean: Per-channel means, as a tensor or any sequence accepted by
            ``torch.as_tensor``.
        std: Per-channel standard deviations, same accepted types as ``mean``.
    """

    def __init__(self, mean, std):
        super().__init__()
        # Shape (C, 1, 1) so the statistics broadcast over trailing H and W axes.
        mean = torch.as_tensor(mean).reshape(-1, 1, 1)
        std = torch.as_tensor(std).reshape(-1, 1, 1)
        # Registered as buffers so module-level .to()/.cuda()/.double() calls
        # move them together with the rest of the model; non-persistent so the
        # module's state_dict stays empty, exactly as before.
        self.register_buffer("mean", mean, persistent=False)
        self.register_buffer("std", std, persistent=False)

    def forward(self, img):
        """Return ``img`` normalised with the stored per-channel statistics."""
        return (img - self.mean) / self.std

def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                                style_img, content_img,
                                content_layers=('conv_4',),
                                style_layers=('conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5')):
    """Build a truncated copy of ``cnn`` with loss-recording layers inserted.

    The layers of ``cnn`` are copied one by one into a new ``nn.Sequential``
    that starts with a ``Normalization`` module. After every layer whose
    generated name appears in ``content_layers`` / ``style_layers`` a
    ``ContentLoss`` / ``StyleLoss`` module is appended, with its target taken
    from running the model built so far on ``content_img`` / ``style_img``.
    Everything after the last loss module is dropped.

    Args:
        cnn: Module whose direct children are the layers to copy; it is
            deep-copied, so the caller's instance is left untouched.
        normalization_mean: Per-channel mean passed to ``Normalization``.
        normalization_std: Per-channel std passed to ``Normalization``.
        style_img: Image tensor used to compute the style targets.
        content_img: Image tensor used to compute the content targets.
        content_layers: Names (``conv_<n>`` etc.) that receive a content loss.
        style_layers: Names that receive a style loss.

    Returns:
        ``(model, style_losses, content_losses)`` where the two lists hold the
        inserted loss modules in insertion order.
    """
    cnn = copy.deepcopy(cnn)

    # Follow the device of the images rather than a module-level global.
    normalization = Normalization(normalization_mean, normalization_std).to(content_img.device)

    content_losses = []
    style_losses = []

    model = nn.Sequential(normalization)

    conv_index = 0  # incremented at every Conv2d; also labels the layers after it
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            conv_index += 1
            name = f'conv_{conv_index}'
        elif isinstance(layer, nn.ReLU):
            name = f'relu_{conv_index}'
            # Swap in an out-of-place ReLU so the activations seen by the loss
            # modules inserted just before it are not overwritten.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = f'pool_{conv_index}'
        elif isinstance(layer, nn.BatchNorm2d):
            name = f'bn_{conv_index}'
        else:
            # Layer types outside the four above are left out of the copy.
            continue

        model.add_module(name, layer)

        if name in content_layers:
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module(f"content_loss_{conv_index}", content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module(f"style_loss_{conv_index}", style_loss)
            style_losses.append(style_loss)

    # Truncate the model after the last loss module. A dedicated index is used
    # instead of reusing the conv counter; with no loss module present only
    # the normalisation layer is kept.
    last_loss_index = 0
    for position in range(len(model) - 1, -1, -1):
        if isinstance(model[position], (ContentLoss, StyleLoss)):
            last_loss_index = position
            break
    model = model[:last_loss_index + 1]

    return model, style_losses, content_losses




Style transfer with convergence logging

In [53]:
def run_style_transfer(cnn_model, norm_mean, norm_std,
                       content_img_tensor, style_img_tensor, input_img_tensor,
                       num_steps=200, style_weight=1e6, content_weight=1,
                       log_every_k_steps=10):
    """Optimise ``input_img_tensor`` with LBFGS and log convergence on the way.

    Args:
        cnn_model, norm_mean, norm_std: Forwarded to
            ``get_style_model_and_losses``.
        content_img_tensor: Image providing the content targets.
        style_img_tensor: Image providing the style targets.
        input_img_tensor: Image being optimised. It is modified in place and
            has ``requires_grad`` switched on.
        num_steps: The optimiser keeps stepping while the number of closure
            evaluations is ``<= num_steps``. One ``optimizer.step`` may
            evaluate the closure several times, so the count can overshoot.
        style_weight: Multiplier applied to the summed style losses.
        content_weight: Multiplier applied to the summed content losses.
        log_every_k_steps: A convergence record is stored every this many
            closure evaluations, and at evaluation ``num_steps``. ``0``,
            a negative number or ``None`` disables the periodic records.

    Returns:
        ``(image, weighted_style_loss, weighted_content_loss, convergence)``:
        the detached, clamped image, the two weighted losses evaluated on that
        image, and a list of dicts with keys ``step``, ``total_loss``,
        ``weighted_style_loss`` and ``weighted_content_loss``.
    """
    # Build the style transfer model with loss layers
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn_model, norm_mean, norm_std, style_img_tensor, content_img_tensor
    )
    # The image is optimised; the network parameters stay frozen.
    input_img_tensor.requires_grad_(True)
    model.requires_grad_(False)

    optimizer = optim.LBFGS([input_img_tensor])

    convergence_this_run = []
    evaluations = 0  # number of closure calls so far

    def closure():
        nonlocal evaluations

        # Keep the image inside the valid [0, 1] range before evaluating it.
        with torch.no_grad():
            input_img_tensor.clamp_(0, 1)

        optimizer.zero_grad()
        model(input_img_tensor)  # populates .loss on every loss module

        s_score_raw = sum(sl.loss for sl in style_losses)
        c_score_raw = sum(cl.loss for cl in content_losses)

        loss = (s_score_raw * style_weight) + (c_score_raw * content_weight)
        loss.backward()

        evaluations += 1

        # Guarding the modulo avoids ZeroDivisionError for a zero interval.
        on_interval = bool(log_every_k_steps) and log_every_k_steps > 0 \
            and evaluations % log_every_k_steps == 0
        if on_interval or evaluations == num_steps:
            convergence_this_run.append({
                'step': evaluations,
                'total_loss': loss.item(),
                'weighted_style_loss': s_score_raw.item() * style_weight,
                'weighted_content_loss': c_score_raw.item() * content_weight
            })
        return loss

    while evaluations <= num_steps:
        optimizer.step(closure)

    with torch.no_grad():
        # Final clamp of the optimised image.
        input_img_tensor.clamp_(0, 1)
        # The loss modules still hold values from the last closure call, which
        # ran before this clamp; re-evaluate so the reported losses describe
        # the image that is actually returned.
        model(input_img_tensor)

    final_s_loss_val = sum(sl.loss.item() for sl in style_losses) * style_weight
    final_c_loss_val = sum(cl.loss.item() for cl in content_losses) * content_weight

    return input_img_tensor.detach(), final_s_loss_val, final_c_loss_val, convergence_this_run

Style transfer returning final losses (newer version)

In [39]:
def run_style_transfer(cnn_model, norm_mean, norm_std,
                       content_img_tensor, style_img_tensor, input_img_tensor,
                       num_steps=200, style_weight=1e6, content_weight=1):
    """Run the style transfer optimisation and report the final losses.

    Args:
        cnn_model, norm_mean, norm_std: Forwarded to
            ``get_style_model_and_losses``.
        content_img_tensor: Image providing the content targets.
        style_img_tensor: Image providing the style targets.
        input_img_tensor: Image being optimised. It is modified in place and
            has ``requires_grad`` switched on.
        num_steps: The optimiser keeps stepping while the number of closure
            evaluations is ``<= num_steps``. One ``optimizer.step`` may
            evaluate the closure several times, so the count can overshoot.
        style_weight: Multiplier applied to the summed style losses.
        content_weight: Multiplier applied to the summed content losses.

    Returns:
        ``(image, weighted_style_loss, weighted_content_loss)`` where the
        image is detached and clamped to [0, 1] and both losses are evaluated
        on that returned image.
    """
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn_model, norm_mean, norm_std, style_img_tensor, content_img_tensor)

    # The image is optimised; the network parameters stay frozen.
    input_img_tensor.requires_grad_(True)
    model.requires_grad_(False)

    optimizer = optim.LBFGS([input_img_tensor])

    evaluations = 0  # number of closure calls so far

    def closure():
        nonlocal evaluations

        # Keep the image inside the valid [0, 1] range before evaluating it.
        with torch.no_grad():
            input_img_tensor.clamp_(0, 1)

        optimizer.zero_grad()
        model(input_img_tensor)  # populates .loss on every loss module

        s_score = sum(sl.loss for sl in style_losses) * style_weight
        c_score = sum(cl.loss for cl in content_losses) * content_weight

        loss = s_score + c_score
        loss.backward()

        evaluations += 1
        return loss

    while evaluations <= num_steps:
        optimizer.step(closure)

    with torch.no_grad():
        # Final clamp after optimisation.
        input_img_tensor.clamp_(0, 1)
        # The loss modules still hold values from the last closure call, which
        # ran before this clamp; re-evaluate so the reported losses describe
        # the image that is actually returned.
        model(input_img_tensor)

    final_style_loss = sum(sl.loss.item() for sl in style_losses) * style_weight
    final_content_loss = sum(cl.loss.item() for cl in content_losses) * content_weight

    return input_img_tensor.detach(), final_style_loss, final_content_loss


Style transfer main (original version)

In [32]:
def run_style_transfer(cnn, norm_mean, norm_std, content_img, style_img, input_img, num_steps=200, style_weight=1e6, content_weight=1):
    """Optimise ``input_img`` with LBFGS and return the stylised image.

    Args:
        cnn, norm_mean, norm_std: Forwarded to ``get_style_model_and_losses``.
        content_img: Image providing the content targets.
        style_img: Image providing the style targets.
        input_img: Image being optimised. It is modified in place and has
            ``requires_grad`` switched on.
        num_steps: The optimiser keeps stepping while the number of closure
            evaluations is ``<= num_steps``. One ``optimizer.step`` may
            evaluate the closure several times, so the count can overshoot.
        style_weight: Multiplier applied to the summed style losses.
        content_weight: Multiplier applied to the summed content losses.

    Returns:
        The optimised image, detached and clamped to [0, 1].
    """
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn, norm_mean, norm_std, style_img, content_img)

    # Only the image is optimised; freezing the network avoids computing
    # gradients for its weights on every backward pass.
    model.requires_grad_(False)

    optimizer = optim.LBFGS([input_img.requires_grad_()])

    evaluations = 0  # number of closure calls so far

    def closure():
        nonlocal evaluations

        # Keep the image inside the valid [0, 1] range before evaluating it.
        with torch.no_grad():
            input_img.clamp_(0, 1)

        optimizer.zero_grad()
        model(input_img)  # populates .loss on every loss module
        style_score = sum(sl.loss for sl in style_losses)
        content_score = sum(cl.loss for cl in content_losses)

        loss = style_score * style_weight + content_score * content_weight
        loss.backward()

        evaluations += 1
        return loss

    while evaluations <= num_steps:
        optimizer.step(closure)

    # Without a final clamp the result can leave [0, 1], which corrupts the
    # image once it is converted to 8-bit pixels for display.
    with torch.no_grad():
        input_img.clamp_(0, 1)

    return input_img.detach()


Execution alt

In [41]:
# Webcam style-transfer loop with an on-screen metrics overlay.
# NOTE: this cell unpacks three values from run_style_transfer, so the
# definition returning (image, style_loss, content_loss) must be the active one.
style_path = "Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg" # Make sure this image is in the same directory or provide the full path
style_image = image_loader(style_path)

cap = cv2.VideoCapture(0) # 0 for default webcam
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): exit() would shut the notebook kernel down.
    raise RuntimeError("Error: Could not open webcam.")

print("Capturing and stylizing webcam. Press 'q' to exit.")

# METRIC: FPS calculation variables
frame_count = 0
start_time = time.time()
display_fps = 0

# Optimisation budget per frame (passed as num_steps). Every frame is optimised
# from scratch, so lower values (for example 5-10) trade quality for frame rate.
webcam_num_steps = 200

metric_error_reported = False  # report an SSIM/PSNR failure once, not per frame

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame from webcam.")
            break

        # Resize to imsize first so the tensor and the metric reference share a shape.
        resized_frame_for_tensor = cv2.resize(frame, (imsize, imsize))

        # OpenCV delivers BGR; PIL / PyTorch and the metrics below use RGB.
        original_content_for_metric = cv2.cvtColor(resized_frame_for_tensor, cv2.COLOR_BGR2RGB)
        content_image_pil = Image.fromarray(original_content_for_metric)
        content_image = loader(content_image_pil).unsqueeze(0).to(device)

        # Start the optimisation from the content frame itself.
        input_image = content_image.clone()

        # METRIC: time spent on one frame's style transfer
        loop_start_time = time.time()

        output_tensor, final_s_loss, final_c_loss = run_style_transfer(
            cnn, normalization_mean, normalization_std,
            content_image, style_image, input_image,
            num_steps=webcam_num_steps
        )

        loop_time = time.time() - loop_start_time

        styled_frame_cv2 = tensor_to_cv2(output_tensor)

        # METRIC: SSIM / PSNR between the resized input frame and the stylised
        # output. Computed BEFORE any text is drawn, and with both images in
        # RGB channel order, so the overlay and a channel swap do not distort
        # the scores.
        stylized_for_metric = cv2.cvtColor(styled_frame_cv2, cv2.COLOR_BGR2RGB)
        current_ssim = None
        current_psnr = None
        try:
            current_ssim = ssim(original_content_for_metric, stylized_for_metric,
                                channel_axis=-1, data_range=255)
            current_psnr = psnr(original_content_for_metric, stylized_for_metric,
                                data_range=255)
        except Exception as e:
            # Metrics are best effort: keep the stream alive, but say why once.
            if not metric_error_reported:
                print(f"Metric calculation error: {e}")
                metric_error_reported = True

        # METRIC: overall FPS, refreshed about once per second
        frame_count += 1
        elapsed_time = time.time() - start_time
        if elapsed_time > 1:
            display_fps = frame_count / elapsed_time
            frame_count = 0
            start_time = time.time()

        # METRIC: draw the overlay
        cv2.putText(styled_frame_cv2, f"FPS: {display_fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(styled_frame_cv2, f"Style Loss: {final_s_loss:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.putText(styled_frame_cv2, f"Content Loss: {final_c_loss:.2f}", (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.putText(styled_frame_cv2, f"Loop Time: {loop_time:.2f}s", (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.putText(styled_frame_cv2, f"Steps: {webcam_num_steps}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        if current_ssim is not None and current_psnr is not None:
            cv2.putText(styled_frame_cv2, f"SSIM: {current_ssim:.2f}", (10, 140), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            cv2.putText(styled_frame_cv2, f"PSNR: {current_psnr:.2f}", (10, 160), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        cv2.imshow("Neural Style Transfer - Webcam", styled_frame_cv2)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, including on kernel
    # interrupt or an exception inside the loop.
    cap.release()
    cv2.destroyAllWindows()


Capturing and stylizing webcam. Press 'q' to exit.


Execution (original)

In [34]:
# Minimal webcam style-transfer loop.
# NOTE: this cell uses the return value of run_style_transfer directly as the
# output image, so the definition returning only the image must be the active one.
style_path = "Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg"
style_image = image_loader(style_path)

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open webcam.")

print("Capturing and stylizing webcam. Press 'q' to exit.")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Resize, convert BGR -> RGB, then build the batched tensor.
        frame = cv2.resize(frame, (imsize, imsize))
        content_image = loader(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))).unsqueeze(0).to(device)

        # Start the optimisation from the content frame itself.
        input_image = content_image.clone()

        output = run_style_transfer(
            cnn, normalization_mean, normalization_std,
            content_image, style_image, input_image,
            num_steps=200  # Reduce this for speed
        )

        styled_frame = tensor_to_cv2(output)

        cv2.imshow("Neural Style Transfer", styled_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, including on kernel
    # interrupt or an exception inside the loop.
    cap.release()
    cv2.destroyAllWindows()


Capturing and stylizing webcam. Press 'q' to exit.


main loop logging part

In [57]:
# Webcam style-transfer loop that records one log entry per frame and saves
# all entries to CSV when the loop ends.
# NOTE: this cell unpacks four values from run_style_transfer, so the
# definition that also returns the convergence log must be the active one.
style_path = "Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg"
style_image = image_loader(style_path)

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): exit() would shut the notebook kernel down.
    raise RuntimeError("Error: Could not open webcam.")

print("Capturing and stylizing webcam. Press 'q' to exit.")

frame_count = 0
start_time = time.time()
display_fps = 0.0 # Initialize as float

# === CONFIGURATION FOR CURRENT EXPERIMENT RUN START ===
# Adjust these values for each experimental run. imsize comes from the setup
# cell earlier in the notebook.
webcam_num_steps = 200

CONFIG_STYLE_WEIGHT = 1e6
CONFIG_CONTENT_WEIGHT = 1
CONFIG_LOG_EVERY_K_STEPS = 5 # interval between convergence records

# File name without directory or final extension; rsplit keeps any dots that
# are part of the name itself.
style_name = style_path.split('/')[-1].rsplit('.', 1)[0]
current_config_name = f"webcam_steps{webcam_num_steps}_imsize{imsize}_styleW{CONFIG_STYLE_WEIGHT:.0e}_contentW{CONFIG_CONTENT_WEIGHT}_style_{style_name}"
print(f"Current Configuration: {current_config_name}")
# === CONFIGURATION FOR CURRENT EXPERIMENT RUN END ===

try:
    while True:
        loop_start_time = time.time()
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame from webcam.")
            break

        # Resize, convert BGR -> RGB, then build the batched tensor.
        resized_frame_for_tensor = cv2.resize(frame, (imsize, imsize))
        content_image_pil = Image.fromarray(cv2.cvtColor(resized_frame_for_tensor, cv2.COLOR_BGR2RGB))
        content_image = loader(content_image_pil).unsqueeze(0).to(device)
        input_image = content_image.clone()

        output_tensor, final_s_loss, final_c_loss, convergence_log_for_frame = run_style_transfer(
            cnn, normalization_mean, normalization_std,
            content_image, style_image, input_image,
            num_steps=webcam_num_steps,
            style_weight=CONFIG_STYLE_WEIGHT,
            content_weight=CONFIG_CONTENT_WEIGHT,
            log_every_k_steps=CONFIG_LOG_EVERY_K_STEPS
        )

        # Covers capture, preprocessing and optimisation for this frame.
        current_loop_time = time.time() - loop_start_time

        styled_frame_cv2 = tensor_to_cv2(output_tensor)

        # FPS, refreshed about once per second. frame_count is at least 1
        # here, so the computed rate is always positive.
        frame_count += 1
        elapsed_time = time.time() - start_time
        if elapsed_time >= 1.0:
            display_fps = frame_count / elapsed_time
            frame_count = 0
            start_time = time.time()

        cv2.putText(styled_frame_cv2, f"FPS: {display_fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(styled_frame_cv2, f"Style Loss: {final_s_loss:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.putText(styled_frame_cv2, f"Content Loss: {final_c_loss:.2f}", (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.putText(styled_frame_cv2, f"Loop Time: {current_loop_time:.2f}s", (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.putText(styled_frame_cv2, f"Steps: {webcam_num_steps}", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # One record per frame; only the length of the convergence log is
        # stored so the CSV stays flat.
        log_entry = {
            'config_name': current_config_name,
            'timestamp': time.time(),
            'num_steps_config': webcam_num_steps,
            'imsize_config': imsize,
            'style_weight_config': CONFIG_STYLE_WEIGHT,
            'content_weight_config': CONFIG_CONTENT_WEIGHT,
            'style_image_path': style_path,
            'loop_time_s': current_loop_time,
            'fps_calculated': display_fps,
            'final_weighted_style_loss': final_s_loss,
            'final_weighted_content_loss': final_c_loss,
            'convergence_detail_steps': len(convergence_log_for_frame),
        }
        experimental_data_log.append(log_entry)

        cv2.imshow("Neural Style Transfer - Webcam", styled_frame_cv2)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, including on kernel
    # interrupt or an exception inside the loop.
    cap.release()
    cv2.destroyAllWindows()

    # Save whatever was logged, even when the loop ended abnormally, so a
    # long experiment is not lost to a late error.
    output_csv_filename = 'style_transfer_experiment2_log.csv'
    df_results = pd.DataFrame(experimental_data_log)
    try:
        df_results.to_csv(output_csv_filename, index=False)
        print(f"Experimental data saved to {output_csv_filename}")
    except Exception as e:
        print(f"Error saving experimental data to {output_csv_filename}: {e}")

Capturing and stylizing webcam. Press 'q' to exit.
Current Configuration: webcam_steps200_imsize256_styleW1e+06_contentW1_style_Van_Gogh_-_Starry_Night_-_Google_Art_Project
Experimental data saved to style_transfer_experiment2_log.csv
