# Neural Style Transfer Web Application

This notebook creates a simple web interface for neural style transfer using Gradio. Users can:
1. Upload a content image
2. Upload a style image
3. Select style intensity with a slider
4. Generate a stylized image

In [7]:
# Import required libraries
import os
import time
import torch
import numpy as np
import gradio as gr
from PIL import Image
import torchvision.transforms as transforms
from torchvision.utils import save_image
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Run on CUDA when PyTorch can see a GPU; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [8]:
# Model components, defined inline in this cell (originally from model.ipynb)

# Custom VGG model for style transfer
class VGGStyleTransfer(nn.Module):
    """VGG19 feature extractor that returns content and style activations.

    The convolutional part of a pretrained VGG19 is copied layer by layer into
    ``self.model`` (children are named by their index in ``vgg19().features``).
    ``forward`` returns the activations of the layers listed in
    ``self.content_layers`` and ``self.style_layers``.

    Args:
        fine_tune: When True, only the parameters of layers 19 and 21
            (conv4_1, conv4_2) stay trainable; otherwise every parameter is
            frozen.
    """

    def __init__(self, fine_tune=False):
        super().__init__()
        vgg = models.vgg19(weights='DEFAULT').features.eval().to(device)

        # Use multiple content layers for better structural representation
        self.content_layers = ['conv4_2', 'conv5_2']
        self.style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']

        # Human-readable conv name -> index inside vgg19().features
        self.layer_names = {
            'conv1_1': 0, 'conv1_2': 2,
            'conv2_1': 5, 'conv2_2': 7,
            'conv3_1': 10, 'conv3_2': 12, 'conv3_3': 14, 'conv3_4': 16,
            'conv4_1': 19, 'conv4_2': 21, 'conv4_3': 23, 'conv4_4': 25,
            'conv5_1': 28, 'conv5_2': 30, 'conv5_3': 32, 'conv5_4': 34
        }

        max_index = max(self.layer_names.values())
        self.model = nn.Sequential()
        for i, layer in enumerate(vgg):
            if i <= max_index:
                self.model.add_module(str(i), layer)

        # Parameter names look like "<layer index>.weight"; compare the index
        # part exactly instead of substring-matching the whole name, so that
        # e.g. a layer "119" could never be mistaken for layer "19".
        trainable_layers = {'19', '21'} if fine_tune else set()  # conv4_1, conv4_2
        for name, param in self.model.named_parameters():
            param.requires_grad = name.split('.', 1)[0] in trainable_layers

    def forward(self, x):
        """Run ``x`` through the network and collect the requested activations.

        Args:
            x: Input tensor fed to the first VGG layer.

        Returns:
            Tuple ``(content_features, style_features)``: two dicts mapping
            layer name -> cloned activation of that layer.
        """
        content_wanted = set(self.content_layers)
        style_wanted = set(self.style_layers)

        # Built per call so later edits to the layer lists are honoured.
        index_to_names = {}
        for layer_name, idx in self.layer_names.items():
            if layer_name in content_wanted or layer_name in style_wanted:
                index_to_names.setdefault(idx, []).append(layer_name)
        last_needed = max(index_to_names, default=-1)

        content_features = {}
        style_features = {}
        for name, layer in self.model.named_children():
            layer_idx = int(name)
            if layer_idx > last_needed:
                # Nothing past this point is captured; skip the wasted compute.
                break
            x = layer(x)
            for layer_name in index_to_names.get(layer_idx, ()):
                # clone() keeps the captured activation independent of any
                # later in-place op applied to x (torchvision's VGG uses
                # in-place ReLUs right after each conv).
                if layer_name in content_wanted:
                    content_features[layer_name] = x.clone()
                if layer_name in style_wanted:
                    style_features[layer_name] = x.clone()
        return content_features, style_features

# Improved Gram matrix calculation
def gram_matrix(feature):
    """Return the size-normalised Gram matrix of a 4-D feature map.

    The input is first divided by its overall standard deviation (plus a tiny
    epsilon), flattened to ``(batch * channels, height * width)``, multiplied
    by its own transpose, and finally divided by the total element count.
    """
    n, c, h, w = feature.size()
    # Rescale by the global std so activations of very different magnitude
    # yield comparable Gram entries
    scaled = feature / (feature.std() + 1e-8)
    flat = scaled.view(n * c, h * w)
    correlations = torch.mm(flat, flat.t())
    # Divide by the element count for consistent weighting across layers
    return correlations / (n * c * h * w)

# Enhanced loss functions
class StyleTransferLoss:
    """Weighted content + style + total-variation objective for style transfer."""

    def __init__(self, content_layers, style_layers):
        """Store the layer names to score and the fixed per-layer weights."""
        self.content_layers = content_layers
        self.style_layers = style_layers
        # Per-layer style weights, decreasing from conv1_1 to conv5_1
        self.style_weights = dict(zip(
            ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'),
            (1.0, 0.8, 0.5, 0.3, 0.1),
        ))
        self.content_weights = {'conv4_2': 1.0, 'conv5_2': 0.5}

    def compute_loss(self, content_features, style_features, generated_features, threshold):
        """Compute the combined loss for the current generated image.

        Args:
            content_features: dict layer name -> target content activation.
            style_features: dict layer name -> target style activation.
            generated_features: indexable of three items: content activations
                of the generated image, its style activations, and the
                generated image tensor itself.
            threshold: style intensity used to balance the loss terms.

        Returns:
            ``(total_loss, content_loss, style_loss, tv_loss)``. The L2
            regulariser is included in ``total_loss`` only.
        """
        gen_content = generated_features[0]
        gen_style = generated_features[1]
        image = generated_features[2]

        # Weighted MSE between generated and target content activations
        content_loss = sum(
            self.content_weights.get(name, 1.0)
            * nn.functional.mse_loss(gen_content[name], content_features[name])
            for name in self.content_layers
        )

        # Weighted MSE between Gram matrices of generated and target style activations
        style_loss = sum(
            self.style_weights.get(name, 1.0)
            * nn.functional.mse_loss(gram_matrix(gen_style[name]), gram_matrix(style_features[name]))
            for name in self.style_layers
        )

        # Cubic / quadratic response to the intensity, each floored at 0.2
        content_weight = max(0.2, (1.0 - threshold) ** 3)
        style_weight = max(0.2, threshold ** 2)

        # The scale factors switch at an intensity of 0.5
        content_scale = 1e5 if threshold < 0.5 else 5e4
        style_scale = 1e10 if threshold > 0.5 else 5e9

        content_loss = content_loss * (content_weight * content_scale)
        style_loss = style_loss * (style_weight * style_scale)

        # Total variation: absolute differences between horizontal and vertical
        # neighbours, weighted less as the style intensity grows
        tv_weight = 1e2 * (1.0 - threshold * 0.5)
        horizontal = image[:, :, :, :-1] - image[:, :, :, 1:]
        vertical = image[:, :, :-1, :] - image[:, :, 1:, :]
        tv_loss = (torch.sum(torch.abs(horizontal)) + torch.sum(torch.abs(vertical))) * tv_weight

        # Small L2 penalty on the generated image
        l2_reg = torch.norm(image, p=2) * 5e-4

        total_loss = content_loss + style_loss + tv_loss + l2_reg
        return total_loss, content_loss, style_loss, tv_loss

In [9]:
# Image transformation functions
def get_transform(img_size=512):
    """Build the resize / center-crop / to-tensor / normalize pipeline.

    Args:
        img_size: Target size passed to ``Resize`` and used for both sides of
            the square center crop.

    Returns:
        A ``transforms.Compose`` applying the four steps in order.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    steps = [
        transforms.Resize(img_size),
        transforms.CenterCrop((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)

def preprocess_image(img, img_size=512, preserve_aspect=True):
    """Turn a PIL image into a normalized ``(1, 3, img_size, img_size)`` tensor.

    Args:
        img: PIL image in any mode; it is converted to RGB first so that the
            three-channel normalization below always applies.
        img_size: Side length of the square output, in pixels.
        preserve_aspect: When True, the image is scaled so its longer side
            equals ``img_size`` and is centered on a black square canvas.
            When False, it is stretched to ``img_size`` x ``img_size``.

    Returns:
        Float tensor on ``device`` with a leading batch dimension, normalized
        with the mean/std used throughout this notebook.
    """
    # Slider widgets may hand over floats; PIL and Resize need integer sizes.
    img_size = int(img_size)

    # RGBA / grayscale / palette inputs would otherwise break the 3-channel
    # Normalize (and the RGB fill colour of the padding canvas).
    if img.mode != "RGB":
        img = img.convert("RGB")

    if preserve_aspect:
        # Calculate the new dimensions while preserving aspect ratio
        width, height = img.size
        if width > height:
            new_width = img_size
            # max(1, ...) guards very elongated images against a 0-pixel side
            new_height = max(1, int(height * img_size / width))
        else:
            new_height = img_size
            new_width = max(1, int(width * img_size / height))
        img = transforms.Resize((new_height, new_width))(img)
        # Center pad to square
        result = Image.new("RGB", (img_size, img_size), (0, 0, 0))
        offset_x = (img_size - new_width) // 2
        offset_y = (img_size - new_height) // 2
        result.paste(img, (offset_x, offset_y))
        img = result
    else:
        # Previously img_size was ignored on this path and the image kept its
        # native size; honour the requested output size instead.
        img = transforms.Resize((img_size, img_size))(img)

    # Apply standard transformations
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform(img).unsqueeze(0).to(device)

def denormalize(tensor):
    """Undo the mean/std normalization and return a displayable uint8 image.

    Args:
        tensor: Normalized image tensor of shape ``(1, 3, H, W)``.

    Returns:
        ``numpy.ndarray`` of shape ``(H, W, 3)`` and dtype ``uint8`` with
        values clamped to the 0-255 range.
    """
    # Build the constants on the tensor's own device (not the global one) so
    # CPU tensors also work when the notebook-wide device is CUDA.
    mean = torch.tensor([0.485, 0.456, 0.406], device=tensor.device).view(1, 3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225], device=tensor.device).view(1, 3, 1, 1)
    img = tensor * std + mean
    # Clamp to [0, 1], drop the batch dim, and move channels last for display
    img = img.clamp(0, 1).squeeze(0).detach().cpu().permute(1, 2, 0).numpy()
    return (img * 255).astype(np.uint8)

# Build the shared feature extractor a single time, on the selected device,
# and leave it in evaluation mode
model = VGGStyleTransfer(fine_tune=False).to(device).eval()

# Make sure the folder that receives the saved results exists
os.makedirs("output", exist_ok=True)

In [10]:
# Main style transfer function for the web interface
def style_transfer(content_img, style_img, style_intensity=0.5, num_steps=100, img_size=512, progress=gr.Progress()):
    """
    Apply style transfer to the content image using the style image.

    The generated image is optimised in the same normalized space that
    ``preprocess_image`` produces, so it is kept inside the per-channel range
    that corresponds to real pixel values 0..1 (not inside a literal [0, 1]).

    Args:
        content_img: Content image (PIL Image)
        style_img: Style image (PIL Image)
        style_intensity: Style intensity (0.0 to 1.0)
        num_steps: Number of optimization steps
        img_size: Size of the output image
        progress: Gradio progress bar

    Returns:
        Stylized image (numpy array). The same image is also written to
        ``output/stylized_<unix timestamp>.jpg``.

    Raises:
        ValueError: If either input image is missing.
    """
    if content_img is None or style_img is None:
        raise ValueError("Both content and style images must be provided")

    # Slider values can arrive as floats; range() and PIL need integers.
    num_steps = int(num_steps)
    img_size = int(img_size)

    # Prepare images with enhanced preprocessing
    content_tensor = preprocess_image(content_img, img_size=img_size, preserve_aspect=True)
    style_tensor = preprocess_image(style_img, img_size=img_size, preserve_aspect=True)

    # Valid value range of a normalized image, per channel
    lower, upper = _normalized_pixel_bounds(content_tensor)

    # Initialize the generated image with content image + small noise for better convergence
    noise = torch.randn_like(content_tensor) * 0.03
    generated_img = _clamp_to_bounds(content_tensor + noise, lower, upper).requires_grad_(True)

    # One L-BFGS iteration per loop step. The scheduler below is only advanced
    # every 20 steps, so with step_size=50 the rate is halved after 1000 steps.
    optimizer = optim.LBFGS([generated_img], lr=0.1, max_iter=1)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    loss_criterion = StyleTransferLoss(model.content_layers, model.style_layers)

    # The targets are constants of the optimisation; no graph is needed for them
    with torch.no_grad():
        content_features, _ = model(content_tensor)
        _, style_features = model(style_tensor)

    # Track loss progress for early stopping. best_img starts as the initial
    # image so it is always defined, even if the loss never becomes finite.
    best_loss = float('inf')
    best_img = generated_img.detach().clone()
    no_improvement_count = 0

    # Filled by the closure: loss statistics and the exact image they belong to
    last_eval = {}

    # Style transfer optimization loop
    progress(0, desc="Starting style transfer...")

    for step in range(num_steps):
        def closure():
            optimizer.zero_grad()

            # Keep the image inside the range of real pixel values
            with torch.no_grad():
                generated_img.copy_(_clamp_to_bounds(generated_img, lower, upper))

            # Get features of generated image
            gen_content_features, gen_style_features = model(generated_img)
            gen_features = (gen_content_features, gen_style_features, generated_img)

            # Compute loss with enhanced weighting
            total_loss, content_loss, style_loss, tv_loss = loss_criterion.compute_loss(
                content_features, style_features, gen_features, style_intensity
            )

            # Backpropagate
            total_loss.backward()

            # Record the statistics together with the image they were measured
            # on; the optimizer modifies generated_img after this returns.
            last_eval["stats"] = {
                "total": total_loss.item(),
                "content": content_loss.item(),
                "style": style_loss.item(),
                "tv": tv_loss.item()
            }
            last_eval["image"] = generated_img.detach().clone()
            return total_loss

        # Run optimization step
        optimizer.step(closure)
        stats = last_eval["stats"]
        current_loss = stats["total"]

        # Update learning rate
        if step % 20 == 0 and step > 0:
            scheduler.step()

        # Early stopping check
        if current_loss < best_loss:
            best_loss = current_loss
            no_improvement_count = 0
            # Keep the image this loss was actually evaluated on
            best_img = last_eval["image"]
        else:
            no_improvement_count += 1

        # Early stopping if no improvement after 30 steps
        if no_improvement_count > 30 and step > 50:
            print("Early stopping due to no improvement")
            break

        # Update progress
        progress((step + 1) / num_steps,
                desc=f"Step {step+1}/{num_steps}: Loss = {stats['total']:.2f} " +
                      f"(Content: {stats['content']:.2f}, Style: {stats['style']:.2f})")

    # Use the best evaluated result, whether the loop finished or stopped early
    with torch.no_grad():
        final_img = _clamp_to_bounds(best_img, lower, upper)

    # Convert to numpy for display
    stylized_img = denormalize(final_img)
    progress(1.0, desc="Style transfer complete!")

    # Save the result
    timestamp = int(time.time())
    output_path = os.path.join("output", f"stylized_{timestamp}.jpg")
    Image.fromarray(stylized_img).save(output_path)

    return stylized_img


def _normalized_pixel_bounds(reference):
    """Return per-channel (lower, upper) tensors: pixel values 0 and 1 after normalization."""
    # Same mean/std as the Normalize step in preprocess_image
    mean = torch.tensor([0.485, 0.456, 0.406], device=reference.device, dtype=reference.dtype).view(1, 3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225], device=reference.device, dtype=reference.dtype).view(1, 3, 1, 1)
    return (0.0 - mean) / std, (1.0 - mean) / std


def _clamp_to_bounds(tensor, lower, upper):
    """Element-wise clamp of ``tensor`` to broadcastable ``lower``/``upper`` tensors."""
    return torch.max(torch.min(tensor, upper), lower)

In [11]:
# Create the Gradio interface
def create_interface():
    """Assemble the Gradio Blocks UI for the style-transfer demo.

    Returns:
        The ``gr.Blocks`` object, with the generate button wired to
        ``style_transfer``. The interface is built but not launched here.
    """
    # Content / style pairs offered as one-click examples
    example_pairs = [
        ["Content/test/0016.jpg", "style/test/1.jpg"],
        ["Content/test/0771.jpg", "style/test/2.jpg"]
    ]

    with gr.Blocks(title="Neural Style Transfer") as app:
        gr.Markdown("""
        # Enhanced Neural Style Transfer App
        
        Upload a content image and a style image, then adjust the style intensity to create your stylized image!
        
        - Style Intensity: 0.0 = mostly content, 1.0 = mostly style
        - Steps: More steps give better results but take longer (200-500 recommended for best results)
        - Image Size: Larger sizes preserve more details but take longer to process
        """)

        with gr.Row():
            # Left column: inputs and controls
            with gr.Column(scale=1):
                content_picker = gr.Image(label="Content Image", type="pil")
                style_picker = gr.Image(label="Style Image", type="pil")

                with gr.Row():
                    intensity_slider = gr.Slider(
                        label="Style Intensity",
                        minimum=0.0, maximum=1.0, step=0.05, value=0.7
                    )

                with gr.Row():
                    steps_slider = gr.Slider(
                        label="Steps (Iterations)",
                        minimum=50, maximum=500, step=50, value=200
                    )

                    size_slider = gr.Slider(
                        label="Image Size (pixels)",
                        minimum=256, maximum=1024, step=64, value=512
                    )

                with gr.Row():
                    gr.Examples(
                        examples=example_pairs,
                        inputs=[content_picker, style_picker],
                        label="Example Images"
                    )

                generate_button = gr.Button("Generate Stylized Image", variant="primary")

            # Right column: result and usage tips
            with gr.Column(scale=1):
                result_view = gr.Image(label="Stylized Image")
                gr.Markdown("""
                ### Tips for better results:
                
                1. For **detailed paintings**, use style intensity 0.6-0.8 and at least 200 steps
                2. For **abstract patterns**, use style intensity 0.8-1.0
                3. For **subtle effects**, use style intensity 0.3-0.5
                4. **Higher resolution** images (512-1024px) produce better details but take longer
                5. If the result looks poor, try a different style image or adjust the intensity
                """)

        # Run the transfer with the current control values when the button is pressed
        generate_button.click(
            fn=style_transfer,
            inputs=[content_picker, style_picker, intensity_slider, steps_slider, size_slider],
            outputs=result_view
        )

    return app

# Build the interface object; it is launched in a later cell
demo = create_interface()

In [12]:
# Launch the interface with sharing enabled.
# NOTE: share=True serves the app on a public *.gradio.live URL in addition to
# the local one, so it is reachable from outside this machine while it runs.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://d68668b77a534787c1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
* Running on public URL: https://d68668b77a534787c1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Instructions for Using the Web App

1. **Upload a Content Image**: This is the base image that will be stylized.
2. **Upload a Style Image**: This image provides the artistic style to apply.
3. **Adjust the Style Intensity**: Lower values preserve more of the content image details, while higher values apply more of the style image's artistic elements.
4. **Set the Number of Steps**: More steps typically yield better results but increase processing time. 200-500 steps are recommended for quality results.
5. **Select Image Size**: Higher resolutions (512-1024px) preserve more details but increase processing time.
6. **Click "Generate Stylized Image"**: Wait for the process to complete (progress will be shown).

## Technical Notes

- The app uses a VGG19 model pre-trained on ImageNet for feature extraction.
- Style transfer is performed through an optimization process with an advanced loss function that balances content preservation and style adaptation.
- Content representation uses both the `conv4_2` and `conv5_2` layers for better structural detail.
- Style representation uses layers `conv1_1`, `conv2_1`, `conv3_1`, `conv4_1`, `conv5_1` with decreasing weights for multi-scale style features.
- The optimization uses the L-BFGS optimizer (one iteration per step) with a `StepLR` learning-rate schedule (`step_size=50`, `gamma=0.5`) that is advanced once every 20 steps.
- Total variation loss is applied to ensure spatial smoothness in the generated image.
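- Early stopping ends the optimization once the loss has failed to improve for more than 30 consecutive steps (checked only after step 50), to avoid spending time after quality plateaus.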
- Images are resized with their aspect ratio preserved and then centered on a black square canvas, which prevents distortion.

## Troubleshooting Poor Results

- **Problem**: Stylized image looks too much like original content
  - **Solution**: Increase style intensity (0.7-0.9) and use more steps

- **Problem**: Style details don't appear clearly
  - **Solution**: Try a higher image resolution (512-1024) and more steps (300-500)

- **Problem**: Output appears blocky or distorted
  - **Solution**: Ensure your style image has clear, distinctive features and isn't too small

- **Problem**: Colors look washed out
  - **Solution**: Style images with vibrant colors work best; try a different style image