In [1]:
import torch
import torch.optim as optim
from torchvision import transforms, models
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import numpy as np

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Using device:', device)

Using device: cpu


In [3]:
normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)
normalization = transforms.Normalize(mean=normalization_mean, std=normalization_std)

In [4]:
def load_image(image_path, transform=None, max_size=None, shape=None):
    image = Image.open(image_path).convert('RGB')
    if max_size:
        size = max(image.size)
        if size > max_size:
            size = max_size
            image = image.resize((size, size), Image.ANTIALIAS)
    if shape:
        image = image.resize(shape, Image.LANCZOS)
    if transform:
        image = transform(image).unsqueeze(0)
    return image.to(device)

In [13]:
def generate_mask(image_path):
    # Load the image using OpenCV
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    if image is None:
        raise ValueError(f"Unable to load image from {image_path}")

    # Apply thresholding to create a binary mask
    _, mask = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

    # Convert mask to RGB format
    mask_rgb = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)

    return mask_rgb

In [14]:
def apply_mask(content_image, mask):
    return cv2.bitwise_and(content_image, mask)

In [15]:
content_image_path = '/content/tshirt-content.jpg'
style_image_path = '/content/tshirt-style-blue.jpg'
mask_image_path = '/content/masked.jpg'

In [16]:
import torchvision.transforms as transforms

# Define image transformations
transform = transforms.Compose([
    transforms.Resize(256),  # Resize the image to 256x256 pixels
    transforms.CenterCrop(224),  # Crop the center 224x224 pixels
    transforms.ToTensor(),  # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the image
])

In [17]:
import os
print(os.listdir())

['.config', 'tshirt-style-blue.jpg', 'tshirt-content.jpg', 'sample_data']


In [37]:
content_image = load_image(content_image_path, transform=transform, max_size=512)
style_image = load_image(style_image_path, transform=transform, shape=[content_image.size(2), content_image.size(3)])
mask = generate_mask(content_image_path)
masked_content_image = apply_mask(content_image_np, resized_mask)
cv2.imwrite('/content/masked.jpg', masked_content_image)

  image = image.resize((size, size), Image.ANTIALIAS)


True

In [35]:
print(type(content_image_np), content_image_np.shape, content_image_np.dtype)
print(type(resized_mask), resized_mask.shape, resized_mask.dtype)

<class 'numpy.ndarray'> (224, 224, 3) uint8
<class 'numpy.ndarray'> (224, 224, 3) uint8


In [32]:
assert content_image_np.shape == mask.shape, "Dimensions of content_image_np and mask must match"

AssertionError: Dimensions of content_image_np and mask must match

In [34]:
content_image_np = content_image.detach().cpu().numpy()
content_image_np = np.transpose(content_image_np.squeeze(), (1, 2, 0))  # Assuming NHWC format

content_image_np = (content_image_np * 255).astype(np.uint8)

resized_mask = cv2.resize(mask, (224, 224))

In [38]:
vgg = models.vgg19(pretrained=True).features
for param in vgg.parameters():
    param.requires_grad = False
vgg.to(device)

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:12<00:00, 45.0MB/s]


Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): MaxPoo

In [39]:
class ContentLoss(torch.nn.Module):
    def __init__(self, target):
        super(ContentLoss, self).__init__()
        self.target = target.detach()

    def forward(self, input):
        self.loss = torch.nn.functional.mse_loss(input, self.target)
        return input

In [40]:
class StyleLoss(torch.nn.Module):
    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        self.target = self.gram_matrix(target_feature).detach()

    def forward(self, input):
        G = self.gram_matrix(input)
        self.loss = torch.nn.functional.mse_loss(G, self.target)
        return input

    def gram_matrix(self, input):
        a, b, c, d = input.size()
        features = input.view(a * b, c * d)
        G = torch.mm(features, features.t())
        return G.div(a * b * c * d)

In [41]:
def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                               content_img, style_img, content_layers, style_layers):
    normalization = Normalization(normalization_mean, normalization_std).to(device)
    content_losses = []
    style_losses = []
    model = torch.nn.Sequential(normalization)

    i = 0
    for layer in cnn.children():
        if isinstance(layer, torch.nn.Conv2d):
            i += 1
            name = 'conv_{}'.format(i)
        elif isinstance(layer, torch.nn.ReLU):
            name = 'relu_{}'.format(i)
            layer = torch.nn.ReLU(inplace=False)
        elif isinstance(layer, torch.nn.MaxPool2d):
            name = 'pool_{}'.format(i)
        elif isinstance(layer, torch.nn.BatchNorm2d):
            name = 'bn_{}'.format(i)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

        model.add_module(name, layer)

        if name in content_layers:
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module("content_loss_{}".format(i), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module("style_loss_{}".format(i), style_loss)
            style_losses.append(style_loss)

    for i in range(len(model) - 1, -1, -1):
        if isinstance(model[i], ContentLoss) or isinstance(model[i], StyleLoss):
            break

    model = model[:(i + 1)]
    return model, content_losses, style_losses

In [42]:
class Normalization(torch.nn.Module):
    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        self.mean = mean.clone().detach().view(-1, 1, 1)
        self.std = std.clone().detach().view(-1, 1, 1)

    def forward(self, img):
        return (img - self.mean) / self.std

In [43]:
content_img = content_image
style_img = style_image

content_layers = ['conv_4']
style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

input_img = content_img.clone()
optimizer = optim.LBFGS([input_img.requires_grad_()])

model, content_losses, style_losses = get_style_model_and_losses(vgg, normalization_mean, normalization_std,
                                                                 content_img, style_img, content_layers, style_layers)

In [44]:
# Style intensity control
style_weight = 1e6  # Adjust this dynamically
content_weight = 1

In [45]:

run = [0]
while run[0] <= 300:
    def closure():
        input_img.data.clamp_(0, 1)
        optimizer.zero_grad()
        model(input_img)
        style_score = 0
        content_score = 0
        for sl in style_losses:
            style_score += sl.loss
        for cl in content_losses:
            content_score += cl.loss

        loss = style_score * style_weight + content_score * content_weight
        loss.backward()
        run[0] += 1
        return style_score + content_score

    optimizer.step(closure)

input_img.data.clamp_(0, 1)

tensor([[[[1., 1., 0.,  ..., 0., 0., 1.],
          [0., 0., 0.,  ..., 0., 0., 1.],
          [1., 0., 0.,  ..., 0., 0., 1.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [1., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 1., 1.,  ..., 1., 1., 1.],
          [0., 0., 1.,  ..., 0., 1., 0.],
          [1., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 1., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [0., 1., 1.,  ..., 1., 1., 1.],
          [0., 0., 0.,  ..., 1., 1., 1.]]]])

In [46]:
output = input_img.cpu().clone().squeeze(0)
output = transforms.ToPILImage()(output)
output.save('output_image.jpg')