**AI photo editing**

In [None]:
from PIL import Image
import requests
from transformers import SamModel, SamProcessor
from diffusers import DiffusionPipeline, AutoPipelineForText2Image, AutoPipelineForInpainting
from diffusers.utils import load_image, make_image_grid

import torch
import numpy as np

In [None]:
# Load the SAM model as we have seen in the class.
# Use the GPU when one is available, otherwise fall back to the CPU.
# NOTE: the name `Device` is referenced by get_processed_inputs, so it is kept as-is.
Device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(Device)
model = SamModel.from_pretrained("facebook/sam-vit-base").to(Device)

# The processor only prepares the model inputs and post-processes its outputs.
# `torch_dtype` is a model-loading argument, not a processor option, so it is
# not passed here (the model above is loaded in its default precision).
processor = SamProcessor.from_pretrained("facebook/sam-vit-base")

In [None]:
# Get the mask
def mask_to_rgb(mask):
    """
    Build an RGBA overlay (uint8, one extra trailing axis of size 4) from a mask.

    Entries of ``mask`` equal to 1 are painted semi-transparent green;
    every other entry stays fully transparent black (all zeros).
    """
    # [Red, Green, Blue, Alpha]: green at roughly half opacity
    highlight_rgba = [0, 255, 0, 127]

    # Start from an all-zero canvas, i.e. transparent everywhere
    overlay = np.zeros((*mask.shape, 4), dtype=np.uint8)

    # Paint only the positions flagged in the mask
    selected = mask == 1
    overlay[selected] = highlight_rgba

    return overlay


def get_processed_inputs(image, input_points):
    """
    Segment ``image`` with SAM using point prompts and return the inverted best mask.

    Args:
        image: The image to segment; forwarded to the SAM processor as ``images``.
        input_points: Point prompts forwarded to the SAM processor as
            ``input_points`` (this notebook passes ``[[[x1, y1], [x2, y2]]]``,
            i.e. a list of [x, y] points for a single image).

    Returns:
        A NumPy array with the inverted mask of the highest-scoring SAM
        prediction: subject pixels are 0/False and background pixels are
        1/True, which is convenient for infilling the background.
    """

    # Use the processor to generate the right inputs for SAM
    inputs = processor(images=image, input_points=input_points, return_tensors="pt").to(Device)

    # Call SAM. This is pure inference, so disable autograd to avoid building
    # a computation graph and keeping activations alive for no reason.
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process the outputs of SAM to obtain the masks
    masks = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu(),
    )

    # Select the mask with the highest predicted IoU score. You can also
    # experiment with other selection criteria, for example the largest
    # mask instead of the most confident one.
    # NOTE(review): argmax is taken over the flattened scores, which assumes a
    # single image and a single prompt batch -- confirm before batching.
    best_mask = masks[0][0][outputs.iou_scores.argmax()]

    # NOTE: we invert the mask with the ~ operator so that the subject pixels
    # have a value of 0 and the background pixels a value of 1. This makes it
    # more convenient to infill the background.
    return ~best_mask.cpu().numpy()

In [None]:
# Load the photo, force 3-channel RGB and bring it to 512x512
raw_image = Image.open("car.png")
raw_image = raw_image.convert("RGB").resize((512, 512))

# These are the coordinates of two points on the car
input_points = [
    [
        [150, 170],
        [300, 250],
    ]
]

# Inverted SAM mask for the prompted subject
mask = get_processed_inputs(image=raw_image, input_points=input_points)

# Small preview of the mask overlay (shown as the cell output)
Image.fromarray(mask_to_rgb(mask)).resize(size=(128, 128))

Inpainting

In [None]:
# Checkpoint of the Stable Diffusion XL inpainting model to load
inpainting_checkpoint = 'diffusers/stable-diffusion-xl-1.0-inpainting-0.1'

# Load the inpainting pipeline with half-precision (float16) weights
pipeline = AutoPipelineForInpainting.from_pretrained(inpainting_checkpoint, torch_dtype=torch.float16)

# Turn on diffusers' model CPU offloading for this pipeline
pipeline.enable_model_cpu_offload()

In [None]:
def inpaint(raw_image, input_mask, prompt, negative_prompt=None, seed=74294536, cfgs=7):
    """
    Run the inpainting pipeline on ``raw_image`` and return the first generated image.

    Args:
        raw_image: Image handed to the pipeline as ``image``.
        input_mask: Array converted with ``Image.fromarray`` and handed to
            the pipeline as ``mask_image``.
        prompt: Text prompt for the generation.
        negative_prompt: Optional negative text prompt.
        seed: Value passed to ``torch.manual_seed``; the generator it returns
            is given to the pipeline.
        cfgs: Value passed to the pipeline as ``guidance_scale``.

    Returns:
        The first entry of the pipeline output's ``images``.
    """
    # The pipeline receives the mask as a PIL image
    pil_mask = Image.fromarray(input_mask)

    # Seed torch so that repeated calls with the same seed are reproducible
    generator = torch.manual_seed(seed)

    result = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=raw_image,
        mask_image=pil_mask,
        generator=generator,
        guidance_scale=cfgs,
    )

    return result.images[0]

In [None]:
# What we want to see in the infilled area ...
prompt = "a car driving on Mars. Studio lights, 1970s"
# ... and what we want the model to steer away from
negative_prompt = "artifacts, low quality, distortion"

# Generate the new image, using the mask computed earlier
image = inpaint(
    raw_image=raw_image,
    input_mask=mask,
    prompt=prompt,
    negative_prompt=negative_prompt,
)

In [None]:
# Side-by-side comparison: original photo, mask overlay, generated result
mask_overlay = Image.fromarray(mask_to_rgb(mask))
panels = [raw_image, mask_overlay, image.resize((512, 512))]
fig = make_image_grid(panels, rows=1, cols=3)
fig

In [None]:
# We can use some tools to play with the masks