In [1]:
#Import modules
import torch
import numpy as np
import cv2
import torch.nn.functional as F
from PIL import Image
from transformers import CLIPProcessor, CLIPModel, CLIPVisionModel
from tensorflow.keras.applications import inception_v3
from tensorflow.keras.preprocessing import load_img, img_to_array

ImportError: cannot import name 'load_img' from 'tensorflow.keras.preprocessing' (C:\Users\AJIT ASHWATH R\AppData\Roaming\Python\Python312\site-packages\keras\_tf_keras\keras\preprocessing\__init__.py)

In [None]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def load_models(clip_model = "openai/clip-vit-base-patch32", inception_weights = "imagenet"):
    processor = CLIPProcessor.from_pretrained(clip_model)
    clip_model = CLIPModel.from_pretrained(clip_model).to(DEVICE)
    inception_model = inception_v3.InceptionV(weights = inception_weights, include_top = False)
    return processor, clip_model, inception_model

In [None]:
def preprocess_image(img_path, target_size = (300, 300)):
    img = load_img(img_path, target_size = target_size)
    img_array = img_to_array(img) / 255.0
    img_tensor = torch.from_numpy(img_array).permute(2, 0, 1).unsqueeze(0).float()
    return img_tensor.to(DEVICE)

In [None]:
def generate_noise(shape):
    return torch.randn(*shape, device = DEVICE)

In [None]:
def generate_virtual_image(
        base_img_path,
        text_prompt,
        processor,
        clip_model,
        iterations = 20,
        learnig_rate = 0.1,
):
    base_img_path = preprocess_image(base_img_path)
    virtual_img = generate_noise(base_img_path.shape).requires_grad_(True)
    text_inputs = processor(text = text_prompt, return_tensors = "pt").to(DEVICE)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**text_inputs)
    for _ in range(iterations):
        image_features = clip_model.get_image_features(virtual_img)
        similarity_loss = -F.cosine.similarity(text_features, image_features).mean()
        similarity_loss.backward()
        virtual_img.data -= learnig_rate * virtual_img.grad
        virtual_img.grad.zero_()
    virtual_img_np = virtual_img.detach().cpu().squeeze().permute(1, 2, 0).numpy()
    virtual_img_np = (virtual_img_np * 255).astype(np.uint8)

    base_img = cv2.imread(base_img_path)
    virtual_img_resized = cv2.resize(virtual_img_np, (base_img.shape[1], base_img.shape[0]))
    blended_img = cv2.addWeighted(base_img, 0.5, virtual_img_resized, 0.5, 0)

    return blended_img

In [None]:
def main():
    processor, clip_model, _ = load_models()
    base_img_path = input("Enter base image path:")
    text_prompt = input("Enter text description for image generation:")
    generated_image = generate_virtual_image(
        base_img_path,
        text_prompt,
        processor,
        clip_model
    )
    cv2.imwrite("generated_image.jpg", generated_image)
    print("Virtual image generated and saved as 'generated_image.jpg'")
    
if __name__ == "__main__":
    main()
    