In [None]:
#Import modules
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from tensorflow.keras.applications import inception_v3
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from transformers import CLIPProcessor, CLIPModel, CLIPVisionModel

In [None]:
#Compute device shared by every cell below: CUDA when torch reports it as available, CPU otherwise
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
def load_models(clip_model = "openai/clip-vit-base-patch32", inception_weights = "imagenet"):
    """Load the CLIP processor/model pair and a headless InceptionV3 network.

    Parameters
    ----------
    clip_model : str
        Checkpoint identifier handed to ``from_pretrained`` for both the
        CLIP processor and the CLIP model.
    inception_weights : str or None
        Forwarded as ``weights`` to ``inception_v3.InceptionV3``.

    Returns
    -------
    tuple
        ``(processor, clip, inception_model)``; the CLIP model has been moved
        to ``DEVICE``, the Inception model is built with ``include_top = False``.
    """
    processor = CLIPProcessor.from_pretrained(clip_model)
    #Bind the loaded model to its own name so the checkpoint-name argument is not overwritten
    clip = CLIPModel.from_pretrained(clip_model).to(DEVICE)
    #The constructor exported by keras.applications.inception_v3 is InceptionV3 (there is no "InceptionV")
    inception_model = inception_v3.InceptionV3(weights = inception_weights, include_top = False)
    return processor, clip, inception_model

In [None]:
def preprocess_image(img_path, target_size = (300, 300)):
    """Read an image file and return it as a batched float tensor on ``DEVICE``.

    Parameters
    ----------
    img_path : str
        Path handed to ``load_img``.
    target_size : tuple
        Forwarded to ``load_img`` as ``target_size``.

    Returns
    -------
    torch.Tensor
        Pixel values divided by 255, with the last array axis moved to the
        front and a leading batch axis of size 1 added.
    """
    pil_img = load_img(img_path, target_size = target_size)
    scaled = img_to_array(pil_img) / 255.0
    #Move the last axis (channels, assuming the Keras default channels-last layout) to the front
    channels_first = torch.from_numpy(scaled).permute(2, 0, 1)
    batch = channels_first.unsqueeze(0).float()
    return batch.to(DEVICE)

In [None]:
#Helper - unwrap the output of CLIP's get_*_features calls
def _features_tensor(output):
    """Return ``output`` itself when it is a tensor, otherwise its ``pooler_output``."""
    if torch.is_tensor(output):
        return output
    return output.pooler_output


#Function - Generate a virtual image
def virtual_image(base_img_path, text_prompt, iterations = 10):
    """Generate a text-guided image from noise and blend it 50/50 with the base image.

    A noise image with the same shape as the preprocessed base image is
    optimised so that its CLIP image embedding moves towards the CLIP text
    embedding of ``text_prompt`` (cosine similarity, torch autograd). The
    result is resized to the base image and alpha-blended with it.

    Inception features are deliberately not used here: they live in a
    different feature space than CLIP text embeddings, so a dot product
    between the two is not meaningful.

    Parameters
    ----------
    base_img_path : str
        Path of the image to blend the generated image with.
    text_prompt : str
        Description of the desired image content.
    iterations : int
        Number of optimisation steps; 0 returns the blended initial noise.

    Returns
    -------
    numpy.ndarray
        Blended image as a uint8 BGR array with the base image's height/width
        (ready for ``cv2.imwrite``).

    Raises
    ------
    ValueError
        If ``iterations`` is negative.
    FileNotFoundError
        If OpenCV cannot read ``base_img_path``.
    """
    if iterations < 0:
        raise ValueError("iterations must be >= 0")

    #cv2.imread signals failure by returning None instead of raising
    base_bgr = cv2.imread(base_img_path)
    if base_bgr is None:
        raise FileNotFoundError(f"Could not read base image: {base_img_path!r}")

    #Load CLIP; same default checkpoint as load_models
    clip_name = "openai/clip-vit-base-patch32"
    processor = CLIPProcessor.from_pretrained(clip_name)
    clip_model = CLIPModel.from_pretrained(clip_name).to(DEVICE)
    clip_model.eval()
    #Only the pixels are optimised, never the network weights
    clip_model.requires_grad_(False)

    #Embed the prompt once; it stays constant during the optimisation
    text_inputs = processor(text = [text_prompt], return_tensors = "pt", padding = True, truncation = True).to(DEVICE)
    with torch.no_grad():
        text_features = _features_tensor(clip_model.get_text_features(**text_inputs))
        text_features = F.normalize(text_features, dim = -1)

    #Normalisation constants of the OpenAI CLIP checkpoints and the input resolution of the vision tower
    clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device = DEVICE).view(1, 3, 1, 1)
    clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device = DEVICE).view(1, 3, 1, 1)
    clip_size = clip_model.config.vision_config.image_size

    #Start from uniform noise in [0, 1] shaped like the preprocessed base image
    noise_shape = preprocess_image(base_img_path).shape
    virtual = torch.rand(noise_shape, device = DEVICE, requires_grad = True)
    optimizer = torch.optim.Adam([virtual], lr = 0.05)

    for _ in range(iterations):
        optimizer.zero_grad()
        clip_input = F.interpolate(virtual, size = (clip_size, clip_size), mode = "bilinear", align_corners = False)
        clip_input = (clip_input - clip_mean) / clip_std
        image_features = _features_tensor(clip_model.get_image_features(pixel_values = clip_input))
        image_features = F.normalize(image_features, dim = -1)
        #Calculate the loss function: negative cosine similarity between image and prompt
        loss = -(image_features * text_features).sum()
        #Calculate the gradient and take one descent step on the pixels
        loss.backward()
        optimizer.step()
        #Keep the pixels inside the valid [0, 1] range
        with torch.no_grad():
            virtual.clamp_(0.0, 1.0)

    #De-processing the virtual image: (1, C, H, W) in [0, 1] -> (H, W, C) uint8
    virtual_rgb = virtual.detach().squeeze(0).permute(1, 2, 0).cpu().numpy() * 255.0
    virtual_rgb = np.ascontiguousarray(np.clip(virtual_rgb.round(), 0, 255).astype('uint8'))
    #OpenCV works in BGR channel order while the tensor is RGB
    virtual_bgr = cv2.cvtColor(virtual_rgb, cv2.COLOR_RGB2BGR)

    #Combining virtual image with the org one
    virtual_bgr = cv2.resize(virtual_bgr, (base_bgr.shape[1], base_bgr.shape[0]))
    alpha = 0.5
    blended_img = cv2.addWeighted(base_bgr, alpha, virtual_bgr, 1 - alpha, 0)
    return blended_img

In [None]:
#Main
base_img = "" #Path of the file
#Fail before prompting the user when the placeholder path was never filled in
if not base_img:
    raise ValueError("Set base_img to the path of the base image before running this cell.")
text_prompt = input("Enter what kind of image you want: ")
virtual = virtual_image(base_img, text_prompt, iterations = 20)
#The OpenCV writer is cv2.imwrite; it returns False when the file could not be written
if not cv2.imwrite("generated_image.jpg", virtual):
    raise IOError("Failed to write generated_image.jpg")
#Done