In [None]:
import torch
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from diffusers import DDPMPipeline, DDPMScheduler
from transformers import CLIPModel, CLIPProcessor
import matplotlib.pyplot as plt
import os

# Directory that receives the intermediate and final guided samples.
os.makedirs("clip_output", exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# "openai/clip-vit-large-patch32" is not a published checkpoint (the Hub answers
# with an OSError). The large CLIP ViT is published with 14-pixel patches; use
# "openai/clip-vit-base-patch32" instead if a smaller model is preferred.
CLIP_MODEL_ID = "openai/clip-vit-large-patch14"
clip_model = CLIPModel.from_pretrained(CLIP_MODEL_ID).to(device)
clip_processor = CLIPProcessor.from_pretrained(CLIP_MODEL_ID)

print("CLIP model loaded.")

DDPM_MODEL_ID = "google/ddpm-celebahq-256"
diffusion = DDPMPipeline.from_pretrained(DDPM_MODEL_ID).to(device)
# `from_config` expects a config object, not a repo id. Build an independent
# scheduler from the configuration the pipeline was loaded with.
scheduler = DDPMScheduler.from_config(diffusion.scheduler.config)

prompt = "An epic scene of Thor with a futuristic Asgard in the background"

print("Encoding text prompt with CLIP...")
text_inputs = clip_processor(text=[prompt], return_tensors="pt", padding=True).to(device)
# The text embedding is a fixed target, so do not record an autograd graph for it.
with torch.no_grad():
    text_features = clip_model.get_text_features(**text_inputs)

def preprocess_image(image):
    """Turn a PIL image into a CLIP-ready batch of one on `device`.

    The image is converted to RGB, resized so its short side is 224, centre
    cropped to 224x224, converted to a tensor and normalised with the channel
    statistics CLIP was trained with (not the ImageNet statistics).

    Args:
        image: a PIL image.

    Returns:
        A tensor with a leading batch dimension of 1, moved to `device`.
    """
    preprocess = Compose([
        Resize(224),
        CenterCrop(224),
        ToTensor(),
        # OpenAI CLIP mean/std; ImageNet values shift the inputs away from
        # the distribution the CLIP vision tower expects.
        Normalize(
            mean=[0.48145466, 0.4578275, 0.40821073],
            std=[0.26862954, 0.26130258, 0.27577711],
        ),
    ])
    # The three-channel Normalize above fails on grayscale/RGBA input.
    return preprocess(image.convert("RGB")).unsqueeze(0).to(device)

def generate_clip_guided_image(prompt, num_inference_steps=50, guidance_scale=0.1):
    """Sample an image from the DDPM pipeline while steering it toward `prompt` with CLIP.

    After every denoising step the current sample is scored against the prompt
    with CLIP, and the sample is moved a small step along the gradient that
    increases the image/text cosine similarity.

    Args:
        prompt: text the sample is steered toward.
        num_inference_steps: number of scheduler steps to run (must be >= 1).
        guidance_scale: step size applied to the CLIP similarity gradient.

    Returns:
        The final sample as a PIL image. It is also written to
        ``clip_output/final_image.png``; every 10th step is written to
        ``clip_output/step_<n>.png``.

    Raises:
        ValueError: if `num_inference_steps` is smaller than 1.
    """
    if num_inference_steps < 1:
        raise ValueError("num_inference_steps must be at least 1")

    # Encode the prompt that was passed in rather than relying on a module-level
    # embedding; it is a constant target, so no autograd graph is needed.
    with torch.no_grad():
        text_inputs = clip_processor(text=[prompt], return_tensors="pt", padding=True).to(device)
        prompt_features = clip_model.get_text_features(**text_inputs)

    # CLIP preprocessing done with tensor ops so gradients can reach the sample.
    clip_size = clip_model.config.vision_config.image_size
    clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=device).view(1, 3, 1, 1)
    clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=device).view(1, 3, 1, 1)

    # The scheduler must be told how many steps to run; its timesteps then go
    # from the noisiest level down to 0, which is what the UNet was trained on.
    scheduler.set_timesteps(num_inference_steps)
    latents = torch.randn((1, 3, 256, 256), device=device)
    image_pil = None

    for step, timestep in enumerate(scheduler.timesteps):
        # Plain denoising step; no gradients are needed through the UNet.
        with torch.no_grad():
            noise_prediction = diffusion.unet(latents, timestep).sample
            latents = scheduler.step(
                model_output=noise_prediction, timestep=timestep, sample=latents
            ).prev_sample

        # CLIP guidance on a differentiable path: [-1, 1] sample -> [0, 1] image
        # -> CLIP input. Going through numpy/PIL here would cut the graph.
        latents = latents.detach().requires_grad_(True)
        clip_input = (latents / 2 + 0.5).clamp(0, 1)
        clip_input = torch.nn.functional.interpolate(
            clip_input, size=(clip_size, clip_size), mode="bilinear", align_corners=False
        )
        clip_input = (clip_input - clip_mean) / clip_std
        image_features = clip_model.get_image_features(pixel_values=clip_input)
        similarity = torch.cosine_similarity(image_features, prompt_features).mean()

        # autograd.grad returns the gradient for the sample only, so nothing is
        # accumulated on the CLIP weights and no graph has to be retained.
        (grad,) = torch.autograd.grad(similarity, latents)
        # Gradient *ascent*: we want the similarity to grow.
        latents = (latents + guidance_scale * grad).detach()

        # numpy_to_pil expects a float batch in [0, 1] shaped (N, H, W, C) and
        # returns a list of PIL images.
        image = (latents / 2 + 0.5).clamp(0, 1)
        image = image.cpu().permute(0, 2, 3, 1).numpy()
        image_pil = diffusion.numpy_to_pil(image)[0]

        if step % 10 == 0:
            img_path = f"clip_output/step_{step}.png"
            image_pil.save(img_path)
            print(f"Step {step} saved to {img_path}")

    final_path = "clip_output/final_image.png"
    image_pil.save(final_path)
    print(f"Final image saved to {final_path}")
    return image_pil

# Run the guided sampler for the module-level prompt and show the result
# without axis ticks, using the prompt as the figure title.
final_image = generate_clip_guided_image(prompt)

ax = plt.gca()
ax.imshow(final_image)
ax.axis("off")
ax.set_title(prompt)
plt.show()


OSError: openai/clip-vit-large-patch32 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [None]:
import requests
from PIL import Image
import matplotlib.pyplot as plt
import os
from openai import OpenAI

# SECURITY: never embed an API key in a notebook. Any key that has ever been
# saved in this file must be treated as leaked and revoked in the OpenAI
# dashboard. The key is read from the environment instead.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

# Organization / project ids can be passed here as well if the account needs
# them; read them from the environment rather than writing them in the notebook.
client = OpenAI(
  api_key=api_key
)

# Directory that receives the downloaded images.
os.makedirs("dalle_output", exist_ok=True)


# Only the first prompt is rendered below; the rest are kept as alternatives.
prompts = [
    "A futuristic Iron Man suit flying over New York City",
 #   "An Avengers team poster in a cyberpunk universe",
 #   "Thor summoning lightning in a stormy sky",
 #   "A serene view of Wakanda with advanced technology in the background",
 #   "Captain America holding his shield in a desert landscape during sunset",
]

def generate_image(prompt, image_path="dalle_output/image_1.png"):
    """Generate one 1024x1024 DALL-E 3 image for `prompt` and save it to disk.

    Args:
        prompt: text description sent to the image endpoint.
        image_path: file the downloaded image is written to. Defaults to the
            location this notebook has always used.

    Returns:
        The path the image was written to.

    Raises:
        requests.HTTPError: if downloading the generated image fails.
    """
    response = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )

    image_url = response.data[0].url
    print(image_url)

    # A timeout keeps the cell from hanging forever, and the status check stops
    # an HTTP error page from being written out as if it were a PNG.
    download = requests.get(image_url, timeout=60)
    download.raise_for_status()

    with open(image_path, "wb") as f:
        f.write(download.content)
    return image_path



# Render only the first entry of the prompt list, then report where it went.
first_prompt = prompts[0]
generate_image(first_prompt)

print("Images generated and saved to 'dalle_output' folder.")

https://oaidalleapiprodscus.blob.core.windows.net/private/org-eDlIO9iRlWeXkMSzgpKYN3Yk/user-aP0OcouEv5p4Ls9d7w6YAmPY/img-SrLijWMld3z4901rGh9a2r8o.png?st=2024-11-28T04%3A51%3A22Z&se=2024-11-28T06%3A51%3A22Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-11-28T03%3A14%3A34Z&ske=2024-11-29T03%3A14%3A34Z&sks=b&skv=2024-08-04&sig=00ei/GxwUAAmtBVGY833ysxnmgHNkoLLCaQxEjjRjm8%3D
Images generated and saved to 'dalle_output' folder.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.utils import save_image
import matplotlib.pyplot as plt
import os

# Output directory for the sample grids written after every epoch
os.makedirs("vae_output", exist_ok=True)

# Use the GPU when one is visible, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training hyperparameters
batch_size, epochs, learning_rate = 64, 20, 1e-3
latent_dim = 2  # two latent dimensions so the latent space can be plotted directly

# Image geometry (single-channel, square)
img_size, channels = 28, 1

# Convert to tensors and shift/scale pixel values with mean 0.5 and std 0.5
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
)

# MNIST training split, downloaded into ./mnist_data on first use
mnist_train = datasets.MNIST("mnist_data", train=True, download=True, transform=transform)
dataloader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)

# VAE Model
class VAE(nn.Module):
    """Fully connected variational autoencoder with a 400-unit hidden layer.

    Layer sizes are taken from the module-level ``img_size``, ``latent_dim``
    and ``channels`` settings. The decoder ends in ``Tanh``, so
    reconstructions lie in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        flat_pixels = img_size * img_size

        # Encoder trunk; two linear heads produce the posterior parameters.
        self.encoder = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_pixels, 400),
            nn.ReLU(),
        )
        self.mu_layer = nn.Linear(400, latent_dim)
        self.logvar_layer = nn.Linear(400, latent_dim)

        # Decoder maps a latent vector back to a flattened image.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 400),
            nn.ReLU(),
            nn.Linear(400, flat_pixels),
            nn.Tanh(),
        )

    def encode(self, x):
        """Return ``(mu, logvar)`` computed from the input batch ``x``."""
        hidden = self.encoder(x)
        return self.mu_layer(hidden), self.logvar_layer(hidden)

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps`` standard normal and ``std = exp(logvar / 2)``."""
        std = (0.5 * logvar).exp()
        noise = torch.randn_like(std)
        return mu + noise * std

    def decode(self, z):
        """Decode latent vectors ``z`` into a ``(-1, channels, img_size, img_size)`` batch."""
        flat = self.decoder(z)
        return flat.view(-1, channels, img_size, img_size)

    def forward(self, x):
        """Encode ``x``, sample a latent, and return ``(reconstruction, mu, logvar)``."""
        mu, logvar = self.encode(x)
        reconstruction = self.decode(self.reparameterize(mu, logvar))
        return reconstruction, mu, logvar

# Loss function
def loss_function(reconstructed, original, mu, logvar):
    """Return the summed squared reconstruction error plus the KL term.

    The KL term is ``-0.5 * sum(1 + logvar - mu^2 - exp(logvar))``, summed
    over every element of ``mu`` / ``logvar``.
    """
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_penalty = -0.5 * torch.sum(kl_terms)
    reconstruction_error = nn.functional.mse_loss(reconstructed, original, reduction="sum")
    return reconstruction_error + kl_penalty

# Build the model and the Adam optimizer that updates it
vae = VAE().to(device)
optimizer = optim.Adam(vae.parameters(), lr=learning_rate)

# One pass over the data per epoch, then a grid of samples decoded from random latents
for epoch in range(epochs):
    epoch_number = epoch + 1
    vae.train()

    running_loss = 0
    for batch, _ in dataloader:
        batch = batch.to(device)

        optimizer.zero_grad()
        outputs = vae(batch)
        batch_loss = loss_function(outputs[0], batch, outputs[1], outputs[2])
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # The loss is summed per batch, so divide by the dataset size for a per-image average
    mean_loss = running_loss / len(dataloader.dataset)
    print(f"Epoch [{epoch_number}/{epochs}], Loss: {mean_loss:.4f}")

    # Decode 64 latent vectors drawn from a standard normal and save them as an 8x8 grid
    vae.eval()
    with torch.no_grad():
        z = torch.randn(64, latent_dim).to(device)
        generated_imgs = vae.decode(z)
        save_image(generated_imgs, f"vae_output/generated_{epoch_number}.png", nrow=8, normalize=True)

print("Training complete. Check the 'vae_output' folder for generated images.")

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 128MB/s]


Extracting mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 24.9MB/s]

Extracting mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 107MB/s]


Extracting mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 8.44MB/s]

Extracting mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw






Epoch [1/20], Loss: 164.2067
Epoch [2/20], Loss: 148.3514
Epoch [3/20], Loss: 144.8740
Epoch [4/20], Loss: 142.6529
Epoch [5/20], Loss: 141.0384
Epoch [6/20], Loss: 139.7893
Epoch [7/20], Loss: 138.7039
Epoch [8/20], Loss: 137.8498
Epoch [9/20], Loss: 137.1437
Epoch [10/20], Loss: 136.4962
Epoch [11/20], Loss: 136.0712
Epoch [12/20], Loss: 135.5019
Epoch [13/20], Loss: 135.1504
Epoch [14/20], Loss: 134.6237
Epoch [15/20], Loss: 134.2604
Epoch [16/20], Loss: 133.8644
Epoch [17/20], Loss: 133.4864
Epoch [18/20], Loss: 133.1413
Epoch [19/20], Loss: 132.9892
Epoch [20/20], Loss: 132.7110
Training complete. Check the 'vae_output' folder for generated images.


In [None]:
from transformers import pipeline
# from datasets import load_dataset

# GPT-2 text-generation pipeline, created with device=-1
text_gen = pipeline(task="text-generation", model="gpt2", device=-1)

# Smoke test: one continuation, capped at 20 tokens in total
generation_options = dict(max_length=20, num_return_sequences=1, truncation=True)
generated_text = text_gen("huggingface is ", **generation_options)

print(f" Generated text: {generated_text}")
import gradio as gr

def chatbot(input_text):
    """Return a GPT-2 continuation of `input_text` for the Gradio interface.

    Args:
        input_text: text typed by the user.

    Returns:
        The prompt followed by up to 20 newly generated tokens, or an empty
        string when the input is empty or only whitespace.
    """
    # An empty prompt gives the model nothing to continue.
    if not input_text or not input_text.strip():
        return ""

    outputs = text_gen(
        input_text,
        # `max_length` counts the prompt tokens too, so a prompt of 20+ tokens
        # left no room to generate anything; budget the new tokens instead.
        max_new_tokens=20,
        num_return_sequences=1,
        # Same truncation setting as the smoke-test call on the pipeline.
        truncation=True,
        # GPT-2 defines no pad token; reuse EOS explicitly instead of relying
        # on the per-call fallback and its warning.
        pad_token_id=text_gen.tokenizer.eos_token_id,
    )
    return outputs[0]["generated_text"]
# Shut down any interface left over from an earlier run of this cell, then
# serve the chatbot through a text-in / text-out interface.
gr.close_all()
demo = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="huggingface chatbot",
)
demo.launch()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


 Generated text: [{'generated_text': 'huggingface is ive got a point…\n\nThe only solution I can see is'}]
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


2024/11/28 06:49:43 [W] [service.go:132] login to server failed: tls: failed to verify certificate: x509: certificate has expired or is not yet valid: current time 2024-11-28T06:49:43Z is after 2024-11-28T06:24:31Z


<IPython.core.display.Javascript object>



In [None]:
!pip install gradio



In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [None]:
from diffusers import StableDiffusionPipeline
import torch

# Pick the device once and move the pipeline there a single time. Hard-coding
# "cuda" raises on hosts without a GPU, and the second `.to('cuda')` was a no-op.
sd_device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1").to(sd_device)

# Generate one image for the prompt and write it next to the notebook.
prompt = "a photo of a cat taken by a photographer"
image = pipe(prompt).images[0]
image.save("output.png")

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]