In [2]:
from diffusers import StableDiffusionPipeline
import torch

# Load the Stable Diffusion model from Hugging Face
model_id = "CompVis/stable-diffusion-v1-4"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float32)
pipe = pipe.to("cpu")  # Use "cpu" for inference since you don't have a GPU


Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: 
```
pip install accelerate
```
.
Loading pipeline components...: 100%|████████████████████████████████████████████████████| 7/7 [00:04<00:00,  1.49it/s]


In [9]:
prompt = """A large, dark sculpture that sits on the grass. 
Shaped like a mushroom, or a cloud frozen in metal, made of bronze, 
has a deep brown color that seems to change with sunlight, sometimes lighter, sometimes black. 
the surface is smooth and a little rough with parts reflecting light and parts in the shadow. 
It is divided into sections like it has layers, 
with rounded parts that feel welcoming and sharp edges that show seriousness."""
image = pipe(prompt).images[0]  # Generates an image based on the prompt

# Display the image
image.show()


The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['parts that feel welcoming and sharp edges that show seriousness .']
100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [03:01<00:00,  3.63s/it]


In [10]:
image.save("firstfirst.png")


In [None]:
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image

class CustomDataset(Dataset):
    def __init__(self, image_paths, prompts, transform=None):
        self.image_paths = image_paths
        self.prompts = prompts
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("RGB")
        if self.transform:
            image = self.transform(image)
        prompt = self.prompts[idx]
        return image, prompt

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# Example usage:
# image_paths = ["path/to/image1.jpg", "path/to/image2.jpg", ...]
# prompts = ["A description for image 1", "A description for image 2", ...]
dataset = CustomDataset(image_paths, prompts, transform=transform)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
