In [10]:
import os
from PIL import Image
from torch.utils.data import Dataset
from transformers import BlipProcessor, BlipForConditionalGeneration

In [11]:
class ImageFolderDataset(Dataset):
    """Dataset over every regular file located directly inside ``root_dir``.

    Each item is an ``(image, file_name)`` pair. The image is opened with PIL,
    converted to RGB and, when a ``transform`` was supplied, passed through it.
    Sub-directories of ``root_dir`` are ignored (no recursion).
    """

    def __init__(self, root_dir, transform=None):
        """Collect the file names found in ``root_dir``.

        Args:
            root_dir: Path of the folder that holds the image files.
            transform: Optional callable applied to each loaded image.
        """
        self.root_dir = root_dir
        # os.listdir() yields entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and machines.
        self.images = sorted(
            entry
            for entry in os.listdir(root_dir)
            if os.path.isfile(os.path.join(root_dir, entry))
        )
        self.transform = transform

    def __len__(self):
        """Return the number of files discovered in ``root_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load the image at position ``idx``.

        Returns:
            A tuple ``(image, img_name)`` where ``img_name`` is the bare file
            name (not the full path).

        Raises:
            IndexError: If ``idx`` is out of range. Plain ``for`` iteration
                over the dataset relies on this to stop.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.root_dir, img_name)

        # Image.open() is lazy and keeps the file open. convert() forces the
        # pixel data to be read and returns an independent image, so the file
        # handle can be released right away. Converting to RGB also normalises
        # grayscale / palette / RGBA inputs to three channels.
        with Image.open(img_path) as source:
            image = source.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, img_name

In [12]:
# Base folder of the dataset; the caption file is later written next to the
# "images" sub-folder that is read here.
sin_dir = "datasets/sin_dataset_img"

# No transform is passed, so items are returned as loaded by the dataset class.
sin_dataset = ImageFolderDataset(root_dir=sin_dir + "/images")

In [13]:
# The processor and the BLIP conditional-generation model are loaded from the
# same pretrained checkpoint so that preprocessing matches the weights.
pretrained_id = "abhijit2111/Pic2Story"
processor = BlipProcessor.from_pretrained(pretrained_id)
model = BlipForConditionalGeneration.from_pretrained(pretrained_id)

In [None]:
# Generate a caption for every image in the dataset and write one
# "<file name>;<caption>" line per image to captions.txt.
# The encoding is pinned to UTF-8: without it open() uses the locale's default
# encoding, which can fail on non-ASCII file names or captions and makes the
# output file differ between machines.
with open(f"{sin_dir}/captions.txt", "w", encoding="utf-8") as file:
    for image, img_name in sin_dataset:
        # Preprocess one image into PyTorch tensors, generate token ids, then
        # decode the first (only) sequence back to text without special tokens.
        inputs = processor(image, return_tensors="pt")
        out = model.generate(**inputs)
        generated_text = processor.decode(out[0], skip_special_tokens=True)

        # NOTE: fields are written unescaped; a ';' inside a file name or
        # caption makes the line ambiguous for readers that split on ';'.
        file.write(f"{img_name};{generated_text}\n")

# User-facing status message (Russian): "Results saved to captions.txt".
print("Результаты сохранены в captions.txt")