In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'thai-language-image-captioning:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F75248%2F8348654%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240429%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240429T171813Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D1b43d4226c5502b7608295899dc44af6b58c355dfc57dec9d91dd41f54e2283927ae50cb1155957b93af27b0123aeacb780af2dc967e8574013f965158660d02853817c4b91355752ae27cd675c17aaf4997becf5f71a00c0e96b62e688c3d2f23071a24c35a3dd67084e7144141ba90d5d916c94495531ba0dfd890b7b239723a692a73d944f4ba0e27bd0a9887fa771044f0600f030e2004479cbc62773e41a28c8fe78bf791ab7324d75674979fd5a96d9fa51e88d53d4ac00cbb2e28f58fc966916e3cdfcd7adcb5f7e59ebb79bdaf8a7d4809e4737a82b73100b4bc5044b9c27308bb41be8d486333e77efbaf9e90bbf763ac9ff467c2bdb0f5f80258e5,coco-test-2017-images:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F745938%2F1291303%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240429%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240429T171813Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D08d640000c2e62682fada31fca012144fd1c8c32cdb90e1839ba4b239c78f3e9e9231185d74df62744e897f157e5681ef6e7de40061856005bba5a544d44007680e5e6e8e700f0a9c805038c9ce66b31983847d01b47066fe119d592d43fc21bef7246b8b27119859cc84dd9bacdcccff71a8360f07439f578c51e71d44b5d32a95de0955e75922434423f5f2cfe6a8be5cbb6bbc46b915d9b618925b56e0cf718c71ea8bde54f99badbd612de2c91eb2674788db4acb53607684e7a739b4ac56d05fa69f781b9a0bf6111590dbde7b1b0f59ef009b4b487fb0aded51fada536b69af2f2e28826ed0b0c44f4ee33efcad2430a3cbb3b7bd6064180f264ff71f0,expnoone:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4867984%2F8213574%2Fbundle%2Farchive.zip%3FX-Goog-Algorith
m%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240429%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240429T171813Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D304091888703884536471adc6f7435f90ce530c89388067e0bd7d113e5d154024a2005fb839abf462a5e2bd3c7f6d73a2ee99c81ec1d947fc0b26a76af4a60b21e97235559291239256ce135335a766f8b9bf89800fb0bf831c09d5c20c6872059d85f99feaecea6d5e0c845b20ea9207381756e118c9475fc6ef34a6ac056532d0566aab7ddd8a64b1a68039495eb4f95437002f1f2003a863fdea8562477331128b1ca1f9ab0f7df48611976386273b1d329a86f36c62505c928592570ac21765a8ebf3917ab4715440b76afa043b96a4874092b970be4cd553bbf352b628df1045eea420ecf430ffc48a36cc5cfce39a454e265f406a60cc0e0b50f0fb4ab'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source and unpack it under KAGGLE_INPUT_PATH.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<url-encoded download URL>"; split on the first
    # colon only so the URL part can never be cut in two.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header may be absent; in that case the bar stays empty
            # instead of crashing on int(None).
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # tarfile.open() below reopens the file by name, so buffered bytes
            # must be on disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing `tarfile` here would
                # replace the module and break the next non-zip data source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is a subclass of OSError, so it has to be handled first.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for current_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(current_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Become One with the Data

In [None]:
import numpy as np # linear algebra
import pandas as pd

sp_sm = pd.read_csv('/kaggle/input/thai-language-image-captioning/sample_submission.csv')
sp_sm.head()

In [None]:
import json
from tqdm import tqdm
# Load the annotation JSON; only its top-level keys are used below.
with open('/kaggle/input/thai-language-image-captioning/ipu24_v0.4.1_coco.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

dicts_train = {'name': []}
dicts_validaton = {'name': []}

# Route every key to the train or the validation list. "train" wins when a key
# contains both markers; keys containing neither marker are ignored.
for key, value in tqdm(data.items()):
    if "train" in key:
        bucket = dicts_train
    elif "val" in key:
        bucket = dicts_validaton
    else:
        continue
    bucket['name'].append('/kaggle/input/coco-2017-dataset/coco2017/' + key + '.jpg')

data_train_1 = pd.DataFrame(dicts_train)
data_validaton = pd.DataFrame(dicts_validaton)
data_train_1

In [None]:
import os
def walk_through_dir(dir_path):
  """
  Print a one-line summary for dir_path and every directory below it.

  Args:
    dir_path (str or pathlib.Path): directory to start walking from

  Returns:
    None. For each visited directory a line is printed stating:
      the number of subdirectories it contains
      the number of files it contains (reported as "images")
      the path of the directory itself
  """
  for current_dir, subdirs, files in os.walk(dir_path):
    summary = f"There are {len(subdirs)} directories and {len(files)} images in '{current_dir}'."
    print(summary)

In [None]:
walk_through_dir("/kaggle/input/thai-language-image-captioning/test/test")

In [None]:
from pathlib import Path
test_dir = Path("/kaggle/input/thai-language-image-captioning/test/test")
test_dir

In [None]:
import random
from PIL import Image
import glob

# Fix the seed so the same image is picked on every run (change it to see another one)
random.seed(888)

# Collect every .jpg located exactly one directory level below test_dir, then pick one
image_path_list = list(test_dir.glob("*/*.jpg"))
random_image_path = random.choice(image_path_list)

# The name of the directory holding the image is used as its class
image_class = random_image_path.parent.stem

img = Image.open(random_image_path)

# Report where the image came from and how large it is
for line in (
    f"Random image path: {random_image_path}",
    f"Image class: {image_class}",
    f"Image height: {img.height}",
    f"Image width: {img.width}",
):
    print(line)
img

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Convert the PIL image to a NumPy array so its shape can be shown in the title
img_as_array = np.asarray(img)

# Draw the array on an explicit figure/axes pair with the axes hidden
fig, ax = plt.subplots(figsize=(10, 7))
ax.imshow(img_as_array)
ax.set_title(f"Image class: {image_class} | Image shape: {img_as_array.shape} -> [height, width, color_channels]")
ax.axis(False);

# 2. Transforming Data

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
# Write transform for image
data_transform = transforms.Compose([
    # Resize the images to 64x64
    transforms.Resize(size=(64, 64)),
    # Flip the images randomly on the horizontal
    transforms.RandomHorizontalFlip(p=0.5), # p = probability of flip, 0.5 = 50% chance
    # Turn the image into a torch.Tensor
    transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0
])

In [None]:
def plot_transformed_images(image_paths, transform, n=3, seed=56):
    """Plots a series of random images from image_paths.

    Will open n image paths from image_paths, transform them
    with transform and plot them side by side (original on the left,
    transformed on the right), one figure per image.

    Note: this calls random.seed(seed), i.e. it reseeds the module-level
    random generator as a side effect.

    Args:
        image_paths (list): List of target image paths. Each path must expose
            `.parent.stem` (used for the figure title), e.g. pathlib.Path.
        transform (PyTorch Transforms): Transforms to apply to images. Its
            output must support `.permute(1, 2, 0)`.
        n (int, optional): Number of images to plot. Defaults to 3.
        seed (int, optional): Random seed for the random generator. Defaults to 56.
    """
    random.seed(seed)
    random_image_paths = random.sample(image_paths, k=n)
    for image_path in random_image_paths:
        with Image.open(image_path) as f:
            fig, ax = plt.subplots(1, 2)
            ax[0].imshow(f)
            ax[0].set_title(f"Original \nSize: {f.size}")
            ax[0].axis("off")

            # Transform and plot image
            # Note: permute() will change shape of image to suit matplotlib
            # (PyTorch default is [C, H, W] but Matplotlib is [H, W, C])
            transformed_image = transform(f).permute(1, 2, 0)
            ax[1].imshow(transformed_image)
            ax[1].set_title(f"Transformed \nSize: {transformed_image.shape}")
            ax[1].axis("off")

            # The parent directory name is shown as the class of the image
            fig.suptitle(f"Class: {image_path.parent.stem}", fontsize=16)

plot_transformed_images(image_path_list,
                        transform=data_transform,
                        n=3)

# Loading Image Data Using ImageFolder

In [None]:
# Use ImageFolder to create dataset(s)
from torchvision import datasets
# train_data = datasets.ImageFolder(root=train_dir, # target folder of images
#                                   transform=data_transform, # transforms to perform on data (images)
#                                   target_transform=None) # transforms to perform on labels (if necessary)

test_data = datasets.ImageFolder(root=test_dir,
                                 transform=data_transform)

# print(f"Train data:\n{train_data}\nTest data:\n{test_data}")
print(f"Test data:\n{test_data}")

In [None]:
# Get class names as a list
class_names = test_data.classes
class_names

In [None]:
# Can also get class names as a dict
class_dict = test_data.class_to_idx
class_dict

In [None]:
# Check the lengths
len(test_data)

In [None]:
# Fetch the first sample once: indexing test_data[0] twice would load the image
# and run the (random) transform pipeline a second time just to read the label.
img, label = test_data[0]
print(f"Image tensor:\n{img}")
print(f"Image shape: {img.shape}")
print(f"Image datatype: {img.dtype}")
print(f"Image label: {label}")
print(f"Label datatype: {type(label)}")

In [None]:
# Move the channel axis last so matplotlib can display the tensor
img_permute = img.permute(1, 2, 0)

# Show the layout before and after the permute
print(f"Original shape: {img.shape} -> [color_channels, height, width]")
print(f"Image permute shape: {img_permute.shape} -> [height, width, color_channels]")

# Draw the permuted image, titled with its class name
plt.figure(figsize=(10, 7))
plt.imshow(img_permute)
plt.axis("off")
plt.title(class_names[label], fontsize=14);

In [None]:
# Turn train and test Datasets into DataLoaders
from torch.utils.data import DataLoader
# train_dataloader = DataLoader(dataset=train_data,
#                               batch_size=1, # how many samples per batch?
#                               num_workers=1, # how many subprocesses to use for data loading? (higher = more)
#                               shuffle=True) # shuffle the data?

test_dataloader = DataLoader(dataset=test_data,
                             batch_size=16,
                             shuffle=False) # don't usually need to shuffle testing data

# train_dataloader, test_dataloader
test_dataloader

In [None]:
# Pull a single batch from the test DataLoader (this rebinds img and label to batched tensors)
img, label = next(iter(test_dataloader))

# The leading dimension is the DataLoader's batch_size (16 where test_dataloader is built); try changing it and see what happens
print(f"Image shape: {img.shape} -> [batch_size, color_channels, height, width]")
print(f"Label shape: {label.shape}")

# Loading Image Data with a Custom Dataset

In [None]:
import os
import pathlib
import torch

from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from typing import Tuple, Dict, List

In [None]:
# Instance of torchvision.datasets.ImageFolder()
test_data.classes, test_data.class_to_idx

In [None]:
# Setup path for target directory
target_directory = test_dir
print(f"Target directory: {target_directory}")

# Get the class names from the target directory: every entry name directly
# inside it, sorted. The scandir iterator is closed as soon as the names are read.
with os.scandir(target_directory) as entries:
    class_names_found = sorted(entry.name for entry in entries)
print(f"Class names found: {class_names_found}")

# Try to Predict

In [None]:
sp_sm = pd.read_csv('/kaggle/input/thai-language-image-captioning/sample_submission.csv')
sp_sm

In [None]:
sp_sm['caption'][2]

- old

In [None]:
from transformers import AutoProcessor, BlipForConditionalGeneration

# Load the processor and the captioning model from the same BLIP checkpoint.
# device_map="cuda" asks for the weights to be placed on the GPU; the later cells
# also move their inputs to "cuda".
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", device_map="cuda")

In [None]:
from tqdm import tqdm
from PIL import Image

def predicts(image):
    """Generate a caption for one image with the global processor and model.

    Args:
        image: Image accepted by `processor(images=...)`, e.g. a PIL image.

    Returns:
        str: The first decoded caption, with special tokens removed.
    """
    inputs = processor(images=image, return_tensors="pt").to("cuda")
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=200)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def _test_image_path(image_id):
    """Return the .jpg path for image_id: COCO test2017 ids live in their own dataset."""
    if 'test2017' in image_id:
        return '/kaggle/input/coco-test-2017-images/' + image_id + '.jpg'
    return '/kaggle/input/thai-language-image-captioning/test/' + image_id + '.jpg'


# One caption per row of the sample submission, in row order.
ans = []
for i in tqdm(sp_sm['image_id']):
    image_url = _test_image_path(i)
    # Close each file right after use (the loop visits every test image) and
    # force RGB so grayscale/palette images are handled like in the later cells.
    with Image.open(image_url) as image_file:
        image = image_file.convert('RGB')
    ans.append(predicts(image))


In [None]:
ans

In [None]:
# Overwrite the caption of every row from position 3 onward with the generated
# text. A single .loc assignment is used because chained assignment
# (sp_sm['caption'][3:] = ...) may write to a temporary copy and leave sp_sm unchanged.
sp_sm.loc[sp_sm.index[3:], 'caption'] = ans[3:]
sp_sm

- new

In [None]:
import numpy as np # linear algebra
import pandas as pd

sp_sm = pd.read_csv('/kaggle/input/thai-language-image-captioning/sample_submission.csv')
sp_sm

In [None]:
# Rows whose image_id mentions "test2017" belong to the COCO test set;
# everything else is kept as the food/travel subset.
is_test2017 = sp_sm['image_id'].str.contains('test2017', regex=False)
test2017_count = int(is_test2017.sum())

# Select by mask rather than by position so the result does not depend on the
# test2017 rows being stored first in the file.
food_travel = sp_sm[~is_test2017]
food_travel

In [None]:
# df_food = food_travel[food_travel['image_id'].str.contains('food', case=False)].reset_index(drop=True)
# df_food

In [None]:
# df_travel = food_travel[food_travel['image_id'].str.contains('food', case=False)].reset_index(drop=True)
# df_travel

In [None]:
!pip install -q git+https://github.com/huggingface/peft.git transformers bitsandbytes datasets

In [None]:
!pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:
!pip install huggingface_hub

- boss git large coco

In [None]:
from transformers import AutoProcessor, GitForCausalLM

# The captioning weights come from the MagiBoss/EXP-Git-large-ip24 checkpoint, while the
# processor is taken from microsoft/git-large-coco (presumably the base model that
# checkpoint was fine-tuned from; confirm).
# NOTE: this rebinds the global `model` and `processor` used by earlier cells.
model = GitForCausalLM.from_pretrained("MagiBoss/EXP-Git-large-ip24", device_map=0)
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")

In [None]:
from PIL import Image

# Smoke test: caption one travel test image with the currently loaded `model`.
raw_image = Image.open('/kaggle/input/thai-language-image-captioning/test/test/travel/08002.jpg').convert('RGB')
# Preprocess the image and move the resulting tensors to the GPU
inputs = processor(images=raw_image, return_tensors="pt").to("cuda")
pixel_values = inputs.pixel_values
generated_ids = model.generate(pixel_values=pixel_values, max_length=128)
# batch_decode returns a list of strings; the whole list is displayed
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)
generated_caption

In [None]:
raw_image

In [None]:
# model = GitForCausalLM.from_pretrained("microsoft/git-large-coco", device_map=0)

# raw_image = Image.open('/kaggle/input/thai-language-image-captioning/test/test/food/00011.jpg').convert('RGB')
# inputs = processor(images=raw_image, return_tensors="pt").to("cuda")
# pixel_values = inputs.pixel_values
# generated_ids = model.generate(pixel_values=pixel_values, max_length=128)
# generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)
# generated_caption

- clean git large coco (+ LoRA)

In [None]:
from huggingface_hub import notebook_login
notebook_login()

In [None]:
from peft import PeftModel, PeftConfig
from transformers import AutoProcessor, GitForCausalLM

# Hub ids of the two PEFT adapters, one for food images and one for travel images
peft_food_clean = "PakinClean/git-large-coco-food"
peft_travel_clean = "PakinClean/git-large-coco-travel"

# Each adapter config records the base model it has to be applied to
config_food_clean = PeftConfig.from_pretrained(peft_food_clean)
config_travel_clean = PeftConfig.from_pretrained(peft_travel_clean)

# Load the base model named by the config, then wrap it with the food adapter
mod_food_clean = GitForCausalLM.from_pretrained(config_food_clean.base_model_name_or_path, device_map=0)
mod_food_clean = PeftModel.from_pretrained(mod_food_clean, peft_food_clean)

# Same two steps for the travel adapter; a separate copy of the base model is loaded
mod_travel_clean = GitForCausalLM.from_pretrained(config_travel_clean.base_model_name_or_path, device_map=0)
mod_travel_clean = PeftModel.from_pretrained(mod_travel_clean, peft_travel_clean)

# Processor shared by all GIT models below (rebinds the global `processor`)
processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")

In [None]:
# Plain microsoft/git-large-coco without any adapter; used for comparison with the
# adapter models and for images that are neither food nor travel.
mod_pre = GitForCausalLM.from_pretrained("microsoft/git-large-coco", device_map=0)

In [None]:
from PIL import Image

# Smoke test: caption one food test image with the food adapter model.
raw_image = Image.open('/kaggle/input/thai-language-image-captioning/test/test/food/00000.jpg').convert('RGB')
# Preprocess the image and move the resulting tensors to the GPU
inputs = processor(images=raw_image, return_tensors="pt").to("cuda")
# pixel_values stays in the notebook namespace and is reused by the next cell
pixel_values = inputs.pixel_values
generated_ids = mod_food_clean.generate(pixel_values=pixel_values, max_length=128)
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)
generated_caption

In [None]:
# Caption the same image with the model without adapter for comparison.
# Relies on `pixel_values` left behind by the most recently run captioning cell.
generated_ids = mod_pre.generate(pixel_values=pixel_values, max_length=128)
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)
generated_caption

In [None]:
raw_image

In [None]:
from PIL import Image

# Smoke test: caption one travel test image with the travel adapter model.
raw_image = Image.open('/kaggle/input/thai-language-image-captioning/test/test/travel/00001.jpg').convert('RGB')
# Preprocess the image and move the resulting tensors to the GPU
inputs = processor(images=raw_image, return_tensors="pt").to("cuda")
# pixel_values stays in the notebook namespace and is reused by a later cell
pixel_values = inputs.pixel_values
generated_ids = mod_travel_clean.generate(pixel_values=pixel_values, max_length=128)
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)
generated_caption

In [None]:
raw_image

In [None]:
# Caption the same image with the model without adapter for comparison.
# Relies on `pixel_values` left behind by the most recently run captioning cell.
generated_ids = mod_pre.generate(pixel_values=pixel_values, max_length=128)
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)
generated_caption

## Inference

### git-large

In [None]:
import torch
from tqdm.auto import tqdm
from PIL import Image

BATCH_SIZE = 64
THAI_TEST_DIR = '/kaggle/input/thai-language-image-captioning/test/'
COCO_TEST_DIR = '/kaggle/input/coco-test-2017-images/'


def predicts(image_ls, model):
    """Generate one caption per image of a batch.

    Args:
        image_ls (list): Images forming one batch, as accepted by `processor(images=...)`.
        model: Model whose `generate(pixel_values=..., max_length=...)` is called.

    Returns:
        list[str]: Decoded captions with special tokens removed; callers pair
        them with the input images by position.
    """
    inputs = processor(images=image_ls, return_tensors="pt").to("cuda")
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=128)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)


def _route_image(image_id):
    """Return (domain, image path) for image_id; anything not food/travel is treated as COCO."""
    if 'food' in image_id:
        return 'food', THAI_TEST_DIR + image_id + '.jpg'
    if 'travel' in image_id:
        return 'travel', THAI_TEST_DIR + image_id + '.jpg'
    return 'coco', COCO_TEST_DIR + image_id + '.jpg'


def _flush_batch(domain):
    """Caption the pending images of one domain, store the captions in sp_sm and empty the batch."""
    images, indices = pending[domain]
    if not images:
        return
    generated = predicts(images, domain_models[domain])
    for row_index, caption in zip(indices, generated):
        sp_sm.loc[row_index, "caption"] = caption
    images.clear()
    indices.clear()


# Every domain has its own model and its own pending batch, so a batch is
# always captioned by the model that matches all of its images.
domain_models = {'food': mod_food_clean, 'travel': mod_travel_clean, 'coco': mod_pre}
pending = {domain: ([], []) for domain in domain_models}

for index, series in tqdm(sp_sm.iterrows(), total=len(sp_sm)):
    # Only rows without a caption yet (NaN) are processed.
    if not pd.isna(series['caption']):
        continue
    domain, image_url = _route_image(series["image_id"])
    # convert() returns a new image, so the file can be closed right away.
    with Image.open(image_url) as image_file:
        raw_image = image_file.convert('RGB')
    images, indices = pending[domain]
    images.append(raw_image)
    indices.append(index)
    if len(images) >= BATCH_SIZE:
        _flush_batch(domain)

# Caption whatever is left in the partially filled batches.
for domain in domain_models:
    _flush_batch(domain)

### mblip-mt0-xl

In [None]:
!pip install accelerate

In [None]:
import torch
from PIL import Image
import requests
from transformers import Blip2Processor, Blip2ForConditionalGeneration

# Load the mBLIP processor and model from the same checkpoint.
# The model is requested with 8-bit loading, placement on "cuda" and bfloat16 as torch dtype.
# NOTE: this rebinds the global `processor` and `model` used by the earlier sections.
processor = Blip2Processor.from_pretrained("Gregor/mblip-mt0-xl")
model = Blip2ForConditionalGeneration.from_pretrained("Gregor/mblip-mt0-xl",
                                                      load_in_8bit=True,
                                                      device_map="cuda",
                                                      torch_dtype=torch.bfloat16)

In [None]:
# Smoke test: caption a single food test image with mBLIP before the batch run.
image = Image.open('/kaggle/input/thai-language-image-captioning/test/test/food/00000.jpg').convert('RGB')
prompt = "<ASSISTANT>: Describe this image to thai caption.\n<USER>:"

# The processed inputs must live on the same device as the model; floating-point
# tensors are cast to bfloat16, the dtype the model was loaded with.
inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda", torch.bfloat16)
# generate() takes the individual tensors as keyword arguments, hence the unpacking.
generated_ids = model.generate(**inputs)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
generated_text

In [None]:
import torch
from tqdm.auto import tqdm
from PIL import Image

BATCH_SIZE = 64
PROMPT = '<ASSISTANT>: Describe this image to thai caption.\n<USER>:'
image_ls = []
index_ls = []


def predicts(image_ls, question=None):
    """Generate one caption per image of a batch with the global processor and model.

    Args:
        image_ls (list): Images forming one batch, as accepted by `processor(images=...)`.
        question (list[str], optional): One prompt per image. Defaults to
            PROMPT repeated for every image.

    Returns:
        list[str]: Decoded captions with special tokens removed; callers pair
        them with the input images by position.
    """
    if question is None:
        question = [PROMPT] * len(image_ls)
    # Inputs go to the GPU; floating-point tensors are cast to bfloat16, the
    # dtype the model was loaded with.
    inputs = processor(images=image_ls, text=question, return_tensors="pt").to("cuda", torch.bfloat16)
    # generate() takes the individual tensors as keyword arguments, hence the unpacking.
    generated_ids = model.generate(**inputs, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)


def _flush_batch():
    """Caption the images waiting in image_ls, store the captions in sp_sm and empty the batch."""
    if not image_ls:
        return
    # The prompt list is sized to the actual batch, which is shorter than
    # BATCH_SIZE for the last batch.
    generated = predicts(image_ls, [PROMPT] * len(image_ls))
    for row_index, caption in zip(index_ls, generated):
        sp_sm.loc[row_index, "caption"] = caption
    image_ls.clear()
    index_ls.clear()


for index, series in tqdm(sp_sm.iterrows(), total=len(sp_sm)):
    # Only rows without a caption yet (NaN) are processed.
    if not pd.isna(series['caption']):
        continue
    if 'test2017' in series["image_id"]:
        image_url = '/kaggle/input/coco-test-2017-images/' + series["image_id"] + '.jpg'
    else:
        image_url = '/kaggle/input/thai-language-image-captioning/test/' + series["image_id"] + '.jpg'
    # convert() returns a new image, so the file can be closed right away.
    with Image.open(image_url) as image_file:
        raw_image = image_file.convert('RGB')
    image_ls.append(raw_image)
    index_ls.append(index)
    if len(image_ls) >= BATCH_SIZE:
        _flush_batch()

# Caption whatever is left in the last, partially filled batch.
_flush_batch()

----

In [None]:
sp_sm

In [None]:
sp_sm.to_excel('mongkol_gitL.xlsx', index=False)

In [None]:
# Cut the caption column into four slices of 10000 rows plus one slice holding
# everything from row 40000 onward.
chunk_bounds = [(None, 10000), (10000, 20000), (20000, 30000), (30000, 40000), (40000, None)]
dt_1, dt_2, dt_3, dt_4, dt_5 = (sp_sm['caption'][start:stop] for start, stop in chunk_bounds)

# Write each slice to its own workbook: test_1.xlsx ... test_5.xlsx
for chunk_number, chunk in enumerate((dt_1, dt_2, dt_3, dt_4, dt_5), start=1):
    chunk.to_excel(f'test_{chunk_number}.xlsx', index=False)

In [None]:
sub = pd.read_csv('/kaggle/input/thai-language-image-captioning/sample_submission.csv')