In [1]:
import pandas as pd
import torch
from PIL import Image
import urllib.request

from transformers import AutoProcessor, Blip2ForConditionalGeneration

## Read data

In [2]:
from google.colab import drive
drive.mount('/content/drive')

path='/content/drive/My Drive/ErwModul_24/data/preprocessed_wit_1_percent_20_langs.tsv'
folder_path = '/content/drive/My Drive/ErwModul_24'

Mounted at /content/drive


In [3]:
df = pd.read_csv(path, sep='\t')
df

Unnamed: 0.1,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,0,en,https://upload.wikimedia.org/wikipedia/commons...,Great Sleigh Drive (1678):Frederick William pu...,Brandenburg-Prussia,Dutch and Scanian Wars
1,1,en,http://upload.wikimedia.org/wikipedia/commons/...,G9 crew: Stoker Drake at extreme right of pict...,HMS G9,Loss
2,2,en,https://upload.wikimedia.org/wikipedia/commons...,Huichol woman and child,Huichol art,The Huichol People
3,3,en,https://upload.wikimedia.org/wikipedia/commons...,Merner-Pfeiffer Hall part of the Conservatory ...,Baldwin Wallace University,April Reign
4,4,en,https://upload.wikimedia.org/wikipedia/commons...,"1917 advertisement featuring Lloyd as ""Lonesom...",Harold Lloyd,Silent shorts and features
...,...,...,...,...,...,...
16795,16795,vi,https://upload.wikimedia.org/wikipedia/commons...,"Núi Sugarloaf, Rio de Janeiro được sử dụng cho...",The Amazing Race 2,Chặng 1 (Hoa Kỳ → Brasil)
16796,16796,vi,https://upload.wikimedia.org/wikipedia/commons...,Septic river.,Ô nhiễm biển,Nguồn xả trực tiếp
16797,16797,vi,https://upload.wikimedia.org/wikipedia/commons...,Phục nguyên P. grangeri,Platybelodon,Mô tả
16798,16798,vi,http://upload.wikimedia.org/wikipedia/commons/...,Một góc thành phố Huế bị tàn phá trơ trụi. Ảnh...,Sự kiện Tết Mậu Thân,Tại Huế


## Load BLIP-2 model

In [4]:
# setup device to use
device = torch.device('cuda') if torch.cuda.is_available() else 'cpu'

In [5]:
# loads mBLIP pre-trained model
processor = AutoProcessor.from_pretrained('Gregor/mblip-mt0-xl')
model = Blip2ForConditionalGeneration.from_pretrained('Gregor/mblip-mt0-xl', torch_dtype=torch.float16)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/432 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/324 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/7.06k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/133k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/9.38G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
model.to(device)

Blip2ForConditionalGeneration(
  (vision_model): Blip2VisionModel(
    (embeddings): Blip2VisionEmbeddings(
      (patch_embedding): Conv2d(3, 1408, kernel_size=(14, 14), stride=(14, 14))
    )
    (encoder): Blip2Encoder(
      (layers): ModuleList(
        (0-38): 39 x Blip2EncoderLayer(
          (self_attn): Blip2Attention(
            (dropout): Dropout(p=0.0, inplace=False)
            (qkv): Linear(in_features=1408, out_features=4224, bias=True)
            (projection): Linear(in_features=1408, out_features=1408, bias=True)
          )
          (layer_norm1): LayerNorm((1408,), eps=1e-06, elementwise_affine=True)
          (mlp): Blip2MLP(
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=1408, out_features=6144, bias=True)
            (fc2): Linear(in_features=6144, out_features=1408, bias=True)
          )
          (layer_norm2): LayerNorm((1408,), eps=1e-06, elementwise_affine=True)
        )
      )
    )
    (post_layernorm): LayerNorm((

## Generate results for English

In [12]:
captions_eng = []
for i in range(0, 793):
  img_url_en = df['image_url'][i]
  try:
    img_path_en, _ = urllib.request.urlretrieve(img_url_en)

    # load image
    image_en = Image.open(img_path_en).convert('RGB')
  except:
    continue
  prompt = 'Page Title: {}, Section Title: {}. Caption the image:'.format(df['page_title'][i], df['section_title'][i])

  inputs = processor(image_en, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text_en = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_eng.append((df['caption_reference_description'][i],generated_text_en))

In [13]:
len(captions_eng)

756

In [14]:
print(captions_eng)

[('Great Sleigh Drive (1678):Frederick William pursues Swedish troops across the frozen Curonian Lagoon; fresco by Wilhelm Simmler, ca. 1891', 'Een schilderij van een paard en een man die op een paard rijden.'), ('G9 crew: Stoker Drake at extreme right of picture standing-easy. A B Baby centre of back row of 5.', 'A group of sailors posing for a photo on a ship.'), ('Huichol woman and child', 'The Huichol People are a group of people who live in the northern part of the United'), ('Merner-Pfeiffer Hall part of the Conservatory of Music, which hosts the yearly Bach Festival', 'A large brick building with a snowy lawn.'), ('1917 advertisement featuring Lloyd as "Lonesome Luke", with Snub Pollard and Bebe Daniels', 'Silent shorts and features'), ('Root Rectangles to Generate diagonals for Dynamic Symmetry', 'a diagram showing the lines of a triangle with a triangle between'), ('MV Vogelsberg in the port of Freetown 1958', 'A large ship is sitting in a dock.'), ('Karatoa River near Mahasth

In [54]:
with open('/content/drive/My Drive/ErwModul_24/results/eng_context_transl.txt', 'w') as writeEngFile:
    for cap in captions_eng:
        writeEngFile.write(cap[0]+'\n')
        writeEngFile.write('|\n')
        writeEngFile.write(cap[1]+'\n|||\n')

In [16]:
captions_eng2 = []
for i in range(0, 793):
  img_url_en = df['image_url'][i]
  try:
    img_path_en, _ = urllib.request.urlretrieve(img_url_en)

    # load image
    image_en = Image.open(img_path_en).convert('RGB')
  except:
    continue

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in English:'.format(df['page_title'][i], df['section_title'][i])

  inputs = processor(image_en, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text_en = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_eng2.append((df['caption_reference_description'][i],generated_text_en))

In [17]:
len(captions_eng)

756

In [55]:
with open('/content/drive/My Drive/ErwModul_24/results/eng_context.txt', 'w') as writeEng2File:
    for cap in captions_eng2:
        writeEng2File.write(cap[0]+'\n')
        writeEng2File.write('|\n')
        writeEng2File.write(cap[1]+'\n|||\n')

## Generate example for German

In [19]:
df_de = df[df['language'] == 'de'].reset_index()
df_de

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,840,840,de,https://upload.wikimedia.org/wikipedia/commons...,Südansicht der Znaimer Altstadt,Znojmo,Mittelalter
1,841,841,de,https://upload.wikimedia.org/wikipedia/commons...,Goldbronze-Relief der Stadt Uelzen vor dem Bra...,Uelzen,Mittelalter
2,842,842,de,https://upload.wikimedia.org/wikipedia/commons...,Haupthaus und Zehntscheune (rechts) des Gutes ...,Dunau,Geschichte
3,843,843,de,https://upload.wikimedia.org/wikipedia/commons...,Chrom(III)-acetylacetonat,Chrom(III)-acetylacetonat,Eigenschaften
4,844,844,de,https://upload.wikimedia.org/wikipedia/commons...,Schweizer Standardwagen-Zug aus Bern auf der L...,Straßenbahn Iași,Nach 1990
...,...,...,...,...,...,...,...
835,1675,1675,de,http://upload.wikimedia.org/wikipedia/commons/...,Bahnhof Lauterecken-Grumbach,Glantalbahn,Lauterecken-Grumbach
836,1676,1676,de,https://upload.wikimedia.org/wikipedia/commons...,Wanderung mit Bienenvölkern in großem Maßstab ...,Imker,Geschichte der Bienenwirtschaft
837,1677,1677,de,https://upload.wikimedia.org/wikipedia/commons...,Militärischer Konvoi in der Region Katerini,Griechische Militärdiktatur,Der Putsch auf Zypern
838,1678,1678,de,https://upload.wikimedia.org/wikipedia/commons...,Fahne des 13e RI in der Kathedrale von Nevers,13e régiment d’infanterie,Fahne und Auszeichnungen


In [20]:
captions_de = []
for i in range(0, 793):
  img_url_de = df_de['image_url'][i]
  try:
    img_path_de, _ = urllib.request.urlretrieve(img_url_de)

    # load image
    image_de = Image.open(img_path_de).convert('RGB')
  except:
    continue
  prompt = 'Titel der Seite: {}, Titel des Abschnitts: {}. Untertitel das Bild:'.format(df_de['page_title'][i], df_de['section_title'][i])

  inputs = processor(image_de, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text_de = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_de.append((df_de['caption_reference_description'][i],generated_text_de))

In [21]:
len(captions_de)

752

In [56]:
with open('/content/drive/My Drive/ErwModul_24/results/de_context_transl.txt', 'w') as writeDeFile:
    for cap in captions_de:
        writeDeFile.write(cap[0]+'\n')
        writeDeFile.write('|\n')
        writeDeFile.write(cap[1]+'\n|||\n')

In [25]:
captions_de2 = []
for i in range(0, 793):
  img_url_de = df_de['image_url'][i]
  try:
    img_path_de, _ = urllib.request.urlretrieve(img_url_de)

    # load image
    image_de = Image.open(img_path_de).convert('RGB')
  except:
    continue
  prompt = 'Page Title: {}, Section Title: {}. Caption the image in German:'.format(df_de['page_title'][i], df_de['section_title'][i])

  inputs = processor(image_de, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text_de = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_de2.append((df_de['caption_reference_description'][i],generated_text_de))

In [26]:
len(captions_de2)

751

In [57]:
with open('/content/drive/My Drive/ErwModul_24/results/de_context.txt', 'w') as writeDe2File:
    for cap in captions_de2:
        writeDe2File.write(cap[0]+'\n')
        writeDe2File.write('|\n')
        writeDe2File.write(cap[1]+'\n|||\n')

## Generate example for Spanish


In [29]:
df_es = df[df['language'] == 'es'].reset_index()
df_es

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,2520,2520,es,https://upload.wikimedia.org/wikipedia/commons...,"""The Modern Gilpins"" - Rivalidad entre el hunk...",Barnburners y Hunkers,Hunkers
1,2521,2521,es,https://upload.wikimedia.org/wikipedia/commons...,Mapa de los países operadores del GTX Boxer.,GTK Boxer,Usuarios
2,2522,2522,es,https://upload.wikimedia.org/wikipedia/commons...,Convento de Nuestra Señora del Carmen (Soria).,Catalina de Cristo,Fundaciones
3,2523,2523,es,https://upload.wikimedia.org/wikipedia/commons...,"Min-Amon, fin del Nuevo Imperio (ca. 1200 a. C.)",Esteatita,Usos históricos
4,2524,2524,es,https://upload.wikimedia.org/wikipedia/commons...,Es una de tres grandes casas donde se aloja la...,Jorge Denegre Vaught Peña,"""Dos siglos de discursos patrióticos, colecció..."
...,...,...,...,...,...,...,...
835,3355,3355,es,https://upload.wikimedia.org/wikipedia/commons...,Bitis arietans (adulto),Bitis arietans,Descripción
836,3356,3356,es,https://upload.wikimedia.org/wikipedia/commons...,Curva de la población humana que muestra el cr...,Albert Allen Bartlett,Crecimiento poblacional
837,3357,3357,es,https://upload.wikimedia.org/wikipedia/commons...,Mapa del Mar Egeo,Martino Zaccaria,Reconquista bizantina de Quíos
838,3358,3358,es,https://upload.wikimedia.org/wikipedia/commons...,Camión Mercedes-Benz Econic de recogida de des...,Mercedes-Benz Econic,Producción


In [30]:
captions_es = []
captions_es2 = []
for i in range(0, 800):
  img_url_es = df_es['image_url'][i]
  try:
    img_path_es, _ = urllib.request.urlretrieve(img_url_es)

    # load image
    image_es = Image.open(img_path_es).convert('RGB')
  except:
    continue

  prompt = 'Título de página: {}, Sección Título: {}. Capción de la imagen:'.format(df_es['page_title'][i], df_es['section_title'][i])
  inputs = processor(image_es, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text_es = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_es.append((df_es['caption_reference_description'][i],generated_text_es))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Spanish:'.format(df_es['page_title'][i], df_es['section_title'][i])
  inputs = processor(image_es, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text_es = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_es2.append((df_es['caption_reference_description'][i],generated_text_es))

In [31]:
len(captions_es)

762

In [58]:
with open('/content/drive/My Drive/ErwModul_24/results/es_context_transl.txt', 'w') as writefile:
    for cap in captions_es:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/es_context.txt', 'w') as writefile:
    for cap in captions_es2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

## Generate example for French


In [33]:
df_fr = df[df['language'] == 'fr'].reset_index()
df_fr

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,1680,1680,fr,http://upload.wikimedia.org/wikipedia/commons/...,Oratoire de Catherine de Médicis.,Château de Blois,L'oratoire
1,1681,1681,fr,https://upload.wikimedia.org/wikipedia/commons...,Un Lancaster lâchant des paillettes qui formen...,Reginald Victor Jones,Paillettes
2,1682,1682,fr,https://upload.wikimedia.org/wikipedia/commons...,Hessel Gerritsz - La Austrialia del Espiritu S...,Pedro Fernandes de Queirós,Biographie
3,1683,1683,fr,https://upload.wikimedia.org/wikipedia/commons...,Devitt avec le IWGP Junior Heavyweight Champio...,Finn Bálor,Palmarès
4,1684,1684,fr,http://upload.wikimedia.org/wikipedia/commons/...,Gravure d'une monnaie frappée par la républiqu...,République de Volterra,La menace pisane
...,...,...,...,...,...,...,...
835,2515,2515,fr,https://upload.wikimedia.org/wikipedia/commons...,L'ancienne exploitation française d'Alizay emp...,Metsä Board,Histoire
836,2516,2516,fr,https://upload.wikimedia.org/wikipedia/commons...,Circulation routière aux abords de Hong Kong.,Histoire de l'automobile,Évolution du marché
837,2517,2517,fr,https://upload.wikimedia.org/wikipedia/commons...,Occupation alliée du Japon. Défilé du 5th Roya...,1946,Asie et Pacifique
838,2518,2518,fr,https://upload.wikimedia.org/wikipedia/commons...,Avril 2016,Ko Kyeong-pyo,Spectacle de variété


In [34]:
captions_fr = []
captions_fr2 = []
for i in range(0, 800):
  img_url = df_fr['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Titre de la page: {}, Titre de la section: {}. Captionnez l\'image:'.format(df_fr['page_title'][i], df_fr['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_fr.append((df_fr['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in French:'.format(df_fr['page_title'][i], df_fr['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_fr2.append((df_fr['caption_reference_description'][i],generated_text))

len(captions_fr)

759

In [59]:
with open('/content/drive/My Drive/ErwModul_24/results/fr_context_transl.txt', 'w') as writefile:
    for cap in captions_fr:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/fr_context.txt', 'w') as writefile:
    for cap in captions_fr2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

## Generate example for Italian


In [36]:
df_it = df[df['language'] == 'it'].reset_index()
df_it

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,3360,3360,it,https://upload.wikimedia.org/wikipedia/commons...,Maria Maddalena con il vaso degli oli,Ex chiesa di Santa Maria Maddalena (Bergamo),Storia
1,3361,3361,it,https://upload.wikimedia.org/wikipedia/commons...,"Il viceré d'Italia, Eugenio di Beauharnais, su...",Sesta coalizione,Ritirata dei resti della Grande Armata e defez...
2,3362,3362,it,https://upload.wikimedia.org/wikipedia/commons...,"Il castello: cortile interno, parete est, anti...",Fortificazioni medievali di Rovigo,Il Castello
3,3363,3363,it,https://upload.wikimedia.org/wikipedia/commons...,"Monumento memoriale a Gioacchino Ventura, Basi...",Gioacchino Ventura,Biografia
4,3364,3364,it,https://upload.wikimedia.org/wikipedia/commons...,Luciano Zauri ai tempi della Fiorentina (stagi...,Luciano Zauri,Fiorentina
...,...,...,...,...,...,...,...
835,4195,4195,it,https://upload.wikimedia.org/wikipedia/commons...,"L'Agnello, mosaico nell'interno della cupola d...",Concilio di Nicea I,Calcolo della data della Pasqua cristiana in m...
836,4196,4196,it,https://upload.wikimedia.org/wikipedia/commons...,Vecchio logo Brioni usato dal 1986 al 2016,Brioni (azienda),1945-2011
837,4197,4197,it,https://upload.wikimedia.org/wikipedia/commons...,"Dakosaurus, coccodrillo marino.",Titoniano,†Talattosuchi
838,4198,4198,it,http://upload.wikimedia.org/wikipedia/commons/...,Alberto Del Rio durante un match in AAA nel gi...,Alberto Del Rio,Asistencia Asesoría y Administración (2000–2005)


In [37]:
captions_it = []
captions_it2 = []
for i in range(0, 800):
  img_url = df_it['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Titolo della pagina: {}, Titolo della sezione: {}. Capitulare l\'immagine:'.format(df_it['page_title'][i], df_it['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_it.append((df_it['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Italian:'.format(df_it['page_title'][i], df_it['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_it2.append((df_it['caption_reference_description'][i],generated_text))

len(captions_it)

766

In [60]:
with open('/content/drive/My Drive/ErwModul_24/results/it_context_transl.txt', 'w') as writefile:
    for cap in captions_it:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/it_context.txt', 'w') as writefile:
    for cap in captions_it2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

## Generate example for Japanese


In [39]:
df_ja = df[df['language'] == 'ja'].reset_index()
df_ja

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,5040,5040,ja,https://upload.wikimedia.org/wikipedia/commons...,紙本墨書南番文字（原本）,シャー・ナーメ,後代への影響
1,5041,5041,ja,https://upload.wikimedia.org/wikipedia/commons...,愛媛県立今治西高等学校伯方分校（Blue pog.svg伯方分校）及びその周辺（愛媛県側）,愛媛県立今治西高等学校伯方分校,概観
2,5042,5042,ja,https://upload.wikimedia.org/wikipedia/commons...,ハンプトン邸宅、1861年,ハンプトン国立史跡,19世紀
3,5043,5043,ja,https://upload.wikimedia.org/wikipedia/commons...,キャナルシティ博多停留所に進入する高千穂発のYOKAROバス,キャナルシティ博多,バス
4,5044,5044,ja,https://upload.wikimedia.org/wikipedia/commons...,掩体壕,神奈川県立体育センター,その他
...,...,...,...,...,...,...,...
835,5875,5875,ja,https://upload.wikimedia.org/wikipedia/commons...,バイアルスTL50,ホンダ・TL,バイアルス
836,5876,5876,ja,https://upload.wikimedia.org/wikipedia/commons...,冬のファルマス湾に沈む夕日。カッスル・ドライブから,ファルマス (イングランド),スポーツ
837,5877,5877,ja,https://upload.wikimedia.org/wikipedia/commons...,Houtribsluizenの水門設備,レリスタット,地理
838,5878,5878,ja,https://upload.wikimedia.org/wikipedia/commons...,『岡崎朝報』社主時代の竹内京治,竹内京治,新聞社経営


In [40]:
captions_ja = []
captions_ja2 = []
for i in range(0, 800):
  img_url = df_ja['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'ページタイトル: {},セクションタイトル: {}. 画像の字幕:'.format(df_ja['page_title'][i], df_ja['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_ja.append((df_ja['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Japanese:'.format(df_ja['page_title'][i], df_ja['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_ja2.append((df_ja['caption_reference_description'][i],generated_text))

len(captions_ja)



753

In [61]:
with open('/content/drive/My Drive/ErwModul_24/results/ja_context_transl.txt', 'w') as writefile:
    for cap in captions_ja:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/ja_context.txt', 'w') as writefile:
    for cap in captions_ja2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

## Generate results for Russian



In [42]:
df_ru = df[df['language'] == 'ru'].reset_index()
df_ru

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,4200,4200,ru,https://upload.wikimedia.org/wikipedia/commons...,Стандартные обозначения в треугольнике,Решение треугольников,Решение плоских треугольников
1,4201,4201,ru,https://upload.wikimedia.org/wikipedia/commons...,Знаки различия генерала рода войск на воротнике,Генерал рода войск,Австро-Венгрия
2,4202,4202,ru,https://upload.wikimedia.org/wikipedia/commons...,Здание мечети в пос. Урда (б. Ханская Ставка),Букеевская Орда,Население
3,4203,4203,ru,https://upload.wikimedia.org/wikipedia/commons...,Возведение «Часов мира» на Александерплац,Часы мира (Берлин),История
4,4204,4204,ru,https://upload.wikimedia.org/wikipedia/commons...,Церковь Сен-Сюльпис,Лёмбр,Ссылки
...,...,...,...,...,...,...,...
835,5035,5035,ru,http://upload.wikimedia.org/wikipedia/commons/...,Средняя линия треугольника,Средняя линия,Средняя линия треугольника
836,5036,5036,ru,https://upload.wikimedia.org/wikipedia/commons...,"Александровские казармы, 1869",Белгород-Днестровский,XX век
837,5037,5037,ru,https://upload.wikimedia.org/wikipedia/commons...,"На здании педучилища, в котором учился Герой, ...","Нехай, Даут Ереджибович",Память
838,5038,5038,ru,https://upload.wikimedia.org/wikipedia/commons...,Siphocampylus lucidus,Список угрожаемых видов цветковых растений,Семейство Калицеровые (Calyceraceae)


In [43]:
captions_ru = []
captions_ru2 = []
for i in range(0, 800):
  img_url = df_ru['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Название страницы: {}, раздел Название: {}. Подзаголовок изображения:'.format(df_ru['page_title'][i], df_ru['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_ru.append((df_ru['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Russian:'.format(df_ru['page_title'][i], df_ru['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_ru2.append((df_ru['caption_reference_description'][i],generated_text))

len(captions_ru)

763

In [62]:
with open('/content/drive/My Drive/ErwModul_24/results/ru_context_transl.txt', 'w') as writefile:
    for cap in captions_ru:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/ru_context.txt', 'w') as writefile:
    for cap in captions_ru2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

## Generate results for Polish



In [45]:
df_pl = df[df['language'] == 'pl'].reset_index()
df_pl

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,5880,5880,pl,https://upload.wikimedia.org/wikipedia/commons...,Lowell H. Smith i Lt. John P. Richter podczas ...,Tankowanie w powietrzu,Geneza
1,5881,5881,pl,https://upload.wikimedia.org/wikipedia/commons...,"W 2007, w Wilnie, podczas ceremonii uhonorowan...",Jonas Mačiulis (koszykarz),Osiągnięcia
2,5882,5882,pl,https://upload.wikimedia.org/wikipedia/commons...,Willa ogrodowa (pl. Wolności 10) − siedziba Są...,Plac Wolności w Katowicach,Obiekty historyczne
3,5883,5883,pl,https://upload.wikimedia.org/wikipedia/commons...,Pomnik ofiar katastrofy lotniczej 16.09.1984 r...,Katastrofa lotnicza w Polskiej Nowej Wsi,Ofiary katastrofy
4,5884,5884,pl,https://upload.wikimedia.org/wikipedia/commons...,Jaroslav Špaček w barwach Montréal Canadiens (...,Jaroslav Špaček,Kariera zawodnicza
...,...,...,...,...,...,...,...
835,6715,6715,pl,https://upload.wikimedia.org/wikipedia/commons...,Edykuła Grobu na rycinie Konrada von Grünenber...,Bazylika Grobu Świętego,Konstantyn IX Monomach
836,6716,6716,pl,https://upload.wikimedia.org/wikipedia/commons...,PZL I-22 Iryda-M96 podzczas Air Fair w Bydgoszczy,PZL I-22 Iryda,Służba
837,6717,6717,pl,https://upload.wikimedia.org/wikipedia/commons...,"Zajezdnia tramwajowa Helenówek, siedziba MKT w...",Tramwaje w Łodzi,Zajezdnie i inne budynki wyposażone w sieć tra...
838,6718,6718,pl,https://upload.wikimedia.org/wikipedia/commons...,Symbol Księżycowego Królestwa.,Chibiusa Tsukino,Księżniczka Usagi Small Lady Serenity


In [46]:
captions_pl = []
captions_pl2 = []
for i in range(0, 800):
  img_url = df_pl['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Tytuł strony: {}, Sekcja Tytuł: {}. Podpisz obraz:'.format(df_pl['page_title'][i], df_pl['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_pl.append((df_pl['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Polish:'.format(df_pl['page_title'][i], df_pl['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_pl2.append((df_pl['caption_reference_description'][i],generated_text))

len(captions_pl)

758

In [63]:
with open('/content/drive/My Drive/ErwModul_24/results/pl_context_transl.txt', 'w') as writefile:
    for cap in captions_pl:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/pl_context.txt', 'w') as writefile:
    for cap in captions_pl2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

## Generate results for Portuguese



In [48]:
df_pt = df[df['language'] == 'pt'].reset_index()
df_pt

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,6720,6720,pt,https://upload.wikimedia.org/wikipedia/commons...,"A confluência do Cuando (no centro, esquerda) ...",Rio Cuando,Bibliografia
1,6721,6721,pt,https://upload.wikimedia.org/wikipedia/commons...,Braga do Monte do Picoto.,Braga,Fotografias e vistas panorâmicas
2,6722,6722,pt,https://upload.wikimedia.org/wikipedia/commons...,Mapa topográfico da Suíça,Suíça,Geografia
3,6723,6723,pt,https://upload.wikimedia.org/wikipedia/commons...,Casa numa pintura de Ettore Roesler Franz (c.1...,Casa di Ettore Fieramosca,História
4,6724,6724,pt,https://upload.wikimedia.org/wikipedia/commons...,No debate de sua posse como presidenta da Comu...,Cristina Cifuentes,Presidência da Comunidade de Madrid
...,...,...,...,...,...,...,...
835,7555,7555,pt,https://upload.wikimedia.org/wikipedia/commons...,O Eiger e o Mönch vistos de perto do Kleine Sc...,Eiger,Descrição
836,7556,7556,pt,https://upload.wikimedia.org/wikipedia/commons...,Pátio interior do Palazzo Medici Riccardi.,Palazzo Medici Riccardi,O museu dos Médici e a época contemporânea
837,7557,7557,pt,https://upload.wikimedia.org/wikipedia/commons...,"Centro de Edmonton, capital de Alberta.",Alberta,Municípios
838,7558,7558,pt,https://upload.wikimedia.org/wikipedia/commons...,Vista aérea de Vancouver,Vancouver,Geografia


In [49]:
captions_pt = []
captions_pt2 = []
for i in range(0, 800):
  img_url = df_pt['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Título da página: {}, Seção Título: {}. Caption a imagem:'.format(df_pt['page_title'][i], df_pt['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_pt.append((df_pt['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Portuguese:'.format(df_pt['page_title'][i], df_pt['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_pt2.append((df_pt['caption_reference_description'][i],generated_text))

len(captions_pt)

758

In [64]:
with open('/content/drive/My Drive/ErwModul_24/results/pt_context_transl.txt', 'w') as writefile:
    for cap in captions_pt:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/pt_context.txt', 'w') as writefile:
    for cap in captions_pt2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

## Generate results for Traditional Chinese



In [51]:
df_zh_TW = df[df['language'] == 'zh-TW'].reset_index()
df_zh_TW

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,7560,7560,zh-TW,https://upload.wikimedia.org/wikipedia/commons...,殷志源出席2016 MelOn Music Awards,殷志源,水晶男孩重組
1,7561,7561,zh-TW,https://upload.wikimedia.org/wikipedia/commons...,從這部成書於1407年以教會拉丁語寫成的聖經可以看出，這個古羅馬的語言對後世文化（主要指歐洲...,拉丁语,晚期拉丁語
2,7562,7562,zh-TW,http://upload.wikimedia.org/wikipedia/commons/...,約1936年的科隆號,科隆号轻巡洋舰,第二次世界大戰
3,7563,7563,zh-TW,https://upload.wikimedia.org/wikipedia/commons...,2016年安徽各地級市地區生產總值,安徽各地级市地区生产总值列表,2016年
4,7564,7564,zh-TW,https://upload.wikimedia.org/wikipedia/commons...,平底煱的培根,煙肉,健康風險
...,...,...,...,...,...,...,...
835,8395,8395,zh-TW,https://upload.wikimedia.org/wikipedia/commons...,四合院被視為中國傳統文化的一部分。,文化,中文詞源
836,8396,8396,zh-TW,https://upload.wikimedia.org/wikipedia/commons...,蘭桂坊酒店九如坊入口,蘭桂坊酒店,酒店設施
837,8397,8397,zh-TW,https://upload.wikimedia.org/wikipedia/commons...,陶里克樓,青岛第十一中学,陶里克樓
838,8398,8398,zh-TW,https://upload.wikimedia.org/wikipedia/commons...,金庸批評中國不去收回蘇聯侵佔170萬平方公里的外東北（本圖）和外西北，卻要收回香港。有指這促...,金鐘 (編輯),後人分析金庸與鄧小平


In [52]:
captions_zh_TW = []
captions_zh_TW2 = []
for i in range(0, 800):
  img_url = df_zh_TW['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = '頁面標題: {},部分標題: {}. 字幕圖片:'.format(df_zh_TW['page_title'][i], df_zh_TW['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_zh_TW.append((df_zh_TW['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Chinese (Traditional):'.format(df_zh_TW['page_title'][i], df_zh_TW['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_zh_TW2.append((df_zh_TW['caption_reference_description'][i],generated_text))

len(captions_zh_TW)

748

In [65]:
with open('/content/drive/My Drive/ErwModul_24/results/zh_TW_context_transl.txt', 'w') as writefile:
    for cap in captions_zh_TW:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/zh_TW_context.txt', 'w') as writefile:
    for cap in captions_zh_TW2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

# Half-Time


## Generate results for Simplified Chinese



In [7]:
df_zh = df[df['language'] == 'zh'].reset_index()
df_zh

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,8400,8400,zh,https://upload.wikimedia.org/wikipedia/commons...,庄周梦蝶,庄子,文学贡献
1,8401,8401,zh,https://upload.wikimedia.org/wikipedia/commons...,申公豹,封神演義角色列表,二代弟子
2,8402,8402,zh,https://upload.wikimedia.org/wikipedia/commons...,瑞士卢塞恩，接触网维修工程车正在抢修接触网,無軌電車,缺点
3,8403,8403,zh,https://upload.wikimedia.org/wikipedia/commons...,卡祖笛组装步骤,卡祖笛,历史
4,8404,8404,zh,https://upload.wikimedia.org/wikipedia/commons...,1998年，正在庆祝成为双冠王的阿森纳女子队,阿森纳女子足球俱乐部,主场球场
...,...,...,...,...,...,...,...
835,9235,9235,zh,https://upload.wikimedia.org/wikipedia/commons...,配属上海机务段(沪)的和谐1D型电力机车第0147号牵引K79次列车进入株洲站,K79/80次列车,机车交路
836,9236,9236,zh,https://upload.wikimedia.org/wikipedia/commons...,数字相机用点色数来描述屏幕的精细程度，每个像素包含了红绿蓝三个点色数。,每英寸像素,数字相机
837,9237,9237,zh,https://upload.wikimedia.org/wikipedia/commons...,船上的花园及酒吧,喬治·華盛頓號郵輪,设计及建造
838,9238,9238,zh,https://upload.wikimedia.org/wikipedia/commons...,1941年3月，义军的M13/40坦克于的黎波里的街道上。,M13/40坦克,初次的行动


In [8]:
captions_zh = []
captions_zh2 = []
for i in range(0, 800):
  img_url = df_zh['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = '页面标题: {},部分标题: {}. 字幕图像:'.format(df_zh['page_title'][i], df_zh['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_zh.append((df_zh['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Chinese (Simplified):'.format(df_zh['page_title'][i], df_zh['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_zh2.append((df_zh['caption_reference_description'][i],generated_text))

len(captions_zh)

  return F.conv2d(input, weight, bias, self.stride,


740

In [9]:
with open('/content/drive/My Drive/ErwModul_24/results/zh_context_transl.txt', 'w') as writefile:
    for cap in captions_zh:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/zh_context.txt', 'w') as writefile:
    for cap in captions_zh2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/zh_context_transl.txt', 'w') as writefile:
    for cap in captions_zh:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/zh_context.txt', 'w') as writefile:
    for cap in captions_zh2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Ukrainian



In [10]:
df_uk = df[df['language'] == 'uk'].reset_index()
df_uk

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,9240,9240,uk,https://upload.wikimedia.org/wikipedia/commons...,Вид на озеро Севан із траси Єреван—Тбілісі,Севан (озеро),Узбережжя
1,9241,9241,uk,https://upload.wikimedia.org/wikipedia/commons...,Крабоподібна туманність,Астрономія у Стародавньому Китаї,Відомі факти
2,9242,9242,uk,https://upload.wikimedia.org/wikipedia/commons...,Ганс Тільковскі у матчі групового етапу чемпіо...,Ганс Тільковскі,Виступи за збірну
3,9243,9243,uk,https://upload.wikimedia.org/wikipedia/commons...,Комплекс до перебудови. Фото 1870-х,Комплекс монастиря та костелу бернардинів (Луцьк),Перебудова
4,9244,9244,uk,https://upload.wikimedia.org/wikipedia/commons...,"Ден Хармон, автор серіалу",Спільнота (телесеріал),Критика
...,...,...,...,...,...,...,...
835,10075,10075,uk,https://upload.wikimedia.org/wikipedia/commons...,Замок Tyresö в 1661 році,Марія Їллєншерна,Біографія
836,10076,10076,uk,https://upload.wikimedia.org/wikipedia/commons...,"Староакадемічний корпус НаУКМА, де розташована...",Національний університет «Києво-Могилянська ак...,Наукова бібліотека
837,10077,10077,uk,https://upload.wikimedia.org/wikipedia/commons...,Пляж Аркадія ранньою весною,Приморський район (Одеса),Загальні відомості
838,10078,10078,uk,https://upload.wikimedia.org/wikipedia/commons...,Голосування в виборчому окрузі № 70 в Оломоуці...,Вибори до Сенату Чехії 2018,Результати


In [11]:
captions_uk = []
captions_uk2 = []
for i in range(0, 800):
  img_url = df_uk['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Назва сторінки: {}, розділ Назва: {}. Підголовка зображення:'.format(df_uk['page_title'][i], df_uk['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_uk.append((df_uk['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Ukrainian:'.format(df_uk['page_title'][i], df_uk['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_uk2.append((df_uk['caption_reference_description'][i],generated_text))

len(captions_uk)

747

In [12]:
with open('/content/drive/My Drive/ErwModul_24/results/uk_context_transl.txt', 'w') as writefile:
    for cap in captions_uk:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/uk_context.txt', 'w') as writefile:
    for cap in captions_uk2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/uk_context_transl.txt', 'w') as writefile:
    for cap in captions_uk:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/uk_context.txt', 'w') as writefile:
    for cap in captions_uk2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Dutch



In [13]:
df_nl = df[df['language'] == 'nl'].reset_index()
df_nl

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,10080,10080,nl,https://upload.wikimedia.org/wikipedia/commons...,Grafkapel van de familie De Smeth te Alphen aa...,De Smeth,Enkele telgen
1,10081,10081,nl,https://upload.wikimedia.org/wikipedia/commons...,Keizerin Maria Theresia,Geschiedenis van Brussel,Van Spanje naar Oostenrijk
2,10082,10082,nl,https://upload.wikimedia.org/wikipedia/commons...,Timo Mäkinen\noverleden op 4 mei 2017,Lijst van personen overleden in 2017/Voorstel,Mei
3,10083,10083,nl,https://upload.wikimedia.org/wikipedia/commons...,Een jong dat de inkeping boven de vestibule va...,Pinacosaurus,Onderscheidende kenmerken
4,10084,10084,nl,https://upload.wikimedia.org/wikipedia/commons...,Thomas Isaacq (?-1539-40) (knielend op het lin...,Wapenkoning,Nederland
...,...,...,...,...,...,...,...
835,10915,10915,nl,https://upload.wikimedia.org/wikipedia/commons...,dagbouw Schöningen in 2012,Dagbouw Schöningen,Externe link
836,10916,10916,nl,https://upload.wikimedia.org/wikipedia/commons...,Zicht op het federaal paleis in Bern in 1857.,Geschiedenis van Zwitserland,Federale grondwet van 1848
837,10917,10917,nl,https://upload.wikimedia.org/wikipedia/commons...,"satellietfoto, die de vorming van meren toont ...",Terugtrekking van gletsjers sinds 1850,Azië
838,10918,10918,nl,https://upload.wikimedia.org/wikipedia/commons...,De Amateurtuindersvereniging Utrecht-Zuid best...,Lunetten (Utrecht),Groen


In [14]:
captions_nl = []
captions_nl2 = []
for i in range(0, 800):
  img_url = df_nl['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Titel pagina: {}, sectie titel: {}. Onderschrift van de afbeelding:'.format(df_nl['page_title'][i], df_nl['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_nl.append((df_nl['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Dutch:'.format(df_nl['page_title'][i], df_nl['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_nl2.append((df_nl['caption_reference_description'][i],generated_text))

len(captions_nl)

767

In [15]:
with open('/content/drive/My Drive/ErwModul_24/results/nl_context_transl.txt', 'w') as writefile:
    for cap in captions_nl:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/nl_context.txt', 'w') as writefile:
    for cap in captions_nl2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/nl_context_transl.txt', 'w') as writefile:
    for cap in captions_nl:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/nl_context.txt', 'w') as writefile:
    for cap in captions_nl2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Czech



In [16]:
df_cs = df[df['language'] == 'cs'].reset_index()
df_cs

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,10920,10920,cs,https://upload.wikimedia.org/wikipedia/commons...,Psychrometr,Psychrometr,Augustův psychrometr
1,10921,10921,cs,https://upload.wikimedia.org/wikipedia/commons...,Krakovský groš (avers),Groš,Odkazy
2,10922,10922,cs,https://upload.wikimedia.org/wikipedia/commons...,Srbské okresy,Srbsko,Administrativní dělení Srbska
3,10923,10923,cs,https://upload.wikimedia.org/wikipedia/commons...,"Autoportrét, 1962",Jean-Louis Swiners,Fotograf a fotoreportér
4,10924,10924,cs,http://upload.wikimedia.org/wikipedia/commons/...,Schéma schránky rozsivek,Rozsivky,Základní informace
...,...,...,...,...,...,...,...
835,11755,11755,cs,http://upload.wikimedia.org/wikipedia/commons/...,Slovenský Národný Dom,Kysáč,Historie
836,11756,11756,cs,https://upload.wikimedia.org/wikipedia/commons...,Hlava trubce,Trubec,Vzhled trubce
837,11757,11757,cs,http://upload.wikimedia.org/wikipedia/commons/...,Nákres ponorky Sankt Petersburg,Projekt 677 Lada,Pozadí vzniku
838,11758,11758,cs,https://upload.wikimedia.org/wikipedia/commons...,Reliéf postavy na Tichém náměstí v Třebíči.,Jiří Svoboda (výtvarník),Biografie


In [17]:
captions_cs = []
captions_cs2 = []
for i in range(0, 800):
  img_url = df_cs['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Název stránky: {}, Sekce Název: {}. Podpis obrázku:'.format(df_cs['page_title'][i], df_cs['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_cs.append((df_cs['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Czech:'.format(df_cs['page_title'][i], df_cs['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_cs2.append((df_cs['caption_reference_description'][i],generated_text))

len(captions_cs)



767

In [18]:
with open('/content/drive/My Drive/ErwModul_24/results/cs_context_transl.txt', 'w') as writefile:
    for cap in captions_cs:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/cs_context.txt', 'w') as writefile:
    for cap in captions_cs2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/cs_context_transl.txt', 'w') as writefile:
    for cap in captions_cs:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/cs_context.txt', 'w') as writefile:
    for cap in captions_cs2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Catalan



In [19]:
df_ca = df[df['language'] == 'ca'].reset_index()
df_ca

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,11760,11760,ca,https://upload.wikimedia.org/wikipedia/commons...,"Una al·legoria medieval comú de la follia, pin...",El Boig,Com a trumfo major
1,11761,11761,ca,https://upload.wikimedia.org/wikipedia/commons...,"Josep i la dona de Putifar, cap a 1645, oli so...",Bartolomé Esteban Murillo,Recepció i valoració crítica
2,11762,11762,ca,https://upload.wikimedia.org/wikipedia/commons...,Anatoli Papànov el 1942,Anatoli Papànov,Primers anys i la guerra amb Alemanya
3,11763,11763,ca,https://upload.wikimedia.org/wikipedia/commons...,Desembussador amb bomba d'alta pressió profess...,Desembussador actiu,Desembussadors mecànics
4,11764,11764,ca,https://upload.wikimedia.org/wikipedia/commons...,Antiga Casa de la Vila.,Mollet del Vallès,Lloc i edificis d'interès
...,...,...,...,...,...,...,...
835,12595,12595,ca,https://upload.wikimedia.org/wikipedia/commons...,"Exèrcit rus (1756-1762), incloent-hi un granad...",Història dels uniformes militars,Segle XVIII
836,12596,12596,ca,https://upload.wikimedia.org/wikipedia/commons...,Entrega Premi Ferran Termes 2015,Jaume Alonso-Cuevillas i Sayrol,Biografia
837,12597,12597,ca,https://upload.wikimedia.org/wikipedia/commons...,"Soldat de l'Afrika Korps, protegint-se de les ...",Heer,La campanya del nord d'Àfrica
838,12598,12598,ca,http://upload.wikimedia.org/wikipedia/commons/...,Sala principal del museu amb funció de distrib...,Museu Nacional d'Art de Catalunya,75è aniversari (2009)


In [20]:
captions_ca = []
captions_ca2 = []
for i in range(0, 800):
  img_url = df_ca['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Titull de pàgina: {}, Secció Titull: {}. Capció de la imatge:'.format(df_ca['page_title'][i], df_ca['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_ca.append((df_ca['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Catalan:'.format(df_ca['page_title'][i], df_ca['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_ca2.append((df_ca['caption_reference_description'][i],generated_text))

len(captions_ca)

765

In [21]:
with open('/content/drive/My Drive/ErwModul_24/results/ca_context_transl.txt', 'w') as writefile:
    for cap in captions_ca:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/ca_context.txt', 'w') as writefile:
    for cap in captions_ca2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/ca_context_transl.txt', 'w') as writefile:
    for cap in captions_ca:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/ca_context.txt', 'w') as writefile:
    for cap in captions_ca2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Hungarian



In [22]:
df_hu = df[df['language'] == 'hu'].reset_index()
df_hu

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,12600,12600,hu,https://upload.wikimedia.org/wikipedia/commons...,Az állat rekonstrukciója,Baryonyx,Megjelenése
1,12601,12601,hu,https://upload.wikimedia.org/wikipedia/commons...,Ezana felirata Meroéban,Ezana akszúmi király,Külpolitika
2,12602,12602,hu,https://upload.wikimedia.org/wikipedia/commons...,A Crait bolygón játszódó csatajeleneteket Sala...,Star Wars: Az utolsó Jedik,Gyártás és forgatás
3,12603,12603,hu,https://upload.wikimedia.org/wikipedia/commons...,Az Északi Flotta jelvénye,Az Oroszországi Föderáció katonai körzetei,Az Északi Flotta
4,12604,12604,hu,https://upload.wikimedia.org/wikipedia/commons...,Eugene Bullard,Hadiérem (Franciaország),Nevezetes kitüntetettek
...,...,...,...,...,...,...,...
835,13435,13435,hu,https://upload.wikimedia.org/wikipedia/commons...,Mivel a tartósítási módszerek során a különböz...,Koboldcápa,Rendszertani besorolása
836,13436,13436,hu,https://upload.wikimedia.org/wikipedia/commons...,"Németországi fellépése, 2014. július 19-én",Anastacia,Resurrection (2014–2015)
837,13437,13437,hu,https://upload.wikimedia.org/wikipedia/commons...,"A HMS Calcutta (1831) galériája (Portsmouth), ...",Rudyard Kipling,Gyermekkora és ifjúsága
838,13438,13438,hu,https://upload.wikimedia.org/wikipedia/commons...,A rendészeti iroda avatása,Solymár,Mentőszolgálat


In [23]:
captions_hu = []
captions_hu2 = []
for i in range(0, 800):
  img_url = df_hu['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Oldalakcím: {}, szakaszcím: {}. A kép aláírása:'.format(df_hu['page_title'][i], df_hu['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_hu.append((df_hu['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Hungarian:'.format(df_hu['page_title'][i], df_hu['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_hu2.append((df_hu['caption_reference_description'][i],generated_text))

len(captions_hu)

665

In [24]:
with open('/content/drive/My Drive/ErwModul_24/results/hu_context_transl.txt', 'w') as writefile:
    for cap in captions_hu:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/hu_context.txt', 'w') as writefile:
    for cap in captions_hu2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/hu_context_transl.txt', 'w') as writefile:
    for cap in captions_hu:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/hu_context.txt', 'w') as writefile:
    for cap in captions_hu2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Arabic



In [25]:
df_ar = df[df['language'] == 'ar'].reset_index()
df_ar

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,13440,13440,ar,https://upload.wikimedia.org/wikipedia/commons...,فرديناند الثاني، إمبراطور روماني مقدس,الانتخابات الإمبراطورية,انتخابات 1619
1,13441,13441,ar,http://upload.wikimedia.org/wikipedia/commons/...,خربة قمران قرب أريحا، فلسطين.,البحر الميت,التاريخ
2,13442,13442,ar,https://upload.wikimedia.org/wikipedia/commons...,يقطينات هالوين,خريف,الارتباطات
3,13443,13443,ar,https://upload.wikimedia.org/wikipedia/commons...,خريطة لمستخدمون الحاليون لإم-48 باتون باللون ا...,إم-48 باتون,المستخدمون
4,13444,13444,ar,https://upload.wikimedia.org/wikipedia/commons...,مخطوطة يصف فيها الإدريسي فنلندا,الإدريسي,نشأته
...,...,...,...,...,...,...,...
835,14275,14275,ar,https://upload.wikimedia.org/wikipedia/commons...,نبتة الزعيتيرة,الزعيتيرة,صور لنبتة الزعيتيرة
836,14276,14276,ar,https://upload.wikimedia.org/wikipedia/commons...,خريطة تصويت النواب على التعديل الذي يمدد زواج ...,زواج المثليين في أيرلندا الشمالية,التقنين عبر برلمان وستمنستر (2019)
837,14277,14277,ar,https://upload.wikimedia.org/wikipedia/commons...,الأمير محمد بن نايف آل سعود مع وزيرة الخارجية ...,محمد بن نايف بن عبد العزيز آل سعود,ولايته للعهد
838,14278,14278,ar,https://upload.wikimedia.org/wikipedia/commons...,مبنى مشرعين ولاية رود آيلاند,رود آيلاند,القانون والحكومة


In [26]:
captions_ar = []
captions_ar2 = []
for i in range(0, 800):
  img_url = df_ar['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'عنوان الصفحة: {}، عنوان القسم: {}. تسمية توضيحية للصورة:'.format(df_ar['page_title'][i], df_ar['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_ar.append((df_ar['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Arabic:'.format(df_ar['page_title'][i], df_ar['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_ar2.append((df_ar['caption_reference_description'][i],generated_text))

len(captions_ar)



731

In [27]:
with open('/content/drive/My Drive/ErwModul_24/results/ar_context_transl.txt', 'w') as writefile:
    for cap in captions_ar:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/ar_context.txt', 'w') as writefile:
    for cap in captions_ar2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/ar_context_transl.txt', 'w') as writefile:
    for cap in captions_ar:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/ar_context.txt', 'w') as writefile:
    for cap in captions_ar2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Hebrew



In [28]:
df_iw = df[df['language'] == 'iw'].reset_index()
df_iw

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,14280,14280,iw,https://upload.wikimedia.org/wikipedia/commons...,דגל הצבא,הכוחות המזוינים של סלובניה,היסטוריה
1,14281,14281,iw,https://upload.wikimedia.org/wikipedia/commons...,"וושינגטון בגיל ארבעים, 1772",ג'ורג' וושינגטון,בין המלחמות: מאונט ורנון (1759–1774)
2,14282,14282,iw,https://upload.wikimedia.org/wikipedia/commons...,במעגל הקווינטות ניתן לראות שכל שלושה צעדים ימי...,סולם יחסי,ניתוח ודוגמאות
3,14283,14283,iw,https://upload.wikimedia.org/wikipedia/commons...,שורשים של המשוואה לרמות אנרגיה מקוונטטות בבור ...,בור פוטנציאל סופי,מציאת פונקציות הגל עבור המצב הקשור
4,14284,14284,iw,https://upload.wikimedia.org/wikipedia/commons...,בניית תא שיט בתעלה ב-1906,הפארק הלאומי אוורגליידס,פיתוח הקרקע ושימורה
...,...,...,...,...,...,...,...
835,15115,15115,iw,https://upload.wikimedia.org/wikipedia/commons...,"מראה בתצוגה ""הסודות הכמוסים של בון"", המוזיאון ...",בון (קומיקס),עלילה
836,15116,15116,iw,https://upload.wikimedia.org/wikipedia/commons...,"המתאבק ולנטין רייצ'ב (למטה), במהלך קרב הגמר במ...",בולגריה במשחקים האולימפיים,אולימפיאדת מוסקבה (1980)
837,15117,15117,iw,https://upload.wikimedia.org/wikipedia/commons...,"נס קצין בכיר, לו היה זכאי לשם בתפקידו כמפקד שי...",ישראל לשם,פיקוד על שייטת הצוללות
838,15118,15118,iw,https://upload.wikimedia.org/wikipedia/commons...,"מבצר קופשטיין, בתמונה משנת 1889 מאת הצייר הפול...",אן-ז'וזף טרואן דה מריקור,ההגליה והמעצר


In [29]:
captions_iw = []
captions_iw2 = []
for i in range(0, 800):
  img_url = df_iw['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'כותרת עמוד: {}, כותרת מקטע: {}. כיתוב התמונה:'.format(df_iw['page_title'][i], df_iw['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_iw.append((df_iw['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Hebrew:'.format(df_iw['page_title'][i], df_iw['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_iw2.append((df_iw['caption_reference_description'][i],generated_text))

len(captions_iw)



768

In [30]:
with open('/content/drive/My Drive/ErwModul_24/results/iw_context_transl.txt', 'w') as writefile:
    for cap in captions_iw:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/iw_context.txt', 'w') as writefile:
    for cap in captions_iw2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/iw_context_transl.txt', 'w') as writefile:
    for cap in captions_iw:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/iw_context.txt', 'w') as writefile:
    for cap in captions_iw2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Swedish



In [31]:
df_sv = df[df['language'] == 'sv'].reset_index()
df_sv

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,15120,15120,sv,http://upload.wikimedia.org/wikipedia/commons/...,Bilden visar en cotehardie,Tudortiden,Mode
1,15121,15121,sv,https://upload.wikimedia.org/wikipedia/commons...,Sängkammarinteriören,Clason-Hebbeska huset,Interiörbilder
2,15122,15122,sv,https://upload.wikimedia.org/wikipedia/commons...,Lastning av spannmål på ett kinesiskt bulkfart...,Hamn,Sverige
3,15123,15123,sv,http://upload.wikimedia.org/wikipedia/commons/...,Ett montage som visar hur tennfigurer gjuts.,Tennfigur,Tillverkning
4,15124,15124,sv,http://upload.wikimedia.org/wikipedia/commons/...,"I Port Moody, British Columbia, Kanada.",Grå strömstare,Utseende och läte
...,...,...,...,...,...,...,...
835,15955,15955,sv,https://upload.wikimedia.org/wikipedia/commons...,"Vattenfall och ""Jerusalems kvarm i Röttleån",Gränna socken,Geografi
836,15956,15956,sv,https://upload.wikimedia.org/wikipedia/commons...,Olaus Magnus karta Carta Marina från 1539.,Valross,Valrossen och människan
837,15957,15957,sv,https://upload.wikimedia.org/wikipedia/commons...,Utrikesminister Daniel Webster,Millard Fillmore,Ministrar
838,15958,15958,sv,http://upload.wikimedia.org/wikipedia/commons/...,"Landegode fyr, Bodø.",Fyrar i Norge,Nordland


In [32]:
captions_sv = []
captions_sv2 = []
for i in range(0, 800):
  img_url = df_sv['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Sidans titel: {}, avsnittets titel: {}. Bildtext till bilden:'.format(df_sv['page_title'][i], df_sv['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_sv.append((df_sv['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Swedish:'.format(df_sv['page_title'][i], df_sv['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_sv2.append((df_sv['caption_reference_description'][i],generated_text))

len(captions_sv)

765

In [33]:
with open('/content/drive/My Drive/ErwModul_24/results/sv_context_transl.txt', 'w') as writefile:
    for cap in captions_sv:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/sv_context.txt', 'w') as writefile:
    for cap in captions_sv2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/sv_context_transl.txt', 'w') as writefile:
    for cap in captions_sv:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/sv_context.txt', 'w') as writefile:
    for cap in captions_sv2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

## Generate results for Vietnamese



In [34]:
df_vi = df[df['language'] == 'vi'].reset_index()
df_vi

Unnamed: 0.1,index,Unnamed: 0,language,image_url,caption_reference_description,page_title,section_title
0,15960,15960,vi,https://upload.wikimedia.org/wikipedia/commons...,"Trao đổi nhiệt qua bộ trung gian, kết hợp dự t...",Trao đổi địa nhiệt,Dùng bộ trao đổi trung gian
1,15961,15961,vi,https://upload.wikimedia.org/wikipedia/commons...,Tháp Luân Đôn là một lâu đài thời trung cổ lịc...,Luân Đôn,Kiến trúc
2,15962,15962,vi,https://upload.wikimedia.org/wikipedia/commons...,Lothar Matthäus đã chơi 25 trận đấu World Cup ...,Giải vô địch bóng đá thế giới,Các kỷ lục và thống kê
3,15963,15963,vi,https://upload.wikimedia.org/wikipedia/commons...,Thành Châu Đốc trong bản đồ Nam Kỳ Lục Tỉnh (B...,Châu Đốc,Nguồn gốc tên gọi
4,15964,15964,vi,https://upload.wikimedia.org/wikipedia/commons...,Joan Smalls,Danh sách người mẫu của Victoria's Secret,Người mẫu xuất hiện trong các show của Victori...
...,...,...,...,...,...,...,...
835,16795,16795,vi,https://upload.wikimedia.org/wikipedia/commons...,"Núi Sugarloaf, Rio de Janeiro được sử dụng cho...",The Amazing Race 2,Chặng 1 (Hoa Kỳ → Brasil)
836,16796,16796,vi,https://upload.wikimedia.org/wikipedia/commons...,Septic river.,Ô nhiễm biển,Nguồn xả trực tiếp
837,16797,16797,vi,https://upload.wikimedia.org/wikipedia/commons...,Phục nguyên P. grangeri,Platybelodon,Mô tả
838,16798,16798,vi,http://upload.wikimedia.org/wikipedia/commons/...,Một góc thành phố Huế bị tàn phá trơ trụi. Ảnh...,Sự kiện Tết Mậu Thân,Tại Huế


In [35]:
captions_vi = []
captions_vi2 = []
for i in range(0, 800):
  img_url = df_vi['image_url'][i]
  try:
    img_path, _ = urllib.request.urlretrieve(img_url)

    # load image
    image = Image.open(img_path).convert('RGB')
  except:
    continue

  prompt = 'Trang tiêu đề: {}, Phần tiêu đề: {}. Đăng chú ý hình ảnh:'.format(df_vi['page_title'][i], df_vi['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_vi.append((df_vi['caption_reference_description'][i],generated_text))

  prompt = 'Page Title: {}, Section Title: {}. Caption the image in Vietnamese:'.format(df_vi['page_title'][i], df_vi['section_title'][i])
  inputs = processor(image, text=prompt, return_tensors='pt').to(device, torch.float16)

  generated_ids = model.generate(**inputs, max_new_tokens=30)
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
  captions_vi2.append((df_vi['caption_reference_description'][i],generated_text))

len(captions_vi)

735

In [36]:
with open('/content/drive/My Drive/ErwModul_24/results/vi_context_transl.txt', 'w') as writefile:
    for cap in captions_vi:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/vi_context.txt', 'w') as writefile:
    for cap in captions_vi2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n|||\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/vi_context_transl.txt', 'w') as writefile:
    for cap in captions_vi:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')

with open('/content/drive/My Drive/ErwModul_24/results/backup/vi_context.txt', 'w') as writefile:
    for cap in captions_vi2:
        writefile.write(cap[0]+'\n')
        writefile.write('|\n')
        writefile.write(cap[1]+'\n\n')