In [1]:
import pandas as pd

# Read the manually curated caption/URL table.
# Each row pairs an English description (`caption`) with an image location (`url`).
captions_csv = "caption_url.csv"
df = pd.read_csv(captions_csv)
print(df)

                                              caption  \
0         an empty chair sitting in front of a window   
1   a house under construction with a building bei...   
2            a little boy that is sitting in a basket   
3         a man in an apron working on crates of fish   
4   a view from a bridge over a river in central park   
..                                                ...   
95  santa claus, standing at the base of a christm...   
96      an old white house with a tree in front of it   
97  the stained glass window in the cathedral at t...   
98  three men in their firemen's uniforms standing...   
99                 a lake with water in the mountains   

                                                  url  
0   http://static.flickr.com/2723/4385058960_b0f29...  
1   http://static.flickr.com/3074/2965635243_d45c3...  
2   http://static.flickr.com/188/431442759_e0c519a...  
3   http://static.flickr.com/3276/2989279316_9e8e3...  
4   http://static.flickr.com/2691/4

In [2]:
# Ask for the target language code. The value is interpolated directly into the
# translation model id in a later cell, so surrounding whitespace (e.g. a stray
# space typed before pressing Enter) is stripped to avoid an invalid model name.
lang = input('Enter language code (ur for Urdu, fr for french, zh for chineese):').strip()

In [3]:
# Use a lightweight model for translation purposes.
# It should support major languages; it was tested for Urdu.
import torch
# Import the factory under an alias: the original code assigned the factory's
# result back to the name `pipeline`, which shadowed the factory itself and
# made this cell fail when executed a second time in the same kernel.
from transformers import pipeline as hf_pipeline

# English -> `lang` translator; `lang` is the code entered in the previous cell.
translator = hf_pipeline(
    task="translation",
    model=f"Helsinki-NLP/opus-mt-en-{lang}")

# The translation loop further down refers to the translator as `pipeline`,
# so keep that name bound to the translator object for compatibility.
pipeline = translator

  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


In [10]:
# Start an empty frame that will hold the captions whose translation passes the
# cosine-similarity check: English caption, translated caption, and image URL.
output_columns = ['eng_caption', 'translated_caption', 'url']
translated_df = pd.DataFrame(columns=output_columns)

In [5]:
# Set up the sentence embedder that is later passed to compute_similarity.
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

embedder_name = 'sentence-transformers/LaBSE'
model = SentenceTransformer(embedder_name)

In [6]:
def translate_caption(en_caption, translator):
    """Translate one English caption and return the translated string.

    Args:
        en_caption: English caption text to translate.
        translator: Callable translation model, invoked as
            ``translator(en_caption, max_length=512)``. It must return an
            indexable collection whose first item has a
            ``'translation_text'`` entry.

    Returns:
        The ``'translation_text'`` value of the first result.
    """
    outputs = translator(en_caption, max_length=512)
    first_output = outputs[0]
    return first_output['translation_text']

def compute_similarity(en_caption, translated_caption, model):
    """Return the cosine similarity between a caption and its translation.

    Args:
        en_caption: Original English caption.
        translated_caption: Translation of ``en_caption``.
        model: Embedding model exposing ``encode(list_of_sentences)``; the
            result is indexed per sentence and each item must support
            ``reshape``.

    Returns:
        The single similarity score for the pair, taken from the 1x1 matrix
        produced by ``cosine_similarity``.
    """
    sentence_pair = [en_caption, translated_caption]
    vectors = model.encode(sentence_pair)
    # cosine_similarity works on 2-D inputs, so each vector becomes a one-row matrix.
    en_vector = vectors[0].reshape(1, -1)
    translated_vector = vectors[1].reshape(1, -1)
    score_matrix = cosine_similarity(en_vector, translated_vector)
    return score_matrix[0][0]

In [11]:
# Translate every caption in the original data frame, score each translation
# against its English source, and keep only the rows whose similarity score is
# above or equal to the threshold; everything else is ignored.
SIMILARITY_THRESHOLD = 0.75

# Accepted rows are collected in a plain list and the frame is built once at
# the end. Calling pd.concat inside the loop copies the whole frame on every
# iteration, and appending to the existing frame meant that re-running this
# cell duplicated every row; rebuilding from scratch keeps the cell idempotent.
kept_rows = []
for en_caption, url in zip(df['caption'], df['url']):
    translated_caption = translate_caption(en_caption, pipeline)
    similarity = compute_similarity(en_caption, translated_caption, model)
    if similarity >= SIMILARITY_THRESHOLD:
        kept_rows.append({
            'eng_caption': en_caption,
            'translated_caption': translated_caption,
            'url': url,
        })

# Explicit column list keeps the same column order as before and still yields
# the expected (empty) frame when no translation passes the threshold.
translated_df = pd.DataFrame(
    kept_rows, columns=['eng_caption', 'translated_caption', 'url'])

In [12]:
# Preview the first rows of the filtered captions, then persist the full frame.
preview = translated_df.head()
print(preview)
# Saved with pandas' default settings, so the row index is written as an
# unnamed first column of the CSV.
translated_df.to_csv('translated_captions.csv')

                                         eng_caption  \
0        an empty chair sitting in front of a window   
1  a house under construction with a building bei...   
2           a little boy that is sitting in a basket   
3        a man in an apron working on crates of fish   
4  a view from a bridge over a river in central park   

                                  translated_caption  \
0  کھڑکی کے سامنے تختوں پر ایک چھوٹا سا تخت بیٹھت...   
1       تعمیر کے تحت تعمیر کا گھر تعمیر کِیا گیا تھا   
2              ایک چھوٹا لڑکا ایک ٹوکری میں بیٹھا ہے   
3  مچھلیوں میں ایک آدمی مچھلی کے جال میں کام کرتا ہے   
4     وسطی پارک کے ایک سرے سے لیکر وسطی پارک کا منظر   

                                                 url  
0  http://static.flickr.com/2723/4385058960_b0f29...  
1  http://static.flickr.com/3074/2965635243_d45c3...  
2  http://static.flickr.com/188/431442759_e0c519a...  
3  http://static.flickr.com/3276/2989279316_9e8e3...  
4  http://static.flickr.com/2691/4304191133_cbbf8..