In [70]:
# ! pip install datasets tqdm

In [71]:
import collections
import random
import os
import time
import json
from PIL import Image
import io
import urllib
import uuid
from concurrent.futures import ThreadPoolExecutor
from functools import partial

import numpy as np
from tqdm import tqdm
from datasets import load_dataset
from datasets.utils.file_utils import get_datasets_user_agent
import matplotlib.pyplot as plt
import tensorflow as tf
from datasets import load_dataset

In [72]:
# Start from an empty images/ directory so files saved by a previous run do not pile up.
! rm -rf images
! mkdir images

# User-agent string provided by the `datasets` library; used as the "user-agent" header
# of the image requests built in fetch_single_image.
USER_AGENT = get_datasets_user_agent()

def fetch_single_image(image_url, timeout=None, retries=0):
    """Download one image, shrink it to 100x100 and save it as a JPEG under images/.

    Parameters
    ----------
    image_url : str
        URL of the image to download.
    timeout : float or None
        Timeout in seconds handed to ``urllib.request.urlopen`` (None = no explicit timeout).
    retries : int
        Number of additional attempts made when downloading or decoding fails.

    Returns
    -------
    str
        Path of the saved file, ``images/<uuid4>.jpg``.

    Raises
    ------
    Exception
        Whatever the last attempt raised (network error, undecodable payload, ...)
        once all attempts are used up.
    """
    # `import urllib` on its own does not guarantee that the `request` submodule is loaded.
    import urllib.request

    request = urllib.request.Request(
        image_url,
        data=None,
        headers={"user-agent": USER_AGENT},
    )

    image = None
    for attempt in range(max(retries, 0) + 1):
        try:
            with urllib.request.urlopen(request, timeout=timeout) as req:
                image = Image.open(io.BytesIO(req.read()))
                # Decode right away so a truncated/corrupt payload fails inside the retry loop.
                image.load()
            break
        except Exception:
            if attempt >= retries:
                raise

    # Decide on flattening from the decoded image as well as from the URL: a transparent
    # image served from a URL without "png" in it cannot be written as JPEG otherwise.
    has_alpha = image.mode in ("RGBA", "LA", "PA") or "transparency" in image.info
    if 'png' in image_url or has_alpha:
        rgba = image.convert('RGBA')
        background = Image.new("RGB", rgba.size, (255, 255, 255))
        background.paste(rgba, mask=rgba.split()[3])  # band 3 is the alpha channel
        image = background
        save_options = {"quality": 80}
    else:
        # The JPEG writer only accepts a few modes; convert anything else (e.g. palette images).
        if image.mode not in ("RGB", "L", "CMYK"):
            image = image.convert("RGB")
        save_options = {}

    image = image.resize((100, 100))
    image_path = "images/" + str(uuid.uuid4()) + ".jpg"
    image.save(image_path, "JPEG", **save_options)
    return image_path

def fetch_images(batch, num_threads, timeout=None, retries=3):
    """Download every URL in ``batch["image_url"]`` with a thread pool.

    The saved file paths are stored, in input order, under ``batch["image_path"]``
    and the same batch object is returned. ``timeout`` and ``retries`` are passed
    through to ``fetch_single_image`` for each URL.
    """
    def _download(url):
        return fetch_single_image(url, timeout=timeout, retries=retries)

    urls = batch["image_url"]
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        saved_paths = [path for path in pool.map(_download, urls)]
    batch["image_path"] = saved_paths
    return batch



In [73]:
# Read the local test CSV and keep only the rows whose ISO639-3 language code is 'tha'.
df_test = load_dataset("csv", data_files='test.csv', download_mode='force_redownload')
df_test = df_test.filter(lambda row: row['ISO639-3'] == 'tha')

# Download the images batch by batch; each batch of 100 rows uses a pool of `num_threads` threads.
num_threads = 20
df_test = df_test.map(
    fetch_images,
    batched=True,
    batch_size=100,
    fn_kwargs=dict(num_threads=num_threads),
)



Downloading and preparing dataset csv/default (download: 34.65 KiB, generated: 36.21 KiB, post-processed: Unknown size, total: 70.86 KiB) to /root/.cache/huggingface/datasets/csv/default-78431c32af159c58/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/200 [00:00<?, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-78431c32af159c58/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [74]:
pip install azure-cognitiveservices-vision-computervision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [75]:
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials

# Credentials are read from the environment so the secret is never stored in the notebook.
# NOTE: the key that used to be hardcoded in this cell should be considered leaked and rotated.
region = os.environ.get('AZURE_CV_REGION', 'eastus')
key = os.environ.get('AZURE_CV_KEY')
if not key:
    raise RuntimeError("Set the AZURE_CV_KEY environment variable to your Computer Vision key.")

# Client for the regional Computer Vision endpoint, authenticated with the subscription key.
credentials = CognitiveServicesCredentials(key)
client = ComputerVisionClient(
    endpoint="https://" + region + ".api.cognitive.microsoft.com/",
    credentials=credentials
)

In [76]:
def cap_image(image):
  """Return the text of the first caption the vision client gives for an image stream.

  `image` is passed unchanged to ``client.describe_image_in_stream``.
  """
  description = client.describe_image_in_stream(image)
  first_caption = description.captions[0]
  return first_caption.text

In [77]:
# Caption every downloaded image; `prediction` maps sample Id -> caption text.
prediction = {}
test_split = df_test['train']
# Fetch each column once instead of indexing df_test['train']['Id'] again on every iteration.
sample_ids = test_split['Id']
image_paths = test_split['image_path']
for sample_id, image_path in tqdm(zip(sample_ids, image_paths), total=len(image_paths)):
    try:
        # The context manager closes the file even when the captioning call fails.
        with open(image_path, "rb") as local_image:
            prediction[sample_id] = cap_image(local_image)
    except Exception as err:
        # Best effort: report the failing Id (and why), pause briefly, then move on.
        # Catching Exception rather than everything lets Ctrl-C interrupt the loop.
        print(sample_id, repr(err))
        time.sleep(1)

100%|██████████| 69/69 [00:31<00:00,  2.21it/s]


In [78]:
import requests, uuid, json

# Translator key and endpoint. The key is read from the environment so the secret is
# never stored in the notebook.
# NOTE: the key that used to be hardcoded in this cell should be considered leaked and rotated.
key = os.environ.get('AZURE_TRANSLATOR_KEY')
if not key:
    raise RuntimeError("Set the AZURE_TRANSLATOR_KEY environment variable to your Translator key.")
endpoint = "https://api.cognitive.microsofttranslator.com"

# location, also known as region.
# required if you're using a multi-service or regional (not global) resource. It can be found in the Azure portal on the Keys and Endpoint page.
location = os.environ.get('AZURE_TRANSLATOR_REGION', "eastus")

path = '/translate'
constructed_url = endpoint + path

headers = {
    'Ocp-Apim-Subscription-Key': key,
    # location required if you're using a multi-service or regional (not global) resource.
    'Ocp-Apim-Subscription-Region': location,
    'Content-type': 'application/json',
    # Random id sent along with the request as the client trace id.
    'X-ClientTraceId': str(uuid.uuid4())
}

In [79]:
# Query parameters of the translate call: English in, Thai out.
params = {
    'api-version': '3.0',
    'from': ['en'],
    'to': ['th']
}

# One {'text': ...} entry per caption, in the insertion order of `prediction`.
body = [{'text': caption} for caption in prediction.values()]

request = requests.post(constructed_url, params=params, headers=headers, json=body, timeout=60)
# Stop here on an HTTP error instead of handing an error payload to the following cells.
request.raise_for_status()
response = request.json()

In [80]:
# First translation of every returned item, in the order the service returned them.
# A named loop variable is used so IPython's last-output `_` is not overwritten.
captions_thai = [item['translations'][0]['text'] for item in response]

100%|██████████| 69/69 [00:00<00:00, 271743.64it/s]


In [81]:
import pandas as pd

# `captions_thai` was built from `prediction` in insertion order, so pair the captions with
# the Ids that were actually captioned. Zipping against every Id in the dataset would shift
# captions onto the wrong rows as soon as one image failed to caption.
predicted_ids = list(prediction)
assert len(predicted_ids) == len(captions_thai), "number of translations differs from number of captions"

submission = pd.DataFrame(list(zip(predicted_ids, captions_thai)), columns=['Id', 'Predicted'])

submission.to_csv('tha_azure_translation.csv', index=False)

In [82]:
# Spot check: keep only the row with this Id and show where its image was saved.
df = df_test.filter(lambda row: row['Id'] == '0cbcf57f-5726-4184-bb15-13ac7f067ae3_tha')
df['train']['image_path']

  0%|          | 0/1 [00:00<?, ?ba/s]

['images/9fa7ffd1-3a5e-40f3-8101-713ae1209cae.jpg']

In [83]:
# NOTE(review): `image_path` is not assigned in this cell; it appears to be the value left
# over from the captioning loop above (i.e. the last image processed) — confirm this is the
# file that is meant to be opened here.
# Opens the file in binary mode; the handle is not closed in the code visible here.
local_image = open(image_path, "rb")