In [10]:
import json
import re
from googletrans import Translator
import time

# File path
file = "translated_file_2.json"

# Initialize translator globally
translator = Translator()

# Function to check for Unicode escape sequences
def contains_unicode_escape(s):
    # Adjusted regex to allow for optional spaces between \ and u
    pattern = r'\\\s*u[0-9a-fA-F]{4}|\\\s*U[0-9a-fA-F]{8}'
    return bool(re.search(pattern, s))

# Function to decode Unicode escape sequences into characters
def decode_unicode_escape(s):
    try:
        # Decode Unicode escape sequences
        return s.encode('utf-8').decode('unicode_escape')
    except UnicodeDecodeError:
        # Handle case where decoding fails
        print(f"Unicode decode error in text: {s}")
        return s  # Return the original text in case of failure

# New function: Remove spaces and decode Unicode escape sequences
def fix_and_decode_unicode(s):
    # Remove spaces between backslashes and 'u' (or 'U') in Unicode escapes
    fixed_string = re.sub(r'\\\s*u', r'\\u', s)
    fixed_string = re.sub(r'\\\s*U', r'\\U', fixed_string)
    
    # Decode Unicode escape sequences
    try:
        decoded_string = fixed_string.encode('utf-8').decode('unicode_escape')
    except UnicodeDecodeError as e:
        print(f"Error decoding: {e}")
        decoded_string = fixed_string  # Fall back to original string if decode fails
    
    return decoded_string

# Function to translate text to English and handle rate limits
def translate_to_english(text):
    try:
        if not text.strip():  # Skip empty strings
            return text
        # API request to Google Translate
        translation = translator.translate(text, dest='en')
        return translation.text
    except Exception as e:
        print(f"Error translating '{text}': {e}")
        # Wait and retry after 5 seconds if there's an issue with the API
        time.sleep(5)
        try:
            translation = translator.translate(text, dest='en')
            return translation.text
        except Exception as retry_error:
            print(f"Retry failed for '{text}': {retry_error}")
            return text  # Return the original text in case of failure

# Load JSON data from file
with open(file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Iterate through each document in the JSON array
for i, item in enumerate(data):
    for field in ['category']:  # Process only the 'category' field
        if field in item and item[field] is not None and isinstance(item[field], str):
            # Check for Unicode escape sequences
            if contains_unicode_escape(item[field]):
                # Fix Unicode escapes and decode, then translate
                fixed_value = fix_and_decode_unicode(item[field])
                translated_value = translate_to_english(fixed_value)
                item[field] = translated_value
            else:
                # Directly translate if no Unicode escape is found
                translated_value = translate_to_english(item[field])
                item[field] = translated_value

    # Save progress every 1000 items to prevent data loss
    if i % 1000 == 0:
        with open('translated_file_partial.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
        print(f"Progress saved at item {i}")

    time.sleep(0.1)  # Add a short delay to avoid rate limiting

# Final save to translated file
with open('translated_file_3.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

print("Translation completed and saved.")


Progress saved at item 0
Error decoding: 'unicodeescape' codec can't decode bytes in position 11-15: truncated \uXXXX escape
Error translating 'not found': 'Translator' object has no attribute 'raise_Exception'
Error translating 'Wedding rings': 'Translator' object has no attribute 'raise_Exception'
Error translating 'Male rings': 'Translator' object has no attribute 'raise_Exception'
Error translating 'not found': 'Translator' object has no attribute 'raise_Exception'
Error translating 'Men’s Wedding Rings': 'Translator' object has no attribute 'raise_Exception'
Error translating 'Men's alliances': 'Translator' object has no attribute 'raise_Exception'
Error translating 'not found': 'Translator' object has no attribute 'raise_Exception'
Error translating 'Men's rings': 'Translator' object has no attribute 'raise_Exception'
Error translating 'not found': 'Translator' object has no attribute 'raise_Exception'
Error translating 'Exclusive Deals': 'Translator' object has no attribute 'rai