In [None]:
!pip install googletrans==4.0.0-rc1

Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading httpcore-0.9.1-py3-none-any.whl.metadata (4.6 kB)
Collecting h11<0.10,>=0.8 (from httpcore==0.9.*->httpx==0.13.3->googl

In [None]:
import pandas as pd
from googletrans import Translator
from json.decoder import JSONDecodeError
import time
import random

In [None]:
class CustomTranslator(Translator):
    """Translator subclass whose instances always carry a ``raise_Exception`` attribute.

    NOTE(review): presumably a workaround for googletrans 4.0.0-rc1, whose request
    code reportedly reads ``self.raise_Exception`` (capital "E") without ever
    defining it -- confirm against the installed version before renaming or
    removing this attribute.
    """
    def __init__(self, *args, **kwargs):
        # Forward every constructor argument to Translator unchanged.
        super().__init__(*args, **kwargs)
        # The capital "E" looks deliberate (see the class note); do not "fix" the casing blindly.
        self.raise_Exception = False

    def _translate(self, text, dest, src):
        # Pure pass-through to the parent implementation; adds no behavior of its own.
        return super()._translate(text, dest, src)
def translate_to_english(text, max_retries=3, min_wait=3.0, max_wait=8.0):
    """Translate ``text`` to English, returning it unchanged when that is not possible.

    Intended for ``Series.apply``: translation problems never propagate to the
    caller. On any failure a message is printed and the original value is
    returned, so one bad row cannot abort a whole column.

    Args:
        text: Value to translate. Non-strings (e.g. NaN) and empty or
            whitespace-only strings are returned as-is.
        max_retries: How many translate attempts may fail with
            ``JSONDecodeError`` before giving up.
        min_wait: Lower bound, in seconds, of the random pause between retries.
        max_wait: Upper bound, in seconds, of the random pause between retries.

    Returns:
        The translated string, or the original ``text`` when it is detected as
        English, when detection fails, or when translation fails.
    """
    # Guard first so blank/NaN cells never construct a translator.
    if not isinstance(text, str) or not text or text.isspace():
        return text

    translator = CustomTranslator()

    try:
        detected_lang = translator.detect(text).lang
    except Exception as e:
        # A detection failure on one row must not abort the whole apply() run.
        print(f"Language detection error: {e}, Text: {text}")
        return text

    if detected_lang == 'en':
        return text

    # Try the detected code first; fall back to 'auto' if the library rejects it.
    src_lang = detected_lang if isinstance(detected_lang, str) else 'auto'
    failures = 0
    try:
        while failures < max_retries:
            try:
                translation = translator.translate(text, src=src_lang, dest='en')
                return translation.text if translation else text
            except JSONDecodeError:
                failures += 1
                if failures >= max_retries:
                    break  # out of attempts: sleeping again would only waste time
                # Wait a random interval (min_wait..max_wait seconds) before retrying
                wait_time = random.uniform(min_wait, max_wait)
                print(f"JSONDecodeError encountered. Retrying in {wait_time:.2f} seconds...")
                time.sleep(wait_time)
            except ValueError:
                # JSONDecodeError subclasses ValueError, so it must be caught above.
                # NOTE(review): presumably this is how googletrans reports
                # "invalid source language" for detected codes it does not
                # support -- retry once and let the service pick the source.
                if src_lang == 'auto':
                    raise
                src_lang = 'auto'
        # All retries failed: return the original text
        print(f"Translation failed after multiple retries for text: {text}")
        return text
    except Exception as e:
        print(f"Translation error: {e}, Text: {text}")
        return text

In [None]:
# Load the listings subset from CSV (path is relative to the notebook's working directory).
# NOTE(review): the preview below shows an "Unnamed: 0" column, presumably a row index
# that was saved into the file -- confirm whether index_col=0 is wanted here.
df_non_english_50 = pd.read_csv('df_non_english_50%.csv')

In [None]:
# Preview the first rows to check which columns were loaded
df_non_english_50.head()

Unnamed: 0,id,name,description,neighborhood_overview,host_about,is_english,name_translated,description_translated
0,275343,10min walk to MRT & a Cozy Room with window! (1),**IMPORTANT NOTES: READ BEFORE YOU BOOK! <br ...,,K2 Guesthouse is designed for guests who want ...,False,10min walk to MRT & a Cozy Room with window! (1),**IMPORTANT NOTES: READ BEFORE YOU BOOK! <br ...
1,275344,15 mins to Outram MRT Single Room (2),Lovely home for the special guest !,Bus stop <br />Food center <br />Supermarket,K2 Guesthouse is designed for guests who want ...,False,15 mins to Outram MRT Single Room (2),Lovely home for the special guest !
2,654191,Partial-Ensuite bedroom+Living room @ farrerpark,This Room's features include:<br />+ Personal ...,- Apartment is within area of petain road/kitc...,"Managing 88 rooms of various budgets, types an...",False,Partial-Ensuite bedroom+Living room @ farrerpark,This Room's features include:<br />+ Personal ...
3,1508829,Lavender QueenBed *rmFr: 5m walk MRT,"- Airconditioned Double-bed/Queen-size room, w...",- Apartment is within lavender neighbourhood (...,"Managing 88 rooms of various budgets, types an...",False,Lavender QueenBed *rmFr: 5m walk MRT,"- Airconditioned Double-bed/Queen-size room, w..."
4,1710048,Fireworks view8! highrise Ensuite Not4 Tourist,"+ Lockable Room & Fully Private, Airconditione...",- Apartment is at Chinatown MRT exit C.<br />-...,We rent rooms on behalf of various owners in s...,False,Fireworks view8! highrise Ensuite Not4 Tourist,"+ Lockable Room & Fully Private, Airconditione..."


In [None]:
# Run translate_to_english on every neighborhood_overview cell (one call per row)
# and store the results in a new neighborhood_overview_translated column
df_non_english_50['neighborhood_overview_translated'] = df_non_english_50['neighborhood_overview'].apply(translate_to_english)

In [None]:
# Run translate_to_english on every host_about cell (one call per row)
# and store the results in a new host_about_translated column
df_non_english_50['host_about_translated'] = df_non_english_50['host_about'].apply(translate_to_english)

In [None]:
# Drop the four untranslated source columns; drop() returns a new frame, so df_non_english_50 keeps them
df_translated = df_non_english_50.drop(columns=['name', 'description', 'neighborhood_overview', 'host_about'])

In [None]:
# Write the translated listings to CSV; index=False leaves the DataFrame's row index out of the file
df_translated.to_csv('df_translated.csv', index=False)

Check whether the final translations still contain any non-English text.

In [None]:
# Display the translated table for a manual look at the *_translated columns
df_translated

Unnamed: 0,id,is_english,name_translated,description_translated,neighborhood_overview_translated,host_about_translated
0,275343,False,10min walk to MRT & a Cozy Room with window! (1),**IMPORTANT NOTES: READ BEFORE YOU BOOK! <br ...,,K2 Guesthouse is designed for guests who want ...
1,275344,False,15 mins to Outram MRT Single Room (2),Lovely home for the special guest !,Bus stop <br />Food center <br />Supermarket,K2 Guesthouse is designed for guests who want ...
2,654191,False,Partial-Ensuite bedroom+Living room @ farrerpark,This Room's features include:<br />+ Personal ...,- Apartment is within area of petain road/kitc...,"Managing 88 rooms of various budgets, types an..."
3,1508829,False,Lavender QueenBed *rmFr: 5m walk MRT,"- Airconditioned Double-bed/Queen-size room, w...",- Apartment is within lavender neighbourhood (...,"Managing 88 rooms of various budgets, types an..."
4,1710048,False,Fireworks view8! highrise Ensuite Not4 Tourist,"+ Lockable Room & Fully Private, Airconditione...",- Apartment is at Chinatown MRT exit C.<br />-...,We rent rooms on behalf of various owners in s...
...,...,...,...,...,...,...
619,1239034501912776786,False,Room at Red Land MRT - Near Expo,3mins walk to Tanah Merah MRT <br />1 MRT stat...,,
620,1241036805012484393,False,Beautiful 2 bedrooms apartment,Have fun with the whole family at this stylish...,,Dubai property agt
621,1241238978192145855,False,Rooms in an apartment hotel in Singapore,Studio (No Window) Apartment is perfect for a ...,,
622,1245858798730925675,False,Bali resort luxury condo,Beautiful one bedroom plus one study (can be t...,,"Hi, my name is Peng, a tech professional and y..."


# Translation - reviews.csv

In [None]:
# Load the reviews table from CSV (path is relative to the notebook's working directory)
df_non_english_reviews = pd.read_csv('df_non_english_reviews.csv')

In [None]:
# Split df_non_english_reviews into 4 independent DataFrames so each batch
# can be translated in its own cell.
import numpy as np

# Number of batches and the row count of every batch except the last
num_splits = 4
split_size = len(df_non_england_reviews) // num_splits if False else len(df_non_english_reviews) // num_splits

# Batch i covers rows [i*split_size, (i+1)*split_size); the last batch runs to
# the end of the frame, so it absorbs any remainder from an uneven split.
# .copy() makes every batch own its data, so adding a column to it later does
# not raise SettingWithCopyWarning.
df_r1, df_r2, df_r3, df_r4 = (
    df_non_english_reviews.iloc[
        i * split_size : (None if i == num_splits - 1 else (i + 1) * split_size)
    ].copy()
    for i in range(num_splits)
)

# Display the sizes of the new DataFrames
print(len(df_r1), len(df_r2), len(df_r3), len(df_r4))


1871 1871 1871 1873


In [None]:
# Translate this batch's review comments. assign() returns a new DataFrame, so the
# column is never written into a slice of the source frame (no SettingWithCopyWarning).
df_r1 = df_r1.assign(comments_translated=df_r1['comments'].apply(translate_to_english))

Translation error: invalid source language, Text: Es una casa impresionant, super acollidora i anfitrions amabilisims
Translation error: invalid source language, Text: 住宿位置很好，交通十分方便，要買的手信很容易便找到。房間適合一人住，窗外景色開揚，冷氣，熱水浴，WiFi 都齊全。喜歡揾野食，住在這裡就很適合。如有問題，Ong 很快回覆。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_r1['comments_translated'] = df_r1['comments'].apply(translate_to_english)


In [None]:
# Translate this batch's review comments. assign() returns a new DataFrame, so the
# column is never written into a slice of the source frame (no SettingWithCopyWarning).
df_r2 = df_r2.assign(comments_translated=df_r2['comments'].apply(translate_to_english))

Translation error: invalid source language, Text: 設計美觀又舒適嘅地方
Translation error: invalid source language, Text: 房東非常的好人 遇到問題可以找他 他都會盡她所能來幫助你 他也會常常回覆信息 真係很好 
Translation error: invalid source language, Text: Acomodação bem equipada e fácil acesso aos transportes públicos?


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_r2['comments_translated'] = df_r2['comments'].apply(translate_to_english)


In [None]:
# Translate this batch's review comments. assign() returns a new DataFrame, so the
# column is never written into a slice of the source frame (no SettingWithCopyWarning).
df_r3 = df_r3.assign(comments_translated=df_r3['comments'].apply(translate_to_english))

Translation error: invalid source language, Text: Obrigado Andy pela recepção e comunicação durante a reserva, me senti em casa no lugar que adoro visitar, até a próxima…
Translation error: invalid source language, Text: Excelente opção para curta estadia em Singapura, optamos pela opção de 8pm-10am. A zona é excelente com imensa cultura e bons restaurantes. Check in através de app, com instruções claras (tive uma duvida e o anfitrião respondeu rapidamente).
Translation error: invalid source language, Text: Sitio prático e conveniente para uma ou duas noites de passagem em Singapura. Existem cacifos, se levarmos o nosso próprio cadeado para  deixar a bagagem temporariamente.<br/>Tem de se instalar uma aplicação para fazer o check-in e abrir as portas do quarto.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_r3['comments_translated'] = df_r3['comments'].apply(translate_to_english)


In [None]:
# Translate this batch's review comments. assign() returns a new DataFrame, so the
# column is never written into a slice of the source frame (no SettingWithCopyWarning).
df_r4 = df_r4.assign(comments_translated=df_r4['comments'].apply(translate_to_english))

Translation error: invalid source language, Text: Estúdio confortável com tudo o que é necessário e com uma localização privilegiada. Equipa muito prestável recomendamos vivamente este alojamento para quem visite Singapura.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_r4['comments_translated'] = df_r4['comments'].apply(translate_to_english)


In [None]:
# Concatenate the four review batches back into a single DataFrame;
# ignore_index=True gives the result a fresh 0..n-1 index instead of each batch's own labels
df_reviews_translated = pd.concat([df_r1, df_r2, df_r3, df_r4], ignore_index=True)

In [None]:
# Export df_reviews_translated to CSV; index=False leaves the DataFrame's row index out of the file
df_reviews_translated.to_csv('df_reviews_translated.csv', index=False)