### Retrieve raw HTML from main page

In [2]:
from selenium import webdriver

# Address of the page whose rendered HTML we want to capture
url = 'https://www.classcentral.com'

# Start a Chrome session; everything that can fail afterwards runs inside
# try/finally so the browser process is always shut down, even when the
# navigation raises.
browser = webdriver.Chrome()
try:
    browser.implicitly_wait(5)

    # Load the page and grab its HTML as seen by the browser
    browser.get(url)
    html = browser.page_source
finally:
    browser.quit()

# Keep a local copy so later cells can re-parse without launching a browser
with open('website-A.html', 'w', encoding='utf-8') as file:
    file.write(html)


### Parse the website to find translatable tags

In [3]:
from bs4 import BeautifulSoup
import requests

# Parse the copy of the page saved by the previous cell.
# Alternative: parse the in-memory string instead of the file:
# soup = BeautifulSoup(html, 'html.parser')
# The context manager guarantees the file is closed even if parsing fails.
with open('website-A.html', 'r', encoding='utf-8') as file:
    soup = BeautifulSoup(file, 'html.parser')

# Names of every tag in the document, in document order
nombres = [tag.name for tag in soup.find_all()]
# dict.fromkeys drops duplicates while keeping first-occurrence order,
# avoiding a linear membership scan for every tag.
nombres_unicos = list(dict.fromkeys(nombres))

print(f'All tags: {nombres_unicos}')

All tags: ['html', 'head', 'meta', 'title', 'link', 'script', 'style', 'body', 'div', 'header', 'nav', 'a', 'i', 'span', 'button', 'h2', 'section', 'ul', 'li', 'h3', 'p', 'img', 'h4', 'time', 'h5', 'ol', 'main', 'form', 'input', 'picture', 'source', 'strong', 'svg', 'path', 'aside', 'footer', 'noscript']


In [60]:
# Inspect every occurrence of one tag type in the parsed page;
# change `tag_to_show` to look at a different tag.
tag_to_show = "noscript"
found = soup.find_all(tag_to_show)
found


[<noscript><p><img alt="Clicky" height="1" src="//in.getclicky.com/100717250ns.gif" width="1"/></p></noscript>]

In [4]:
# Tag names whose text is sent to the translator (passed to soup.find_all)
TRANSLATABLE_TAGS = [
    'a',
    'meta',
    'title',
    'span',
    'button',
    'h2',
    'section',
    'ul',
    'li',
    'h3',
    'p',
    'h4',
    'h5',
    'ol',
    'aside',
    'footer',
]

### Translate those tags

#### First define a translation/post-processing function

In [5]:
def validate_tag(tag) -> bool:
    """Decide whether a tag's text should be translated.

    Args:
        tag: A parsed tag exposing `.name`, `.string` and `.get_text()`.

    Returns:
        False when the tag has no single string (`tag.string` is None).
        For `meta` tags, True only when the string starts with
        "Class Central". True for every other tag with a string; those
        are also logged to stdout.
    """
    if tag.string is None:
        return False

    if tag.name == 'meta':
        # NOTE(review): <meta> elements usually carry their text in the
        # `content` attribute, so `.string` is probably None for them and
        # this branch may never run - confirm.
        # `str.startswith` is the correct method name; the previous
        # `starts_with` raised AttributeError whenever this branch ran.
        return tag.string.startswith("Class Central")

    print(f'nuevo tag {tag.name} encontrado: \n\t{tag.string}')
    print(f'\t{tag.get_text()}')
    return True

In [6]:
# `unescape` is imported by name on purpose: a bare `import html` would
# overwrite the notebook's `html` variable holding the page source.
from html import unescape
import os

from google.cloud import translate_v2 as translate

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "norse-coral-377922-03fa1bb10bdb.json"

# Set up the Google Cloud Translation API client
client = translate.Client()

def do_translation(text_input: str, lan: str) -> str:
    """Translate a piece of text with the Google Cloud Translation client.

    Args:
        text_input: Text to translate.
        lan: Target language code passed as `target_language` (e.g. 'es').

    Returns:
        The translated text with HTML character references decoded.
    """
    translation = client.translate(text_input, target_language=lan)
    # The service can return HTML character references in the result.
    # Decode all of them (&quot;, &#39;, &amp;, ...) instead of only
    # &quot;, so they are not escaped a second time when the document is
    # serialised.
    translated_text = unescape(translation['translatedText'])
    print(f'\ttraduccion: \n\t{translated_text}')

    return translated_text

#### Then set target language and call the translation function

In [7]:
TARGET_LANGUAGE = 'es'

# ids of the text nodes that already hold translated text. A wrapper and
# its only child share the same text node through `.string`, so without
# this bookkeeping the same text would be sent to the API twice.
translated_node_ids = set()

# Translate the text of the website A
for tag in soup.find_all(TRANSLATABLE_TAGS):  # loop over isolated tags
    text_node = tag.string
    if text_node is None or id(text_node) in translated_node_ids:
        continue
    if not validate_tag(tag):
        continue

    # Swap the text node in place rather than assigning `tag.string`.
    # Assignment clears all of the tag's children, which would drop nested
    # markup (e.g. the <a> inside <li><a>text</a></li>).
    text_node.replace_with(do_translation(text_input=text_node, lan=TARGET_LANGUAGE))
    # `tag.string` now resolves to the node that replaced the original
    translated_node_ids.add(id(tag.string))


nuevo tag title encontrado: 
	Class Central • Find the best courses, wherever they exist.
	Class Central • Find the best courses, wherever they exist.
	traduccion: 
	Class Central • Encuentra los mejores cursos, dondequiera que existan.
nuevo tag span encontrado: 
	Class Central
	Class Central
	traduccion: 
	centro de clase
nuevo tag button encontrado: 
	
            Courses
          
	
            Courses
          
	traduccion: 
	Cursos
nuevo tag h2 encontrado: 
	Class Central
	Class Central
	traduccion: 
	centro de clase
nuevo tag span encontrado: 
	Rankings
	Rankings
	traduccion: 
	clasificaciones
nuevo tag span encontrado: 
	Collections
	Collections
	traduccion: 
	Colecciones
nuevo tag h3 encontrado: 
	Subjects
	Subjects
	traduccion: 
	Asignaturas
nuevo tag a encontrado: 
	View all
	View all
	traduccion: 
	Ver todo
nuevo tag span encontrado: 
	Computer Science
	Computer Science
	traduccion: 
	Ciencias de la Computación
nuevo tag span encontrado: 
	Health & Medicine
	Health & Medi

#### Finally save that file as output

In [11]:
# Write the translated document to its own file, named after the target language
output_path = f'website-A_translated_{TARGET_LANGUAGE}.html'
with open(output_path, mode='w', encoding='utf-8') as file:
    file.write(str(soup))