# Libraries

## Install packages

Google Text To Speech module: convert text into speech in several languages

In [None]:
!pip install gTTS

In [None]:
from gtts import gTTS
from IPython.display import Audio

Tesseract extracts text from an image and returns it as a string.

To install the library, first install _tesseract-ocr_ package and then _pytesseract_ one.

In [None]:
!sudo apt install tesseract-ocr
!pip install pytesseract

Now, import the libraries that will be necessary to use the module.

In [None]:
try:
  from PIL import Image
except ImportError:
  import Image
import cv2
import pytesseract

Check where the tesseract executable is installed on the Colab virtual machine

In [None]:
# Shell out to print the location of the `tesseract` executable found on PATH.
!which tesseract

Point pytesseract at the tesseract executable so it can be invoked properly

In [None]:
# Path of the tesseract executable that pytesseract should call.
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

# Google Text To Speech 

Quick test of the text-to-speech module

In [None]:
# Smoke test: synthesize one short sentence, save it, then play it inline.
text = "Hello! My name is Hugo"
sound_file = 'hi.wav'

tts = gTTS(text)
tts.save(sound_file)

# Keep the Audio object as the last expression so the notebook renders it.
Audio(sound_file, autoplay=True)

# Text recognition

Check the available languages supported by tesseract

In [None]:
# Query pytesseract for the languages the installed tesseract supports;
# the result is displayed as the cell output.
pytesseract.get_languages()

Load an image from Drive using the cv2 library. By default, `cv2.imread` returns images in BGR channel order, so we need to convert them to RGB, the order expected by the tesseract package.

In [None]:
# Drive has to be mounted before anything under /content/drive is readable.
# Without this the cell fails on a fresh runtime (Restart & Run All).
from google.colab import drive
drive.mount('/content/drive')

image_path = r'/content/drive/MyDrive/Colab Notebooks/Projects/NLP/Resources/photo.jpeg'

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise surface as an obscure error inside cvtColor.
image_bgr = cv2.imread(image_path)
if image_bgr is None:
  raise FileNotFoundError(f"Could not read image: {image_path}")

# Reorder the channels from OpenCV's BGR to RGB before running OCR.
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

Recognize the words in image and save them into an auxiliary string variable

In [None]:
# Run OCR on the RGB image and keep the recognized text in `text`.
# The page segmentation mode must be passed as `--psm`: the legacy single-dash
# `-psm` spelling is rejected by Tesseract 4+ as an unknown argument.
# PSM 1 = automatic page segmentation with orientation/script detection.
text = pytesseract.image_to_string(image_rgb, lang='eng', config='--psm 1')
print(text)

Convert text into speech using gTTS. Then, save it into an auxiliary .wav file

In [None]:
# Synthesize speech for the recognized text and write it to hi.wav.
# NOTE(review): the OCR cell above uses lang='eng' while the speech here is
# requested with lang='es-us' — confirm the language mismatch is intended.
tts = gTTS(text, lang = 'es-us')
tts.save("hi.wav")

Play the audio file using IPython's `Audio` class

In [None]:
# Name of the audio file written by the previous cell.
sound_file = 'hi.wav'
# Last expression of the cell, so the notebook renders the player; autoplay
# is requested explicitly.
Audio(sound_file, autoplay=True)

## Convert pdf into images

To convert pdf file into images, we will need two libraries: _poppler_ and _pdf2image_

In [None]:
!apt-get install poppler-utils &> /dev/null
!pip install pdf2image &> /dev/null

In [None]:
import os
import zipfile

from pdf2image import convert_from_path, convert_from_bytes
from IPython.display import display, Image
from google.colab import files

### Upload pdf function

In [None]:
# Ask the user for PDF uploads. For every uploaded PDF: render each page to a
# PNG, pack the PNGs into "<name>.zip" and trigger a download of that archive.
print("\nPlease upload PDF files.")
uploaded = files.upload()
for fn, pdf_bytes in uploaded.items():
    print("")
    # Skip anything whose name does not end in ".pdf" (case-insensitive).
    if not fn.lower().endswith(".pdf"):
        print(f"{fn} is not PDF file!")
        continue

    stem = fn[:-4]  # file name without the 4-character ".pdf" suffix
    archive_name = f"{stem}.zip"
    images = convert_from_bytes(pdf_bytes, size=800)

    with zipfile.ZipFile(archive_name, "w", compression=zipfile.ZIP_DEFLATED) as new_zip:
        for page_no, page in enumerate(images, start=1):
            name = f"{stem}_{page_no}.png"
            # The PNG is written to disk first, then added to the archive.
            page.save(name, "PNG")
            new_zip.write(name, arcname=name)
            print(f"{fn} p.{page_no} > {name}")

    print("Convert completed.\nThe download will start...")
    files.download(archive_name)
print("\nFinished.")

### Use pdf from Drive

In [None]:
# Render every page of the PDF stored in Drive and save the pages as
# pdf_1.png, pdf_2.png, ... relative to the current working directory.
pdf_path = '/content/drive/MyDrive/Colab Notebooks/Projects/NLP/Resources/prueba.pdf'
pdf_image = convert_from_path(pdf_path)
for page_number, page in enumerate(pdf_image, start=1):
  name = f"pdf_{page_number}.png"
  page.save(name, "PNG")

In [None]:
# OCR the first rendered PDF page; only pdf_1.png is read here.
# NOTE(review): assumes the PNGs were saved with /content as working directory.
path_to_pdf_image = '/content/pdf_1.png'
# The page segmentation mode must be passed as `--psm`: the legacy single-dash
# `-psm` spelling is rejected by Tesseract 4+ as an unknown argument.
pdf_text = pytesseract.image_to_string(path_to_pdf_image, lang='eng', config='--psm 1')

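`pdf_image` is a list of PIL images, one per page. pytesseract accepts a PIL image or an image file path directly, so no conversion to cv2 format is needed; the cell above reads the first saved page from disk. Print the recognized text: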

In [None]:
# Show the text that OCR recognized in the PDF page image.
print(pdf_text)

Finally, we convert the text into speech using gTTS

In [None]:
# Speak the text extracted from the PDF page and play the result inline.
sound_file = 'pdf.wav'

tts = gTTS(pdf_text, lang='es-us')
tts.save(sound_file)

# Keep the Audio object as the last expression so the notebook renders it.
Audio(sound_file, autoplay=True)

## From an image taken with the webcam

### Import libraries

In [None]:
from IPython.display import display, Javascript, Image
import numpy as np
from google.colab.output import eval_js
from base64 import b64decode, b64encode
from google.colab.patches import cv2_imshow

### Helper Functions

In [None]:
# Function to convert JavaScript objects into OpenCV images
def js_to_image(js_reply):
  """Turn a base64 data-URL string into an OpenCV image.

  Args:
    js_reply: String of the form "<header>,<base64 payload>". The segment
      between the first and second comma is taken as the payload.

  Returns:
    Whatever ``cv2.imdecode`` produces for the decoded bytes when asked for
    a colour image (flag value 1, i.e. ``cv2.IMREAD_COLOR``).
  """
  # Drop the header in front of the comma and keep the base64 payload.
  payload = js_reply.split(',')[1]

  # View the decoded bytes as a flat uint8 buffer, the input imdecode expects.
  encoded_pixels = np.frombuffer(b64decode(payload), dtype=np.uint8)

  return cv2.imdecode(encoded_pixels, cv2.IMREAD_COLOR)

In [None]:
def take_photo(filename='photo.jpg', quality=0.8):
  """Capture one webcam frame in the browser and save it to disk.

  A JavaScript helper is injected into the cell output. It shows a live video
  preview with a "Capture" button, waits for the click, draws the current
  frame on a canvas, stops the video track, removes the preview and returns
  the frame as a JPEG data URL. The data URL is decoded with ``js_to_image``
  and written with ``cv2.imwrite``.

  Args:
    filename: Path the captured image is written to.
    quality: Value forwarded to ``canvas.toDataURL('image/jpeg', quality)``.

  Returns:
    ``filename``, unchanged.
  """
  capture_script = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(capture_script)

  # Run the helper in the browser; its return value is the JPEG data URL.
  data_url = eval_js(f'takePhoto({quality})')

  # Decode the data URL into an OpenCV image and write it to `filename`.
  frame = js_to_image(data_url)
  cv2.imwrite(filename, frame)

  return filename

### Take a picture with the webcam

In [None]:
from IPython.display import Image
try:
  # Capture a frame from the webcam, then render the saved file inline.
  filename = take_photo()
  print(f'Saved to {filename}')
  display(Image(filename))
except Exception as err:
  # Raised when the user has no webcam or has not granted the page
  # permission to access it; report the message instead of a traceback.
  print(err)

### Text recognition

In [None]:
# NOTE(review): assumes take_photo() saved photo.jpg with /content as the
# working directory — confirm if the runtime's cwd differs.
photo_path = r'/content/photo.jpg'

# cv2.imread returns None instead of raising when the file cannot be read
# (for example when the capture cell failed), so fail early with a clear message.
photo_bgr = cv2.imread(photo_path)
if photo_bgr is None:
  raise FileNotFoundError(f"Could not read image: {photo_path}")
photo_rgb = cv2.cvtColor(photo_bgr, cv2.COLOR_BGR2RGB)

# The page segmentation mode must be passed as `--psm`: the legacy single-dash
# `-psm` spelling is rejected by Tesseract 4+ as an unknown argument.
text = pytesseract.image_to_string(photo_rgb, lang='eng', config='--psm 1')

# Remove '-', '|' and '_' characters from the OCR output in a single pass.
text = text.translate(str.maketrans('', '', '-|_'))
print(text)

In [None]:
# Speak the text recognized in the webcam photo and play the result inline.
sound_file = 'hi.wav'

tts = gTTS(text, lang='es-us')
tts.save(sound_file)

# Keep the Audio object as the last expression so the notebook renders it.
Audio(sound_file, autoplay=True)