diff --git a/PDF_to_audio/README.md b/PDF_to_audio/README.md
new file mode 100644
index 0000000000..1dfbbdd8ca
--- /dev/null
+++ b/PDF_to_audio/README.md
@@ -0,0 +1,17 @@
+# Readme
+## Use this code
+First, install the dependencies by running these commands:
+```
+pip install pyttsx3
+
+pip install PyPDF2
+```
+
+## Improving the code
+- Add the possibility to save to .MP3
+- Select the pages we would like to read
+- Find a better TTS voice
+
+## Known issues
+- Some PDFs don't use space characters but position each word instead; at the moment I can't figure out how to take this into account other than by using OCR.
+-
\ No newline at end of file
diff --git a/PDF_to_audio/pdf-test.pdf b/PDF_to_audio/pdf-test.pdf
new file mode 100644
index 0000000000..f46dbe561d
Binary files /dev/null and b/PDF_to_audio/pdf-test.pdf differ
diff --git a/PDF_to_audio/pdf_to_audio.py b/PDF_to_audio/pdf_to_audio.py
new file mode 100644
index 0000000000..6ffec42ea9
--- /dev/null
+++ b/PDF_to_audio/pdf_to_audio.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+"""
+Read a PDF aloud: pick a file in a dialog and speak its text page by page.
+
+Created on Sun Oct 11 19:50:06 2020
+
+@author: quent
+"""
+import PyPDF2
+import pyttsx3
+from tkinter import Tk
+from tkinter.filedialog import askopenfilename
+
+
+def choose_pdf():
+    """Open the file dialog and return the chosen path (empty if cancelled)."""
+    root = Tk()
+    root.withdraw()  # hide the empty root window; only the dialog is wanted
+    try:
+        return askopenfilename()
+    finally:
+        root.destroy()  # release the hidden window once the dialog closes
+
+
+def iter_page_texts(stream):
+    """Yield the extracted text of each page of the PDF opened as *stream*."""
+    # PyPDF2 3.0 removed PdfFileReader/extractText in favour of
+    # PdfReader/extract_text; fall back to the old names on PyPDF2 1.x.
+    reader_cls = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader
+    for page in reader_cls(stream).pages:
+        extract = getattr(page, "extract_text", None) or page.extractText
+        yield extract()
+
+
+def read_aloud(path):
+    """Speak the text of every page of the PDF at *path*, in page order."""
+    engine = pyttsx3.init()  # one engine for the whole document
+    with open(path, "rb") as pdf_file:  # PDFs are binary, hence "rb"
+        for text in iter_page_texts(pdf_file):
+            if not text or not text.strip():
+                continue  # blank or image-only page: nothing to say
+            engine.say(text)
+            engine.runAndWait()  # block until this page has been spoken
+
+
+def main():
+    """Ask for a PDF and read it aloud; do nothing if the dialog is cancelled."""
+    path = choose_pdf()
+    if not path:
+        print("No file selected.")
+        return
+    read_aloud(path)
+
+
+if __name__ == "__main__":
+    main()