! To install the dependencies, convert the two lines below into a code cell and run them.

In [1]:
import os
import re
import json
import boto3
import random
import spacy
import subprocess
import tempfile
import pandas as pd
from collections import Counter
from ebooklib import epub

### TTS

In [2]:
from google.cloud import texttospeech

In [3]:
# Path to the Google Cloud service-account key file; replace the placeholder
# with your own credentials JSON before running the TTS cells.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../your-project-creds.json"

In [4]:
# Initial voice lists
# Each entry is [voice name, pitch]; google_tts() passes element 0 as the voice
# name and element 1 as the pitch. Voices are handed out in list order, so the
# unmodified (pitch 0) variants are used first.
# 'pl-PL-Wavenet-A' at pitch 0 is not listed here because it is the default_voice.
INITIAL_FEMALE_VOICES = [
    ['pl-PL-Wavenet-D', 0], ['pl-PL-Wavenet-E', 0], 
    ['pl-PL-Wavenet-D', 2], ['pl-PL-Wavenet-E', 2], ['pl-PL-Wavenet-A', 2],
    ['pl-PL-Wavenet-D', -2], ['pl-PL-Wavenet-E', -2], ['pl-PL-Wavenet-A', -2],
    ['pl-PL-Wavenet-D', 4], ['pl-PL-Wavenet-E', 4], ['pl-PL-Wavenet-A', 4],
    ['pl-PL-Wavenet-D', -4], ['pl-PL-Wavenet-E', -4], ['pl-PL-Wavenet-A', -4]]
INITIAL_MALE_VOICES = [
    ['pl-PL-Wavenet-B', 0], ['pl-PL-Wavenet-C', 0],
    ['pl-PL-Wavenet-B', 2], ['pl-PL-Wavenet-C', 2],
    ['pl-PL-Wavenet-B', -2], ['pl-PL-Wavenet-C', -2],
    ['pl-PL-Wavenet-B', 4], ['pl-PL-Wavenet-C', 4],
    ['pl-PL-Wavenet-B', -4], ['pl-PL-Wavenet-C', -4]
]

# Store voice assignments for names
# (speaker name -> [voice name, pitch]; filled by process_paragraph and
# cleared by add_ssml after each story)
voice_assignments = {}
# Voice used for the title and for paragraphs without a recognised speaker.
default_voice = ['pl-PL-Wavenet-A', 0]
# Upper bound used by add_ssml when merging paragraphs into one TTS request.
gtts_char_limit = 5000

def split_title(title):
    """Render a story title as SSML.

    A title containing ':' is split at the first colon into a strongly
    emphasised main title and a moderately emphasised subtitle separated by a
    250 ms pause; otherwise the whole title is strongly emphasised. Both forms
    end with a 500 ms pause.
    """
    if ':' not in title:
        return f'<emphasis level="strong">{title}</emphasis><break time="500ms"/>'

    head, _, tail = title.partition(':')
    main_part = f'<emphasis level="strong">{head.strip()}</emphasis>'
    sub_part = f'<emphasis level="moderate">{tail.strip()}</emphasis>'
    return main_part + '<break time="250ms"/>' + sub_part + '<break time="500ms"/>'

def process_paragraph(paragraph, female_voices, male_voices):
    """Split a paragraph into spoken text and the voice that should read it.

    A paragraph of the form "Name: text" whose name part has fewer than three
    space-separated words is treated as dialogue. The speaker gets a voice from
    the pools on first appearance and keeps it (via ``voice_assignments``) for
    the rest of the story.

    Parameters:
    - paragraph: Raw paragraph text.
    - female_voices / male_voices: Mutable lists of still-unused
      [voice name, pitch] entries; the chosen entry is popped from the front.

    Returns:
    - (text, voice): the text without the "Name:" prefix and the speaker's
      voice, or the unchanged paragraph and ``default_voice`` for narration.
    """
    speaker, separator, speech = paragraph.partition(':')

    # No colon, an empty name (paragraph starts with ':') or a long prefix
    # means this is ordinary narration rather than a dialogue line.
    if not separator or not speaker.strip() or len(speaker.split(" ")) >= 3:
        return paragraph, default_voice

    # Assign voice based on gender and ensure consistency
    if speaker not in voice_assignments:
        # Heuristic used by this notebook: names ending in 'a' get a female voice.
        if speaker.endswith('a') and female_voices:
            voice_assignments[speaker] = female_voices.pop(0)
        elif male_voices:
            voice_assignments[speaker] = male_voices.pop(0)
        else:
            # Both pools are exhausted: fall back to the narrator voice
            # instead of failing with a KeyError below.
            voice_assignments[speaker] = default_voice
    return speech, voice_assignments[speaker]

def add_ssml(story):
    """Turn a story into a list of SSML chunks, one voice per chunk.

    Parameters:
    - story: Dict with a 'title' string and a 'paragraphs' list of strings.

    Returns:
    - List of {"ssml": "<speak>...</speak>", "voice": [name, pitch]} dicts.
      Consecutive paragraphs read by the same voice are merged (separated by a
      500 ms pause) as long as the finished chunk stays below
      ``gtts_char_limit``.

    Note: a single paragraph that is larger than the limit on its own is not
    split and is emitted as one oversized chunk.
    """
    # Reset voice lists for each story
    female_voices = INITIAL_FEMALE_VOICES.copy()
    male_voices = INITIAL_MALE_VOICES.copy()

    pause = '<break time="500ms"/>'
    wrapper_size = len("<speak></speak>")

    def byte_size(text):
        # Polish diacritics take more than one byte in UTF-8, so the size is
        # measured in encoded bytes rather than characters.
        return len(text.encode("utf-8"))

    paragraphs = [
        {
            "ssml": split_title(story['title']),
            "voice": default_voice
        }
    ]

    try:
        for p in story['paragraphs']:
            pp, voice = process_paragraph(p, female_voices, male_voices)
            current = paragraphs[-1]
            # Size of the request if this paragraph were merged, including the
            # pause markup and the <speak> wrapper added at the end.
            merged_size = byte_size(current["ssml"]) + byte_size(pause) + byte_size(pp) + wrapper_size
            if current["voice"] == voice and merged_size < gtts_char_limit:
                current["ssml"] = current["ssml"] + pause + pp
            else:
                paragraphs.append({"ssml": pp, "voice": voice})
    finally:
        # Reset voice assignments for the next story, even if this one failed
        # half-way, so stale speakers never leak into the next story.
        voice_assignments.clear()

    for p in paragraphs:
        p["ssml"] = f"<speak>{p['ssml']}</speak>"

    return paragraphs

In [5]:
def concatenate_mp3s(file_list, output_file):
    """
    Concatenate multiple MP3 files into a single file using ffmpeg.

    Parameters:
    - file_list: List of MP3 file paths to concatenate.
    - output_file: Path of the output MP3 file.

    Raises:
    - ValueError: if file_list is empty.
    - subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    if not file_list:
        raise ValueError("file_list must contain at least one MP3 file")

    # Create the input string for ffmpeg's "concat:" protocol
    input_str = "|".join(file_list)

    # Form the ffmpeg command; the audio stream is copied, not re-encoded
    cmd = [
        'ffmpeg',
        '-i', f'concat:{input_str}',
        '-acodec', 'copy',
        output_file
    ]

    # Execute the command; check=True surfaces ffmpeg failures instead of
    # silently leaving the output file missing.
    subprocess.run(cmd, check=True)

# Example usage:
# concatenate_mp3s(["file1.mp3", "file2.mp3", "file3.mp3"], "output.mp3")

In [6]:
def split_text_into_chunks(full_text, chunk_size=3000):
    """Group the lines of a text into chunks shorter than ``chunk_size``.

    The text is split on newlines and whole paragraphs are packed greedily, so
    a paragraph is never broken apart. A single paragraph longer than
    ``chunk_size`` becomes its own (oversized) chunk.

    Parameters:
    - full_text: Text to split.
    - chunk_size: Size budget per chunk in characters (default 3000).
      NOTE(review): the original comment cited a 6000-character Polly limit
      while chunking at 3000; confirm the intended value against the Amazon
      Polly quota documentation.

    Returns:
    - List of non-empty, whitespace-stripped chunk strings (empty for blank input).
    """
    chunk_texts = []
    temp_chunk = ""

    # Organize paragraphs to fit within the character limit without breaking them
    for paragraph in full_text.split("\n"):
        if len(temp_chunk) + len(paragraph) < chunk_size:
            temp_chunk += paragraph + "\n"
            continue
        # Flush the current chunk, but never emit an empty one (this happens
        # when the very first paragraph is already over the limit).
        if temp_chunk.strip():
            chunk_texts.append(temp_chunk.strip())
        temp_chunk = paragraph + "\n"

    # Add any remaining text
    if temp_chunk.strip():
        chunk_texts.append(temp_chunk.strip())
    return chunk_texts

In [7]:
def generate_audio_with_polly(story, output_path):
    """Synthesize a story with AWS Polly (voice 'Ewa') into one MP3 file.

    Parameters:
    - story: Dict with a 'title' string and a 'paragraphs' list of strings.
    - output_path: Path of the MP3 file to create; its directory is created
      when missing.

    The text is synthesized chunk by chunk into temporary files which are then
    concatenated with ffmpeg. The temporary files are removed even when
    synthesis or concatenation fails.
    """
    text = story['title'] + "\n\n" + "\n".join(story['paragraphs'])

    # Split the text into chunks and store them
    chunk_texts = split_text_into_chunks(text)

    # Temporary file paths will be stored here
    temp_files = []
    client = boto3.client('polly')

    try:
        for chunk in chunk_texts:
            response = client.synthesize_speech(Text=chunk, OutputFormat='mp3', VoiceId='Ewa')
            if "AudioStream" in response:
                # Save each chunk to a temporary file
                temp_fd, temp_filename = tempfile.mkstemp(suffix=".mp3")
                # Register the file before writing so it is cleaned up even
                # if the write fails.
                temp_files.append(temp_filename)
                with os.fdopen(temp_fd, 'wb') as tmpf:
                    tmpf.write(response['AudioStream'].read())

        # Ensure the 'audio' directory exists; dirname is '' for a bare file
        # name, in which case there is nothing to create.
        audio_folder = os.path.dirname(output_path)
        if audio_folder:
            os.makedirs(audio_folder, exist_ok=True)

        # Use ffmpeg to concatenate the audio files
        concatenate_mp3s(temp_files, output_path)
    finally:
        # Clean up the temporary files
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                pass

In [8]:
def google_tts(voice_param, ssml_input, language, output):
    """Synthesize one SSML document with Google Cloud TTS and save it as MP3.

    Parameters:
    - voice_param: [voice name, pitch] pair; element 0 selects the voice and
      element 1 is passed as the pitch.
    - ssml_input: SSML markup to synthesize.
    - language: Language code passed to the voice selection (e.g. "pl-PL").
    - output: Path of the MP3 file to write.
    """
    tts_client = texttospeech.TextToSpeechClient()

    synthesis = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(ssml=ssml_input),
        voice=texttospeech.VoiceSelectionParams(
            language_code=language,
            name=voice_param[0],
        ),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=0.9,  # fixed speaking rate used for every voice
            pitch=voice_param[1],
        ),
    )

    with open(output, "wb") as audio_file:
        audio_file.write(synthesis.audio_content)

In [9]:
def generate_audio_with_google_tts(story, output_path):
    """Synthesize a story with Google Cloud TTS into one MP3 file.

    Parameters:
    - story: Dict with a 'title' string and a 'paragraphs' list of strings.
    - output_path: Path of the MP3 file to create; its directory is created
      when missing.

    Each SSML chunk produced by add_ssml() is synthesized with its own voice
    into a temporary file; the files are then concatenated with ffmpeg and
    removed again, also when synthesis or concatenation fails.
    """
    # Split the story into per-voice SSML chunks
    chunks = add_ssml(story)

    # Temporary file paths will be stored here
    temp_files = []

    try:
        for chunk in chunks:
            # Reserve a temporary file for each chunk. google_tts() reopens it
            # by path, so the descriptor from mkstemp is closed right away to
            # avoid leaking it.
            temp_fd, temp_filename = tempfile.mkstemp(suffix=".mp3")
            os.close(temp_fd)
            temp_files.append(temp_filename)
            google_tts(chunk["voice"], chunk["ssml"], "pl-PL", temp_filename)

        # Ensure the 'audio' directory exists; dirname is '' for a bare file
        # name, in which case there is nothing to create.
        audio_folder = os.path.dirname(output_path)
        if audio_folder:
            os.makedirs(audio_folder, exist_ok=True)

        # Use ffmpeg to concatenate the audio files
        concatenate_mp3s(temp_files, output_path)
    finally:
        # Clean up the temporary files (best effort, but report failures)
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except OSError as e:
                print(f"Could not remove temporary file {temp_file}: {e}")

# Audiobook

In [10]:
from pydub import AudioSegment
from mutagen.m4a import M4A
from mutagen.mp4 import MP4, MP4Cover

In [11]:
import hashlib

In [12]:
def generate_file_checksum(file_path):
    """Return the hex SHA-256 digest of the file at ``file_path``.

    The file is hashed in 4 KiB blocks so that large audio files never have
    to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

def generate_checksum(mp3_file, chapter_titles, book_title, book_cover, book_text):
    """Fingerprint everything that feeds into one generated audiobook file.

    The SHA-256 of the MP3 content is concatenated with the string forms of
    the chapter title(s), book title, cover value and book text, and the
    result is hashed again. Returns the hex digest.
    """
    fingerprint_parts = [
        generate_file_checksum(mp3_file),
        str(chapter_titles),
        book_title,
        book_cover,
        book_text,
    ]
    combined = "".join(fingerprint_parts)
    return hashlib.sha256(combined.encode()).hexdigest()

In [13]:
def sanitize_title(title):
    """Reduce a chapter title to a string that is safe to use as a file name.

    Only the text before the first ':' is kept; every character that is not a
    word character, whitespace or '-' is removed and the result is stripped.
    """
    main_title = title.split(":")[0]  # Keep only text before ':'
    # Raw string: '\w' and '\s' are invalid escape sequences in a normal
    # string literal and trigger a warning on recent Python versions.
    sanitized_title = re.sub(r'[^\w\s-]', '', main_title)  # Remove illegal characters
    return sanitized_title.strip()

In [14]:
def create_apple_audiobooks(mp3_files, chapter_titles, book_title, book_cover, book_text, output_folder):
    """Convert chapter MP3s into tagged .m4b files, skipping unchanged chapters.

    Parameters:
    - mp3_files: List of MP3 paths, one per chapter.
    - chapter_titles: List of chapter titles, parallel to mp3_files.
    - book_title: Book title, used for the album and content-group tags.
    - book_cover: Path of the cover image embedded into every file.
    - book_text: Book text; only used as part of the change checksum.
    - output_folder: Folder that receives the .m4b files and checksums.json.

    A checksum per chapter is stored in ``checksums.json``; a chapter whose
    checksum is unchanged since the previous run is not regenerated. If the
    two lists differ in length a warning is printed and nothing is generated.
    """
    os.makedirs(output_folder, exist_ok=True)
    version = 1 # change when the logic changes
    if len(mp3_files) != len(chapter_titles):
        print("Warning: The number of mp3_files doesn't match the number of chapter_titles.")
        return

    checksum_filename = f'{output_folder}/checksums.json'
    try:
        with open(checksum_filename, 'r') as f:
            old_checksums = json.load(f)
    except FileNotFoundError:
        old_checksums = {}

    new_checksums = {}

    # Tag the cover with its real format: the notebook passes .png as well as
    # .jpg covers, and labelling a PNG as JPEG yields a wrongly typed cover.
    if book_cover.lower().endswith(".png"):
        cover_format = MP4Cover.FORMAT_PNG
    else:
        cover_format = MP4Cover.FORMAT_JPEG
    cover_data = None  # read lazily, once, and only if a chapter is rebuilt

    for ind, (mp3_file, chapter_title) in enumerate(zip(mp3_files, chapter_titles)):
        sanitized_title = sanitize_title(chapter_title)
        new_checksum = generate_checksum(mp3_file, sanitized_title, f"{book_title}-v{version}", book_cover, book_text)
        new_checksums[sanitized_title] = new_checksum

        # Unchanged since the last run: keep the existing .m4b
        if sanitized_title in old_checksums and new_checksum == old_checksums[sanitized_title]:
            continue

        audio = AudioSegment.from_mp3(mp3_file)
        m4b_filename = f'{output_folder}/{sanitized_title}.m4b'
        audio.export(m4b_filename, format="ipod")

        if cover_data is None:
            with open(book_cover, "rb") as cover_file:
                cover_data = cover_file.read()

        metadata = MP4(m4b_filename)
        metadata["\xa9nam"] = chapter_title
        metadata["\xa9aut"] = "Fatih Kurt"  # Author
        metadata["\xa9alb"] = book_title  # Album
        metadata["\xa9gen"] = "Audiobook"  # Genre
        metadata["aART"] = "Fatih Kurt"  # Album Artist
        metadata["\xa9day"] = "2023"  # Year
        metadata["\xa9wrt"] = "Fatih Kurt"  # Writer
        metadata["\xa9grp"] = book_title  # Content Group
        metadata["covr"] = [MP4Cover(cover_data, imageformat=cover_format)]  # Cover art
        metadata["cpil"] = 1  # Compilation (True or False)
        metadata["pgap"] = 1  # Gapless playback (True or False)
        metadata["trkn"] = [(ind+1, len(mp3_files))]  # Track number and total tracks

        metadata.save()
        print(f"Successfully generated {sanitized_title}.m4b")

    # Save new checksums
    with open(checksum_filename, 'w') as f:
        json.dump(new_checksums, f)

# Ebook

### NLP

In [15]:
class WordRootFinder:
    """Look up the lemma ("root") of Polish words with spaCy, cached on disk.

    Results are memoised in ``root_cache`` and written to ``cache_file`` as
    JSON after every 10 newly computed entries.
    """

    def __init__(self, cache_file="root_cache.json"):
        self.nlp = spacy.load("pl_core_news_sm")
        self.root_cache = {}
        self.cache_counter = 0  # new entries since the last save_cache()
        self.cache_file = cache_file
        self.load_cache()

    def load_cache(self):
        """Load the cache from ``cache_file``; start empty if it does not exist."""
        try:
            with open(self.cache_file, "r", encoding='utf-8') as f:
                self.root_cache = json.load(f)
        except FileNotFoundError:
            self.root_cache = {}

    def save_cache(self):
        """Write the whole cache to ``cache_file`` as readable UTF-8 JSON."""
        with open(self.cache_file, "w", encoding='utf-8') as f:
            json.dump(self.root_cache, f, ensure_ascii=False, indent=4)

    def root(self, word):
        """Return the root of ``word``, based on the lemma of its first token.

        A two-part lemma whose second part is "być" or "by" is reduced to its
        first part; any other multi-part lemma is reported and returned as is.
        If the pipeline yields no token at all (e.g. for an empty string) the
        word itself is returned, so callers always receive a string.
        """
        if word in self.root_cache:
            return self.root_cache[word]

        # Only the first token is ever used; the input is expected to be a
        # single word.
        first_token = next(iter(self.nlp(word)), None)
        if first_token is None:
            return word

        lemma = first_token.lemma_
        ll = lemma.split(" ")
        if len(ll) > 1 and (ll[1] == "być" or ll[1] == "by"):
            root = ll[0]
        else:
            if len(ll) > 1:
                print(f"Unknown combined word: {word} / {lemma}")
            root = lemma

        self.root_cache[word] = root
        self.cache_counter += 1

        # Persist in batches to avoid rewriting the file for every new word
        if self.cache_counter >= 10:
            self.save_cache()
            self.cache_counter = 0

        return root

In [16]:
# Shared lemmatizer instance; generate_question_html and book_handler look it
# up as a module-level global.
word_root_finder = WordRootFinder()

## Book Prep

In [17]:
# Word frequency table read from ../pl_50k.txt: word -> {"rank", "count"}.
# Each line is expected to hold a word followed by an integer count; the rank
# is the 1-based order in which a word is inserted.
pl50k = {}
with open("../pl_50k.txt", "r", encoding="utf-8") as f:
    for l in f:
        ps = l.split()
        if len(ps) < 2:
            # Skip blank or incomplete lines instead of failing with IndexError
            continue
        pl50k[ps[0]] = {"rank": len(pl50k) + 1, "count": int(ps[1])}

In [18]:
def parse_stories(filename, break_chapters, max_story_length=2700):
    """Yield the stories of a book file, optionally split into parts.

    File format: stories are separated by two blank lines, paragraphs inside a
    story by one blank line. The first paragraph is the title. If the
    second-to-last paragraph is exactly "Słownictwo:", it and the lines of the
    last paragraph are returned as the story's word list instead of as text.

    Parameters:
    - filename: Path of the UTF-8 book file.
    - break_chapters: When true, a long story is split into parts ("Część N")
      once more than half of ``max_story_length`` has been collected and the
      next paragraph would not fit into the remainder.
    - max_story_length: Target maximum number of characters per part.

    Yields:
    - Dicts with 'title', 'paragraphs' (list of str) and 'words' (list of str;
      only filled on a story's final part).
    """
    with open(filename, 'r', encoding="utf-8") as file:
        content = file.read()

    for story in content.split('\n\n\n'):
        is_split, part = False, 0
        total_chars = 0
        paragraphs = []
        lines = story.strip().split('\n\n')
        if len(lines) >= 2 and lines[-2] == "Słownictwo:":
            word_margin = 2
            words = [lines[-2]] + lines[-1].strip().split('\n')
        else:
            word_margin = 0
            words = []

        title = lines[0]
        for line in lines[1:len(lines) - word_margin]:
            if line == '':
                continue

            needs_split = (
                break_chapters
                and total_chars > (max_story_length / 2)
                and len(line) > (max_story_length - total_chars)
                and len(paragraphs) > 0
            )
            if needs_split:
                is_split = True
                part += 1
                stitle = f"Część {part}"
                partp = []
                if part == 1:
                    # The first part keeps the story title and gets the part
                    # label as its opening paragraph.
                    stitle = f"{title}"
                    partp = [f"Część {part}"]
                yield {
                    'title': stitle,
                    'paragraphs': partp + paragraphs,
                    'words': []
                }
                total_chars = 0
                paragraphs = []

            # The current paragraph always belongs to the part being built;
            # previously the paragraph that triggered a split was lost.
            total_chars += len(line)
            paragraphs.append(line)

        if len(paragraphs) > 0:
            stitle = title
            if is_split:
                stitle = f"Część {part+1}"
            yield {
                'title': stitle,
                'paragraphs': paragraphs,
                'words': words
            }

In [19]:
def get_dictionary():
    """Load the known-word list and the translations still worth teaching.

    Reads ``../known_words.json`` and the ';'-separated ``../samples.csv``
    (columns "Word" and "Translation").

    Returns:
    - (known_words, translations): the parsed JSON object and a dict mapping
      each lower-cased sample word that is not in known_words to its
      translation.
    """
    with open("../known_words.json", "r", encoding="utf-8") as handle:
        known_words = json.load(handle)

    samples = pd.read_csv("../samples.csv", delimiter=";")

    translations = {}
    for _, entry in samples.iterrows():
        key = entry["Word"].lower()
        if key in known_words:
            continue
        translations[key] = entry["Translation"]
    return known_words, translations

In [20]:
def encapsulate_chapter(title, chapter_content):
    """Wrap chapter markup in a minimal HTML page that links the book stylesheet.

    ``title`` is used for both the <title> element and the <h1> heading;
    ``chapter_content`` is inserted into the body unchanged.
    """
    # The stylesheet href previously had a doubled opening quote (href=""style/...),
    # which produced a malformed attribute.
    return f'''<html>
    <head>
        <title>{title}</title>
        <link href="style/book-style.css" rel="stylesheet" type="text/css" />
    </head>
    <body>
        <h1>{title}</h1>
        {chapter_content}
    </body>
    </html>'''

In [21]:
def generate_question_html(word, correct_translation, incorrect_answers, chapterid):
    """Build the HTML for one multiple-choice vocabulary question.

    Parameters:
    - word: The word being tested.
    - correct_translation: The right answer.
    - incorrect_answers: List of wrong answers shown next to it.
    - chapterid: Chapter identifier used in element ids and passed to the
      page's checkAnswer() script function.

    Returns:
    - An HTML fragment: a fieldset titled with the word (combined with its
      root when they differ) containing one clickable option per answer, in
      random order.

    Interpolated values are escaped so that characters such as an apostrophe
    in a translation cannot break the markup or the inline script call; plain
    alphanumeric values produce the same output as before.
    """
    from html import escape

    def js_arg(value):
        # Escape for a single-quoted JavaScript string first, then for the
        # double-quoted HTML attribute that contains the script call.
        literal = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return escape(literal, quote=True)

    all_options = [correct_translation] + incorrect_answers
    random.shuffle(all_options)
    title = word
    root = word_root_finder.root(word)
    if root is not None and root != word:
        title = combine_root_with_word(word, root)
    question_html = f'''
    <fieldset class="noBreak">
        <legend><b>{escape(str(title))}</b></legend>
        <div class="options">
    '''
    for idx, option in enumerate(all_options):
        qid = f"label_{chapterid}_{word}_{idx}"
        question_html += f'''
            <div class="question" id="{escape(qid)}" onclick="checkAnswer('{js_arg(word)}', '{js_arg(correct_translation)}', '{js_arg(qid)}', '{js_arg(chapterid)}')">
                <label>{escape(str(option))}</label>
            </div>
        '''
    # NOTE(review): '</br>' is not a valid closing tag; left unchanged so the
    # rendered spacing between questions stays as it is today.
    question_html += '</div></fieldset></br>'
    return question_html

def combine_root_with_word(word, root):
    """Show a word and its root in one compact label.

    The shared prefix is written once, followed in parentheses by the
    differing endings: "stem(root_ending/word_ending)". When only one of the
    two has an ending beyond the shared prefix, just that ending is shown.
    Identical inputs are returned unchanged.
    """
    if word == root:
        return word

    # Length of the common prefix
    shared = 0
    for word_char, root_char in zip(word, root):
        if word_char != root_char:
            break
        shared += 1

    stem = root[:shared]
    word_tail = word[shared:]
    root_tail = root[shared:]

    if not word_tail:
        return f"{stem}({root_tail})"
    if not root_tail:
        return f"{stem}({word_tail})"
    return f"{stem}({root_tail}/{word_tail})"


def generate_incorrect_answers(word, correct_translation, translations):
    """Pick wrong answers for ``word`` from the other entries of ``translations``.

    Candidates are the translations of all other words that differ from the
    correct translation. With fewer than four candidates all of them are
    returned in their original order; otherwise three are drawn at random.
    """
    distractors = []
    for other_word, other_translation in translations:
        if other_word == word:
            continue
        if correct_translation != other_translation:
            distractors.append(other_translation)

    if len(distractors) >= 4:
        return random.sample(distractors, 3)
    return distractors


def add_translation_test(translations, chapterid):
    """Build the vocabulary-test section for a chapter.

    ``translations`` is a sequence of (word, translation) pairs. The result is
    a heading showing the number of words followed by one multiple-choice
    question per pair.
    """
    sections = [f'<h2>Test słownictwa (Słowa: {len(translations)})</h2>']
    for word, correct_translation in translations:
        wrong_options = generate_incorrect_answers(word, correct_translation, translations)
        sections.append(generate_question_html(word, correct_translation, wrong_options, chapterid))
    return ''.join(sections)

In [22]:
class book_handler:
    """Build an EPUB (with optional TTS audio and audiobook files) from a book folder.

    The folder ``book_dir`` must contain ``book.txt`` (parsed with
    parse_stories) and the cover image ``poster``. Set ``tts = False`` on an
    instance to skip audio generation for chapters that have no MP3 yet.
    """

    def __init__(self, title, book_dir, poster, break_chapters=False):
        self.tts = True # enable tts generation
        self.title = title
        self.book_dir = book_dir
        self.poster = poster
        self.foot_notes = []
        # root -> first word form seen that is neither known nor translated
        self.better_translate = {}
        self.stories = list(parse_stories(os.path.join(book_dir, "book.txt"), break_chapters))
        # lower-cased word -> number of times it received a tooltip
        self.translated_word_ctr = Counter()

    def add_tooltips(self, paragraph, dictionary, known_words, used=None):
        """Wrap every word that has a dictionary translation in a tooltip span.

        Parameters:
        - paragraph: Plain text to annotate.
        - dictionary: word -> translation (does not contain known words).
        - known_words: Words that need no translation.
        - used: Optional set that collects the lower-cased words annotated
          here; a fresh set is used when omitted.

        Returns the paragraph with the tooltip markup inserted. Punctuation
        and spacing between words are preserved. Words that are neither known
        nor in the dictionary are recorded in ``better_translate``.
        """
        from html import escape

        # A mutable default would be shared between all calls.
        if used is None:
            used = set()

        def annotate(match):
            word = match.group(0)
            wl = self.remove_non_chars(word)
            wl_root = word_root_finder.root(wl.lower())
            if wl_root is None:
                wl_root = wl.lower()
            replacement = word
            if wl_root in dictionary and wl.lower() in dictionary and wl_root != dictionary[wl_root].lower():
                used.add(wl.lower())
                self.translated_word_ctr[wl.lower()] += 1
                # Escaped so that quotes in a translation cannot end the attribute early
                translation = escape(str(dictionary[wl.lower()]), quote=True)
                replacement = f"""<span class='non-breaking'><span class='overlay-text' data-translation='{translation}'></span>{word}</span>"""
            if wl_root not in known_words and wl_root not in dictionary and len(wl_root) > 0 and wl_root not in self.better_translate:
                self.better_translate[wl_root] = word
            return replacement

        # Substituting in place keeps everything between the words; the
        # previous findall + ' '.join approach dropped all punctuation.
        return re.sub(r'\b[^\W_]+\b', annotate, paragraph, flags=re.UNICODE)

    def remove_non_chars(self, text):
        """Drop everything except Latin/Polish letters and whitespace, then lower-case."""
        # Define a pattern to match non-characters
        pattern = r'[^a-zA-ZąćęłńóśźżĄĆĘŁŃÓŚŹŻ\s]'

        # Remove non-characters using regular expressions
        cleaned_text = re.sub(pattern, '', text)

        return cleaned_text.lower()

    def process_story(self, ind, story, known_words, dictionary):
        """Render one story as chapter HTML: paragraphs, vocabulary test, footnotes."""
        start_ind = len(self.foot_notes)
        used = set()
        title = self.add_tooltips(story["title"], dictionary, known_words, used)
        paragraphs = ''
        for paragraph in story['paragraphs']:
            p_tooltip = self.add_tooltips(paragraph, dictionary, known_words, used)
            paragraphs += f'<p><button class="audioControl paused" onclick="toggleAudio()"></button>{p_tooltip}</p> ' # use \t for tab character
        # Test only the words that actually occurred in this story
        sublist = []
        for w, c in self.top_words_to_learn():
            if w in used:
                sublist.append([w, dictionary[self.remove_non_chars(w.lower())]])
        test = add_translation_test(sublist, f"chapter{ind}")
        fns = f'<section epub:type="footnotes" id="ch{ind}footnotes"><hr /><ol class="footnotes">'
        for fn in self.foot_notes[start_ind:]:
            fns += f'<li epub:type="footnote" id="fn{fn["id"]}">{fn["def"]}</li>'
        fns += '</ol></section>'
        return encapsulate_chapter(title, f'{paragraphs}{test}{fns}')

    def create_epub(self):
        """Write the EPUB, then the per-chapter audiobook files, then print a report."""
        book = self.setup_book_metadata()
        self.add_cover_image(book)
        self.add_media_files(book)
        mp3_files, chapter_titles = self.add_chapters_to_book(book)
        self.write_epub_file(book)
        cover = os.path.join(self.book_dir, self.poster)
        with open(os.path.join(self.book_dir, "book.txt"), 'r', encoding="utf-8") as book_file:
            data = book_file.read()
        output = os.path.join(self.book_dir, "audio-book")
        create_apple_audiobooks(mp3_files, chapter_titles, self.title, cover, data, output)
        self.report()

    def setup_book_metadata(self):
        """Create the EpubBook with identifier, title, language and author set."""
        book = epub.EpubBook()
        book.set_identifier('sample_id')
        book.set_title(self.title)
        book.set_language('pl')
        book.add_author("Fatih Kurt")
        return book

    def add_cover_image(self, book):
        """Attach the poster file from the book folder as the cover."""
        with open(os.path.join(self.book_dir, self.poster), 'rb') as cover_file:
            book.set_cover(self.poster, cover_file.read())

    def add_media_files(self, book):
        """Add images from the book's own 'images' folder and the shared 'book-images' folder."""
        media_dir = os.path.join(self.book_dir, "images")
        self.add_media_from_dir(book, media_dir)
        self.add_media_from_dir(book, "book-images")

    def add_media_from_dir(self, book, media_dir):
        """Add every image file in ``media_dir`` to the book; missing folders are ignored."""
        if media_dir and os.path.isdir(media_dir):
            for media_file in os.listdir(media_dir):
                if media_file.endswith(('jpg', 'jpeg', 'png', 'gif', 'svg')):
                    with open(os.path.join(media_dir, media_file), 'rb') as image_file:
                        image_data = image_file.read()
                    book.add_item(epub.EpubImage(
                        file_name=media_file,
                        content=image_data
                    ))

    def add_chapters_to_book(self, book):
        """Add one chapter per story (with audio when available) plus TOC, CSS and script.

        Returns (mp3_files, chapter_titles): the audio paths that exist and the
        titles of all stories. The two lists differ in length when some
        chapter has no audio.
        """
        with open("book-style.css", "r") as style_file:
            style = style_file.read()
        nav_css = epub.EpubItem(uid="style_nav", file_name="style/book-style.css", media_type="text/css", content=style)
        with open("book-script.js", "r") as script_file:
            script = script_file.read()
        nav_script = epub.EpubItem(uid="script_nav", file_name="script/book-script.js", media_type="application/javascript", content=script)
        chapter_objs = []
        mp3_files, chapter_titles = [], []
        known_words, dictionary = get_dictionary()
        for ind, story in enumerate(self.stories):
            chapter_titles.append(story['title'])
            clean_title = re.sub(r'[^a-zA-Z0-9]', '_', story['title'])
            c = epub.EpubHtml(title=story['title'], file_name=f"{clean_title}-{ind}.xhtml", lang='pl')

            # Check for audio file and add it if it exists
            audio_path = os.path.join(self.book_dir, "audio", f"{ind+1}.mp3")
            if not os.path.exists(audio_path):
                # Generate the missing audio with Google TTS, unless disabled
                if self.tts:
                    generate_audio_with_google_tts(story, audio_path)
                else:
                    s = " ".join(story["paragraphs"])
                    print(f"TTS disabled: {audio_path}: {s[:100]}...{s[-100:]}")
            audio_player = ""
            if os.path.exists(audio_path):
                mp3_files.append(audio_path)
                with open(audio_path, 'rb') as audio_file:
                    audio_data = audio_file.read()
                audio_item = epub.EpubItem(
                    uid=f"audio_{ind+1}",
                    file_name=f"audio/{ind+1}.mp3",
                    media_type="audio/mpeg",
                    content=audio_data
                )
                audio_player = f'<audio id="myAudio" style="width: 100%;" onplay="updateButtons()" onpause="updateButtons()" controls><source src="{audio_item.file_name}" type="audio/mpeg">Your browser does not support the audio tag.</audio>'
                book.add_item(audio_item)
                c.add_link(href=audio_item.file_name, rel="alternate", type=audio_item.media_type, title=f"Audio for {story['title']}")

            c.content = audio_player + self.process_story(ind, story, known_words, dictionary)
            c.add_link(href=nav_css.file_name, rel="stylesheet", type="text/css", title="CSS for Chapter")
            c.add_item(nav_script)
            book.add_item(c)
            chapter_objs.append(c)

        book.toc = chapter_objs
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        book.add_item(nav_css)
        book.add_item(nav_script)
        book.spine = ['nav'] + chapter_objs
        return mp3_files, chapter_titles

    def write_epub_file(self, book):
        """Write the book to '<book_dir>/<title>.epub'."""
        epub.write_epub(os.path.join(self.book_dir, f"{self.title}.epub"), book, {})

    def top_words_to_learn(self, percentile=100):
        """Return [word, count] pairs for the most frequent translated words.

        Word forms are grouped by root and the groups sorted by total count.
        Groups are added until their cumulative share of all tooltip
        occurrences reaches ``percentile`` percent. Each group is represented
        by its root if the root itself occurred, otherwise by its most
        frequent form.
        """
        sorted_counts = sorted(self.translated_word_ctr.items(), key=lambda x: x[1], reverse=True)
        s = 0
        word_groups = {}
        for k,v in sorted_counts:
            s += v
            wr = word_root_finder.root(k)
            if wr not in word_groups:
                word_groups[wr] = {"w":[k], "t": v}
            else:
                word_groups[wr]["w"].append(k)
                word_groups[wr]["t"] += v
        sorted_root_counts = sorted(word_groups.items(), key=lambda x: x[1]["t"], reverse=True)
        res, ss = [], 0
        for k,v in sorted_root_counts:
            ss += v["t"]
            if k in v["w"]:
                res.append([k, v["t"]])
            else:
                res.append([v["w"][0], v["t"]])
            if percentile <= (100.0 * ss)/s:
                break
        return res

    def report(self):
        """Print how many words still lack a translation."""
        print(f"{len(self.better_translate)} words need translation.")

Disable TTS by setting `.tts = False`

In [23]:
# Dry run with TTS disabled: chapters without an existing MP3 are only
# reported ("TTS disabled: ...") instead of being synthesized.
polish_short_stories = book_handler("Polskie opowiadania science-fiction", "polish-short-stories", "poster.png")
polish_short_stories.tts = False
polish_short_stories.create_epub()

0 words need translation.


In [24]:
# Full run for the same book: TTS is enabled by default, so missing chapter
# audio is generated before the EPUB and audiobook files are written.
polish_short_stories = book_handler("Polskie opowiadania science-fiction", "polish-short-stories", "poster.png")
polish_short_stories.create_epub()

0 words need translation.


In [25]:
# Build the B1 conversations book (TTS enabled by default).
polish_short_conversations = book_handler("Polskie krótkie konwersacje dla B1", "short-conversations-polish-b1", "cover.jpg")
polish_short_conversations.create_epub()

0 words need translation.


In [26]:
# List the word roots found in the stories that are neither known nor in the
# dictionary, one per line.
print("\n".join(list(polish_short_stories.better_translate)))




In [27]:
# Same list of untranslated word roots for the conversations book.
print("\n".join(list(polish_short_conversations.better_translate)))




Use the method below to create a word test on Memrise.

In [28]:
def get_memrise_test(book, percentile=100):
    """Print a tab-separated vocabulary list for the given book.

    One line is printed per entry of ``book.top_words_to_learn(percentile)``:
    the word, its dictionary translation and its count.
    """
    _, dictionary = get_dictionary()
    rows = []
    for word, count in book.top_words_to_learn(percentile=percentile):
        rows.append(f"{word}\t{dictionary[word]}\t{count}")
    print("\n".join(rows))

In [29]:
# Print the words that make up the top 50% of tooltip occurrences in the
# conversations book (columns: word, translation, count).
get_memrise_test(polish_short_conversations, percentile=50)

rozwijać	to develop	18
związane	bound	11
podejście	approach	10
wyzwanie	challenge	9
rozwój	development	9
wyjątkowy	exceptional	9
wychowywania	upbringing	7
korzyści	benefits	6
cenne	valuable	6
doceniać	to appreciate	6
równowaga	balance	6
utrzymywać	maintain	6
spełnienia	fulfillment	6
poglądami	perspectives	6
zastanawiać	wonder	6
odnaleźć	find	5
dostosować	adjust	5
przetrwać	survive	5
poruszyć	touch	5
przesiadka	change	5
sąsiedztwo	neighborhood	5
rodzicielstwo	parenthood	5
narzeczony	fiancé	5
współpraca	cooperation	5
odnawiać	renew	4
wpływ	influence	4
oczekiwania	expectations	4
podejmować	undertake	4
niepokój	worry	4
narzędzia	tools	4
wychowanie	upbringing	4
łączyła	she connected	3
zyskać	gain	3
wychowawczymi	educational	3
przyjemność	pleasure	3
bezradna	helpless	3
przeczytanej	read	3
zastanowić	to consider	3
nastawienia	attitude	3
wyzwania	challenges	3
wytrzymałość	endurance	3
rozstanie	farewell	3
wpłynąć	affect	3
osobiście	personally	3
wyznała	confessed	3
poprawę	improvement	3
wzajemne

# Other

In [30]:
def get_aws_account_details():
    """Return the AWS account id and account aliases of the active credentials.

    Returns:
    - (account_id, aliases): the 'Account' value reported by STS
      get_caller_identity and the 'AccountAliases' list reported by IAM
      list_account_aliases.
    """
    sts_client = boto3.client('sts')
    iam_client = boto3.client('iam')

    response = sts_client.get_caller_identity()
    account_id = response['Account']

    # Fetch account alias. This does not give the true email of the AWS account.
    # It gives an alias that can be used to login via the AWS Management Console.
    aliases = iam_client.list_account_aliases()['AccountAliases']

    return account_id, aliases

Test AWS credentials

In [31]:
# Show which AWS account the configured credentials belong to.
account_id, aliases = get_aws_account_details()
print(f"You are using AWS account ID: {account_id}")
# Branch on the returned alias list; the previous check used `email`, a name
# that is never defined at notebook level and would raise NameError.
if aliases:
    print(f"Account alias (aliases): {','.join(aliases)}")
else:
    print("No account alias found.")

NoCredentialsError: Unable to locate credentials