# Flash Card Maker
Assuming you have a language course with audio (*.wav*) files, this code
- produces a text transcription of each audio file (using the *Google Speech Recognition API*)
- translates it (using *Google Translate API*)
- creates an **Anki** deck of flashcards with selected lessons

**Use case**: imagine you have purchased an audio course (or an audio + book course) and want to turn it into Anki flashcards. Rather than typing in thousands of sentences by hand, you would like the transcription and translation to be automated.

## Prerequisites
- `deep-translator`, a third-party package used here for its Google Translate backend (`pip install deep-translator`)
- `SpeechRecognition` (`pip install SpeechRecognition`)

In [None]:
#!/usr/bin/env python3

In [None]:
from deep_translator import GoogleTranslator
TRANSLATOR = GoogleTranslator(source='id', target='en')
import speech_recognition as sr
SPEECH_RECOGNIZER = sr.Recognizer() # once for all
from os import path
import os
import glob
from shutil import copyfile
import re

In [None]:
class Phrase(object):
    """Phrase representation in audio, text and translated text formats.

    The project requires both mp3 and wav files for each audio recording.
    The constructor accepts either name and always works on the ``.wav``
    file.  Two text files are kept next to it: ``<name>.txt`` (transcript
    in the target language) and ``<name>_tr.txt`` (its translation).
    """

    def __init__(self, audio_file):
        """Locate the wav recording, then transcribe and translate it.

        Raises:
            FileNotFoundError: if the ``.wav`` file does not exist.
        """
        root, ext = os.path.splitext(audio_file)
        # Only the extension is swapped; a ".mp3" elsewhere in the path
        # is left untouched.
        self.audio_file = root + ".wav" if ext == ".mp3" else audio_file
        if not os.path.exists(self.audio_file):
            raise FileNotFoundError(f"{self.audio_file} not found!")
        print("I'm making phrase from", audio_file)
        self.transcribe()
        self.translate()

    def _sibling(self, suffix):
        "Path of the file next to the audio file with `suffix` replacing its extension."
        return os.path.splitext(self.audio_file)[0] + suffix

    def read(self, fname):
        "Return the whole content of the text file `fname`."
        with open(fname, 'r', encoding="utf-8") as f:
            return f.read()

    def get_target_phrase(self):
        "Return the stored transcript (target-language phrase)."
        return self.read(self.target_phrase_file)

    def set_target_phrase(self, text):
        "Apply correction"
        with open(self.target_phrase_file, 'w', encoding="utf-8") as ofile:
            ofile.write(text)

    def get_source_phrase(self):
        "Return the stored translation (source-language phrase)."
        return self.read(self.translation_file)

    def set_source_phrase(self, text):
        "Apply correction"
        with open(self.translation_file, 'w', encoding="utf-8") as ofile:
            ofile.write(text)

    def get_audio(self):
        "Return the path of the wav recording."
        return self.audio_file

    def get_text(self):
        "Return the stored transcript; same content as `get_target_phrase`."
        return self.read(self.target_phrase_file)

    def transcribe(self, overwrite=False):
        """Transcribe the recording into ``<name>.txt``.

        An existing transcript is kept unless `overwrite` is true.  When the
        recogniser cannot understand the audio an empty transcript is
        written, so the phrase can still be corrected by hand later.  When
        the service cannot be reached the `sr.RequestError` is re-raised and
        nothing is written, so a later run retries instead of reusing a
        bogus file.
        """
        self.target_phrase_file = self._sibling(".txt")
        if os.path.exists(self.target_phrase_file) and not overwrite:
            print(f"{self.target_phrase_file} already exists. Skipping.")
            return
        with sr.AudioFile(self.audio_file) as source:
            audio = SPEECH_RECOGNIZER.record(source)
        # recognize speech using Google Speech Recognition
        try:
            # for testing purposes, we're just using the default API key
            # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
            # instead of `r.recognize_google(audio)`
            text = SPEECH_RECOGNIZER.recognize_google(audio, language='id')
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio")
            text = ""
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))
            raise
        else:
            print("Google Speech Recognition thinks you said " + text)
        # Save to file
        with open(self.target_phrase_file, 'w', encoding="utf-8") as ofile:
            ofile.write(text)
        print(f"Written {self.target_phrase_file}")

    def translate(self, overwrite=False):
        """Translate the transcript into ``<name>_tr.txt``.

        An existing translation is kept unless `overwrite` is true.  An
        empty transcript yields an empty translation without calling the
        translator.
        """
        self.translation_file = self._sibling("_tr.txt")
        if os.path.exists(self.translation_file) and not overwrite:
            print(f"{self.translation_file} already exists. Skipping.")
            return
        text = self.get_text()
        # Skip the remote call when there is nothing to translate, and never
        # hand a non-string result to write().
        translation = (TRANSLATOR.translate(text) or "") if text.strip() else ""
        with open(self.translation_file, 'w', encoding="utf-8") as ofile:
            ofile.write(translation)

In [None]:
class Card(object):
    """One flashcard 'row' built from a phrase.

    A single row can be rendered as several card types; each rendering is a
    ';'-separated line ending in a newline.
    """

    def __init__(self, phrase, tag=None):
        """Read both texts and the audio path from `phrase`; remember `tag`."""
        target_text = phrase.get_target_phrase()
        source_text = phrase.get_source_phrase()
        audio_path = phrase.get_audio()
        self.target_phrase = target_text
        self.source_phrase = source_text
        self.sound = self.make_sound_handle(audio_path)
        self.tag = tag

    def make_sound_handle(self, sound):
        """Return the ``[sound:...]`` markup for the audio path `sound`.

        The handle is made of the first four characters of the parent
        directory name, an underscore and the file name with ``.wav``
        replaced by ``.mp3``.  A falsy `sound` gives ``None``.
        """
        if not sound:
            return None
        pieces = sound.replace('.wav', '.mp3').split('/')
        lesson_name, sfile = pieces[-2:]
        return "[sound:{}_{}]".format(lesson_name[:4], sfile)

    def listening_card(self):
        "Card line with the audio as question, both texts as answer."
        return 'Listen {}; {} ({}); {}\n'.format(
            self.sound, self.target_phrase, self.source_phrase, self.tag)

    def active_translation_card(self):
        "Card line with the translation as question, transcript and audio as answer."
        return '{}; {} {}; {}\n'.format(
            self.source_phrase, self.target_phrase, self.sound, self.tag)

In [None]:
class Lesson(object):
    """Representation of a lesson or folder containing audio, and possibly, text files.
    Creates flashcards for a given lesson.

    `skip` is an optional substring, or iterable of substrings; any mp3 file
    whose path contains one of them is left out of the lesson.
    """

    def __init__(self, dir_name, skip=None):
        """Build phrases and cards for every non-skipped mp3 file in `dir_name`."""
        self.skip = skip
        self.dir_name = dir_name
        self.phrases = [Phrase(f) for f in sorted(self.audio_files())]
        self.tag = self.get_name()[:4]  # e.g. L001
        self.build_cards()

    def get_phrases(self):
        "Return the list of phrases, ordered by audio file name."
        return self.phrases

    def get_cards(self):
        "Return the rendered card lines: listening cards, then translation cards."
        return self.all_cards

    def get_name(self):
        "Return the last component of the lesson directory path."
        # normpath drops a trailing separator, which would otherwise give ''.
        return os.path.basename(os.path.normpath(self.dir_name))

    def make_raw_cards(self):
        "Create one `Card` per phrase, tagged with the lesson tag."
        self.raw_cards = [Card(ph, tag=self.tag) for ph in self.phrases]

    def make_listening_cards(self):
        "Render the listening card line of every raw card."
        self.listening_cards = [c.listening_card() for c in self.raw_cards]

    def make_translation_cards(self):
        "Render the active-translation card line of every raw card."
        self.translation_cards = [c.active_translation_card() for c in self.raw_cards]

    def build_cards(self):
        "(Re)build every card from the phrases' current text files."
        self.make_raw_cards()
        self.make_listening_cards()
        self.make_translation_cards()
        self.all_cards = self.listening_cards + self.translation_cards

    def content(self):
        "Return the sorted names of the regular files in the lesson directory."
        return sorted(f for f in os.listdir(self.dir_name)
                      if os.path.isfile(os.path.join(self.dir_name, f)))

    def audio_files(self):
        """Return the paths of the lesson's mp3 files, minus the skipped ones.

        A file is skipped when ANY of the `skip` substrings occurs in its
        path (the whole path is searched, not just the file name).  Each
        kept file appears exactly once.  The order is whatever `glob`
        returns.
        """
        audio = glob.glob(os.path.join(self.dir_name, "*.mp3"))
        if not self.skip:
            return audio
        # A bare string is one pattern, not a sequence of characters.
        patterns = (self.skip,) if isinstance(self.skip, str) else tuple(self.skip)
        return [x for x in audio if not any(s in x for s in patterns)]

In [None]:
# Quick check of a substring filter: keep the words that do not contain "bc".
arr = ["abc", "def"]
arr2 = [word for word in arr if "bc" not in word]
print(arr2)

In [None]:
class Course(object):
    """Navigate across the course raw files.

    A course is a directory (`mother_dir`) whose entries matching the glob
    pattern `lesson_regexp` are the lessons.  Despite its name,
    `lesson_regexp` is passed to `glob`, so it is a shell-style pattern.
    """

    def __init__(self,
                 mother_dir="/home/dawid/Music/Indonesian_Assimil/",
                 lesson_regexp="L[0-9]*"):
        self.MOTHER_DIR = mother_dir
        self.lesson_regexp = lesson_regexp
        # Sorted entry names (not full paths) matching the lesson pattern.
        self.lessons = sorted(
            os.path.basename(f)
            for f in glob.glob(os.path.join(self.MOTHER_DIR,
                                            self.lesson_regexp)))

    def get_mother_dir(self):
        "Return the course root directory."
        return self.MOTHER_DIR

    def print_lessons(self):
        "Print every lesson name, one per line."
        for les in self.lessons:
            print(les)

    def get_lesson_name(self, index):
        """Return the name of the lesson numbered `index`.

        The name ``L<index, 3 digits>-Indonesian_ASSIMIL`` is preferred when
        present.  Otherwise the first lesson whose name is ``L<index>``
        followed by a non-digit (or by nothing) is returned, so courses with
        a different suffix work too.

        Raises:
            KeyError: if `index` is not an integer or no lesson matches.
        """
        try:
            prefix = 'L' + "{:03d}".format(index)
        except (ValueError, TypeError):
            raise KeyError(f"Index {index} is not correct!") from None
        lname = prefix + '-Indonesian_ASSIMIL'
        if lname in self.lessons:
            return lname
        for candidate in self.lessons:
            rest = candidate[len(prefix):]
            # "L0010-..." must not be mistaken for lesson 1.
            if candidate.startswith(prefix) and not rest[:1].isdigit():
                return candidate
        raise KeyError(f"Index {index} is not correct!")

    def get_lesson(self, index_or_name, skip=None):
        """Return the `Lesson` given either its name or its number.

        `skip` is forwarded to `Lesson`.
        """
        if index_or_name in self.lessons:
            lname = index_or_name
        else:
            lname = self.get_lesson_name(index_or_name)
        return Lesson(os.path.join(self.MOTHER_DIR,
                                   lname),
                      skip=skip)

# Course instance with the default root directory and lesson pattern;
# passed to DeckMaker as `course` in a later cell.
main = Course()
#main.print_lessons()
#les = main.get_lesson(3)

In [None]:
# Anki media folder: the mp3 files are copied here and the deck file is
# written here as well.
ANKI_DB = "/home/dawid/.var/app/net.ankiweb.Anki/data/Anki2/User 1/collection.media/"
class DeckMaker(object):
    "Create/update ANKI deck based on the audio course"

    def __init__(self, deckname, course, update_existing=False, skip=None):
        """Prepare a deck file named `deckname` (``.txt`` is appended if missing).

        Args:
            deckname: deck file name, with or without ``.txt``.
            course: object providing ``get_lesson(index_or_name, skip=...)``.
            update_existing: when true, cards already present in the deck
                are written again instead of being ignored.
            skip: forwarded to ``course.get_lesson``.
        """
        self.deckname = deckname.replace(".txt", "") + ".txt"
        self.deckfile = os.path.join(ANKI_DB, self.deckname)
        self.exists = False
        if os.path.exists(self.deckfile):
            print(f"The deck file {self.deckfile} already exists.\nI will update it")
            self.exists = True
        else:
            print(f"Creating new deck {self.deckname} in the file {self.deckfile}.")
        self.course = course
        self.update_existing = update_existing
        self.skip = skip

    def print(self):
        "Print the list of lines of the deck file, or a notice if it does not exist."
        try:
            with open(self.deckfile, 'r', encoding="utf-8") as fin:
                cont = fin.readlines()
            print(cont)
        except FileNotFoundError:
            print("The deck is empty")

    def unique_fname(self, full_fname):
        "Return ``<first 4 chars of parent dir>_<file name>`` for a '/'-separated path."
        lesson, phrase_file = full_fname.split('/')[-2:]
        return lesson[:4] + '_' + phrase_file

    def copy_sound(self, lesson):
        "Copy all mp3 files from lesson `lesson` to the ANKI database"
        for afile in lesson.audio_files():
            print(f"I wanna copy the file {afile}")
            res_fname = os.path.join(ANKI_DB, self.unique_fname(afile))
            if not os.path.exists(res_fname):
                print(f"Copying {afile} -> {res_fname}")
                copyfile(afile, res_fname)
            else:
                print(f"{res_fname} already exists. Skipping.")

    def render_cards(self, lesson):
        "Make all the cards for the lesson `lesson`"
        return lesson.get_cards()

    def card_exists(self, card, old_lines):
        """Tell whether `card`'s ``[sound...]`` tag already occurs in `old_lines`.

        The tag is looked up on the same side (question or answer) as in
        `card`.  `old_lines` is the deck content as one string; an empty or
        ``None`` value means no card exists.
        """
        if not old_lines:
            return False
        # Tags found in the first field (question) and in the second field
        # (answer) of the old deck lines.
        question_patterns = ", ".join(re.findall(r".*(\[.*\]).*;.*;.*\n", old_lines))
        answer_patterns = ", ".join(re.findall(r".*;.*(\[.*\]).*;.*\n", old_lines))
        m_question = re.match(r".*(\[.*\]).*;.*;.*", card)
        m_answer = re.match(r".*;.*(\[.*\]).*;.*", card)
        for match, found_patterns in zip((m_question, m_answer),
                                         (question_patterns, answer_patterns)):
            if match: # Found [sound..] in the given card-side (i.e. question or answer)
                if match.group(1) in found_patterns: # it corresponds to the relevant card-side
                    return True
        return False

    def validate(self, lesson):
        """Interactively review every phrase of `lesson` and store corrections.

        `lesson` is a lesson object, or a lesson number/name resolved through
        the course.  If any phrase was rejected, the lesson's cards are
        rebuilt from the corrected text files.
        """
        if isinstance(lesson, (int, str)):
            lesson = self.course.get_lesson(lesson, skip=self.skip)
        print("Please check the phrases. You may correct them")
        err = False
        for phrase in lesson.get_phrases():
            print(phrase.get_target_phrase(), '-->', phrase.get_source_phrase())
            res = input("Is it correct? [y/n]")
            if res != 'y':
                err = True
                correction = input("Please correct the original phrase or click Enter:")
                if correction != '':
                    phrase.set_target_phrase(correction)
                correction = input("Please correct the translation or click Enter:")
                if correction != '':
                    phrase.set_source_phrase(correction)
        if err:
            lesson.build_cards()

    def update_deck(self, cards):
        """Append `cards` (card lines) to the deck file.

        Cards already present in the deck are ignored unless
        `update_existing` is set.
        """
        # TODO: update doesn't work yet: appends new cards rather than replacing them.
        with open(self.deckfile, 'a+', encoding="utf-8") as fout:
            # 'a+' opens positioned at the end of the file; rewind to read
            # the existing cards.  Writes still go to the end.
            fout.seek(0)
            old_lines = fout.read()
            for card in cards:
                if self.card_exists(card, old_lines) and not self.update_existing:
                    print(f"{card} already in the old deck. Ignoring.")
                    continue
                fout.write(card)

    def add_lesson(self, index):
        "Copy the audio of lesson `index`, let the user validate it, then add its cards to the deck."
        lesson_handle = self.course.get_lesson(index, skip=self.skip)

        self.copy_sound(lesson_handle)
        self.validate(lesson_handle)
        # Fetch the cards only after validation: a correction rebuilds the
        # lesson's card list, and the list fetched earlier would be stale.
        cards = self.render_cards(lesson_handle)
        #print("[DeckMaker] Here are the cards:\n", cards)
        self.update_deck(cards)

    def add_lessons(self, start, stop):
        "Add the lessons numbered `start` to `stop`, both inclusive."
        for ind in range(start, stop+1):
            self.add_lesson(ind)

In [None]:
# Open (or create) the deck for the course in `main`; "T00" is passed as a
# skip pattern for the lessons' audio files.  Then show the current content.
deck = DeckMaker("Assimil_Indonesian2", course=main, skip=("T00",))
deck.print()

In [None]:
# Add lesson 27 to the deck (prompts for validation of every phrase).
deck.add_lesson(27)
#deck.add_lessons(27,30) # add lessons 27 through 30 (inclusive)

In [None]:
#deck.validate(27)

In [None]:
#deck.add_lesson(27)