In [256]:
# figs, width/height
# add regular text (quotes...)


In [257]:
import os
import pypandoc
import shutil
import re

WD = 'G:/Meine Ablage/Supervision/UZH-module/Slides'
FIGSDIR = os.path.join(WD, 'figs')
FILENAME = 'SessionOverview.docx'
DOCX = os.path.join(WD, FILENAME)
AUX_FIGS = ['UZH_logo_full.jpg']

In [258]:
with open('./Beamer/doc.txt', 'r') as f:
    doc = f.read()

output = pypandoc.convert_file(DOCX, 'plain', 
                               outputfile = './Beamer/tmp.txt', encoding='utf-8',
                               extra_args=('--standalone','--wrap=none')
                               )
with open('./Beamer/tmp.txt', 'r') as f:
    text_orig = f.readlines()

In [259]:
class Slide:

    def __init__(self, title, index):
        self.title = title
        self.items = []
        self.figs = []
        self.refs = None
        self.section = None
        self.spb = None
        self.spa = None
        self.text_after = []
        self.text_before = []
        self.script = []
        self.index = index
        print("%d. %s"%(index, title))

    def add_fig(self, figstring):
        fig = figstring.split(',')
        figname = fig[0].strip()
        if len(fig) > 1:
            figformat = fig[1].strip()
        else:
            figformat = None
        self.figs.append((figname, figformat))

    def add_text(self, text):
        if len(self.items) == 0:
            self.text_before.append(text)
        else:
            self.text_after.append(text)

    def add_item(self, item):
        self.items.append(item)

    def add_refs(self, refs):
        self.refs = refs

    def add_section(self, refs):
        self.refs = refs

    def add_space_before(self, spb):
        self.spb = float(spb.strip())

    def add_space_after(self, spa):
        self.spa = float(spa.strip())

    def add_script(self, script):
        self.script.append(script)

    def get_script(self, i, n, add_items=True):
        script_text = ''
        script_text_clean = ''
        
        header = '%s (%d/%d)'%(self.title, i, n)
        script_text += header + '\n\n'
        script_text +=  len(header)*'-' + '\n\n'
        if add_items and len(self.items)>0:
            for item in self.items:
                script_text += '%s\n'%item
                script_text_clean += '%s\n'%item
        if len(self.script)>0:
            for text in self.script:
                script_text += '%s\n'%text
                script_text_clean += '%s\n'%text

        if script_text != '':
            script_text += 62*'*' + '\n' 
            script_text += 62*'*' + '\n\n' 
        
        script_text = re.sub(r'\n\s*\n', '\n\n', script_text)
        script_text_clean = re.sub(r'\n\s*\n', '\n\n', script_text_clean)
        script_text_clean += 'NEW SLIDE\n'
        return script_text, script_text_clean

    def get_ref_list(self):
        
        if self.refs is not None:
            return [ x.strip() for x in self.refs.split(',') ]
        else:
            return []

    def create(self):

        if self.section is not None:
            text = '\section{%s}\n'%self.section
            text += '\\begin{frame}{%s}\n'%self.title
        else:
            text = '\\begin{frame}{%s}\n'%self.title

        if self.spb is not None:
            text += '\\vspace{%.1fcm}\n'%self.spb

        if len(self.text_before)>0:
            for text_before in self.text_before:
                text += '%s\n\n'%text_before

        if len(self.items)>0:
            text += '\\begin{itemize}\n'
            for item in self.items:
                text += '\item %s\n'%item
            text += '\end{itemize}\n'

        if len(self.text_after)>0:
            for text_after in self.text_before:
                text += '%s\n\n'%text_after

        for fig in self.figs:
            figname, figformat = fig
            if figformat == 'portrait':
                text += '\\begin{tikzpicture}[remember picture, overlay]\n'
                text += '\\node[xshift=-1.2cm,yshift=-1.5cm] at (current page.north east){\n'
                text += '\includegraphics[width=1.5cm]{figs/%s}\n'%figname
                text += '};\n'
                text += '\end{tikzpicture}\n'
            elif figformat is not None:
                text += '\\begin{figure}\n'
                text += '\includegraphics[width=%s\\textwidth]{figs/%s}\n'%(figformat, figname)
                text += '\end{figure}\n'
            else:
                text += '\\begin{figure}\n'
                text += '\includegraphics[width=0.5\\textwidth]{figs/%s}\n'%figname
                text += '\end{figure}\n'

        if self.spa is not None:
            text += '\\vspace{%.1fcm}\n'%self.spa

        if self.refs is not None:
            text += '\let\\thefootnote\\relax\\footnotetext{%s}\n'%self.refs

        text += '\end{frame}\n\n'
        
        return text




In [260]:
slide = None
slides = []
silence = False
isscript = False
section = None
slide_index = 1
for i, x in enumerate(text_orig):
    if x[0] == '%':
        continue
    if x[0] == '*':
        title = x[2:].strip()
    elif x[0] == '#':
        silence = False
        isscript = False
        # save slide and create a new one
        if slide is not None:
            slides.append(slide)

        slide = Slide(x[2:].split('/')[0].strip(), slide_index)
        slide_index +=1 
        if section is not None:
            slide.add_section(section)
            section = None
    elif x[:4] == 'Scr:':
        isscript = True
        slide.add_script(x[4:].strip())
    elif isscript:
        slide.add_script(x.strip())
    elif silence:
        continue
    elif x[0] == '+':
        slide.add_text(x[2:].strip())
    elif x[0] == '-':
        slide.add_item(x[2:].strip())
    elif x[:4] == 'Fig:':
        slide.add_fig(x[4:].strip())
    elif x[:4] == 'Ref:':
        slide.add_refs(x[4:].strip())
    elif x[:4] == 'Sec:':
        section = x[4:].strip()
    elif x[:4] == 'SPB:':
        slide.add_space_before(x[4:].strip())
    elif x[:4] == 'SPA:':
        slide.add_space_after(x[4:].strip())
    elif x[:4] == 'Not:':
        silence = True
    elif x[:4] == 'END:':
        break


1. Why this Course
2. About Me
3. Remarks (1)
4. Remarks (2)
5. How to Pass this Course (1)
6. How to Pass this Course (1)
7. Schedule
8. Today
9. About You
10. What is intelligence? (1)
11. What is intelligence? (2)
12. Types of Definition? (2)
13. Course Overview
14. Definition of Intelligence
15. Psychometrics
16. Psychometrics
17. Theories of Human Intelligence (1)
18. Biological Basis of Intelligence: Neuroscience
19. Biological Basis of Intelligence: Genetics
20. Intelligence and Cognitive Abilities
21. Intelligence Over the Lifespan
22. Life outcomes: education, labor and health
23. Controversies Around Human Intelligence
24. Enhancement of Intelligence
25. Challenges of AI
26. Assignment (1)
27. Assignment (2)


In [261]:
fig_list = []

main_text = ''
script_text = ''
script_text_clean = ''
ref_list = []
refs_text = ''
nslides = len(slides)
for i, slide in enumerate(slides):
    main_text += slide.create()
    s, sc = slide.get_script(i+1, nslides)
    script_text += s
    script_text_clean += sc
    ref_list.extend(slide.get_ref_list())
    fig_list.extend(slide.figs)

ref_list = list(set(ref_list))
ref_list.sort()

for ref in ref_list:
    refs_text +=  '%s\n'%ref

fig_names = [x[0] for x in fig_list]
fig_names = fig_names + AUX_FIGS
print(fig_names)
fig_not_found = []
for fig_name in fig_names:
    try:
        shutil.copyfile(os.path.join(FIGSDIR, fig_name),
                        os.path.join('./output/figs', fig_name))       
    except:
        fig_not_found.append(fig_name)
    
print("Figures not found:")
import difflib
fig_list = os.listdir(FIGSDIR)
for ff in fig_not_found:
    print(ff, ":", difflib.get_close_matches(ff, fig_list))

['UZH_logo_full.jpg']
Figures not found:


In [262]:
doc = doc.replace('$TITLE', title)
doc = doc.replace('$MAINTEXT', main_text)
with open('./output/main.tex', 'w') as f:
    f.write(doc)

with open('./output/script.txt', 'w') as f:
    f.write(script_text)

with open('./output/refs.txt', 'w') as f:
    f.write(refs_text)

In [263]:
if True:
    from gtts import gTTS
    import os
    audio = gTTS(text=script_text_clean, lang="en", slow=False)
    audio.save('./output/script.mp3')
    print(audio)
#    os.system("start ./output/script.mp3")

from mutagen.mp3 import MP3
audio = MP3('./output/script.mp3')

print(f"Total duration: {round(audio.info.length/60, 2)} minutes" )

<gtts.tts.gTTS object at 0x0000025C898E85E0>
Total duration: 5.86 minutes
