In [144]:
import os
from pathlib import Path
from collections import OrderedDict, namedtuple
import time
from datetime import datetime
import codecs
from textwrap import wrap

import wikipediaapi
from moviepy.editor import *
import gizeh as gz
from gtts import gTTS
from skimage.io import imread, imsave
from skimage import transform
from skimage.util import img_as_ubyte

from image_downloader import master_download
from im_funcs import maxsize_pad

from synthesize import synthesize
from hyperparams import Hyperparams as hp
from data_load import *


# Default Parameters
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
WHITE_GIZEH = (1, 1, 1)
BLACK_GIZEH = (0, 0, 0)

VIDEO_SIZE = (1920, 1080)
IMG_SHAPE = (1080, 1920)
IMG_DISPLAY_DURATION = 4    #duration, in seconds, to display each image

excluded_sections = {'See also', 'References', 'Further reading', 'External links',
                'Formats and track listings', 'Credits and personnel', 'Charts',
                'Certifications', 'Release history'}


In [4]:
wiki = wikipediaapi.Wikipedia('en')
page = wiki.page('Bokmål')
page.title

'Bokmål'

### Workflow:

- Initialize WikiMovie with page
- Create txt file
    - Title,
    - Summary text,
    - Section1 title,
    - Section1 text, 
    - etc ....
- Output all .wav speech files with synthesize function
-  ... currently just name files 0.wav, 1.wav, 2 ....
- Generate clips:
    Possible output:
    - textclip, imageseq with 0.wav (section with text to read)
    - textclip with 1.wav (section without text)

    
Basically, will have to iterate through output samples directory (possibly with just index 'i' for filepath),
and link them back with the text.

If Section has no text, it's just a main header... ex: `{'History': '', 'Ancient times': 'blah blah...'}`

Create textclip for 'History', set audio, and go on to next, e.g. 'Ancient times'

If value at the key is not empty, create textclip as above, AND Image Sequence.

For each item try to make one piece of audio continue through Textclip display and Image sequence if necessary.
Maybe just set the section header durations to be a little long, like 3 secs. The actual paragraph may start in sooner but at least the audio won't get cutoff, and don't have to synthesize the section title audio separately on a different line of the text file.

    
|   audio ---------| ------------> |
|----------------:|:--------------|
| /Section title + |         Text/ |
| TextClip         | ImageSequence |


- Pass the dict into text file as `script[i]['title'] + '. ' + script[i]['text']`
  
  (notice the space after the period)
  
- Also process the script dictionary for creating 

In [145]:
class WikiMovie():
    """
    Make movies in standard format (.mp4) from wikipedia pages.
    Initialize with 'page' object from wikipedia Python module.
    Primary user function is make_movie().
    """
    def __init__(self, page, narrator='gtts'):
        self.page = page
        self.title = self.page._attributes['title']
        self.narrator = narrator
        self.script = [{'title': page.title, 'level': 0, 'text': page.summary}]
        self.cliplist = []
        
        # self.p = Path(__file__).resolve().parents[1]
        self.p = Path(os.path.abspath('')).resolve() ## For jupyter notebook
        self._imgidx = 0
        self.cutoff = None
        

    def _create_paths(self):
        # Image directories
        self.parent_images = self.p / 'images'
        self.imgdir = self.p / 'images' / self.title
        self.resizedir = self.imgdir / 'resize'
        # gTTS audio directories
        self.parent_audio = self.p / 'audio'
        self.auddir = self.p / 'audio' / self.title
        # dc_tts directory
        self.dctts_dir = self.p / 'dc_tts'
        self.dctts_in = self.dctts_dir / 'text_input'
        self.dctts_out = self.dctts_dir / 'samples' / self.title
        # URL lists text files directory
        self.url_dir = self.p / 'url_files'
        # Video directory (all article videos stored in folder, files named by title)
        self.viddir = self.p / 'videos'
        # Video save path
        self.vidpath = self.viddir / (self.title + ".mp4")

        print('creating paths...')
        for d in [self.parent_images, self.imgdir, self.resizedir,\
                self.parent_audio, self.auddir, self.dctts_dir,\
                self.dctts_in, self.dctts_out, self.url_dir, self.viddir]:
            if not d.exists():
                d.mkdir()
                print(d, "directory created")
            elif d.exists():
                print(d, "exists")

    def _resize_images(self):
        self._imgpaths = []
        contents = self.imgdir.glob('*')
        fnames =  [x for x in contents if x.is_file() and x.parts[-1][0] != '.']
        self.fixed_durations = [IMG_DISPLAY_DURATION for _ in fnames]
        
        n_imgs = len(fnames)
        for i, fname in enumerate(fnames):
            sys.stdout.write(f"Resizing Images [{'#' * (i+1) + ' ' * (n_imgs-i-1)}]   \r")
            sys.stdout.flush()
            
            path = str(self.imgdir / fname)
            print(path)
            save_path = str(self.resizedir / fname)
            print(self.resizedir)
            print(save_path)
            try:
                maxsize_pad(path, save_path)
            except Exception:
                continue
            self._imgpaths.append(save_path)


    def _make_narration(self, string, mp3path):
        tts = gTTS(string)
        tts.save(mp3path)
        return AudioFileClip(mp3path)


    def _add_ImageSequence(self, audioclip):
        """add image with soundtrack audioclip of narrated text"""
        ### Cycle through images so it doesn't always start at the first one
        tmp_imgpaths = self._imgpaths[self._imgidx:] + self._imgpaths[:self._imgidx]
        self._imgidx += 1
        image_sequence = ImageSequenceClip(sequence=tmp_imgpaths,
                                durations=self.fixed_durations, load_images=True).\
                            set_position(('center', 400)).\
                            fx(vfx.loop, duration=audioclip.duration).\
                            set_audio(audioclip)
        self.cliplist.append(image_sequence)


    def _add_subsection(self, section, level):
        """
        Add textclip of section titles.
        If it's just a main header followed by subsections, NO image sequence.
        If section contains text, create narratation and image sequence.
        """

        mp3path_header = os.path.join(self.auddir, section.title + '_header.mp3')
        ac_header = self._make_narration(section.title, mp3path_header)
        fontsize = 130 - (30 * level) # higher level means deeper 'indentation'
        tc_header = TextClip(section.title, color='white', fontsize=fontsize, 
                    size=VIDEO_SIZE, method='caption').\
                    set_audio(ac_header).set_duration(ac_header.duration)
        self.cliplist.append(tc_header)
        ## if there is an actual paragraph in the section, create an image sequence for it
        if section.text:
            mp3path_text = os.path.join(self.auddir, section.title + '_text.mp3')
            ac_text = self._make_narration(section.text[:self.cutoff], mp3path_text)
            self._add_ImageSequence(ac_text)

        print(section.title, "complete")


    def _flush_sections(self, sections, level=0):
        """
        Get text from all levels (sections, subsections) in page order and generate narrations
        """
        for s in sections:
            if s.title in excluded_sections:
                print('exluding')
                continue
            else:
                self.script.append({'title':s.title, 'level': level+1, 'text': s.text})
                # self._add_subsection(s, level) # clip creation
                # recursion to next level. Once lowest level is reached, next main section will be accessed
                self._flush_sections(s.sections, level+1)


    def _flush_page(self):
        pass
        # add main title and summary    
#         mp3path_title = os.path.join(self.auddir, self.title + '_title.mp3')
#         ac_title = self._make_narration(self.title, mp3path_title)
#         tc_title = TextClip(self.title, color='white', fontsize=150, 
#                     size=VIDEO_SIZE, method='caption').\
#                     set_audio(ac_title).set_duration(ac_title.duration)
#         self.cliplist.append(tc_title)
        
#         mp3path_summary = os.path.join(self.auddir, self.title + '_summary.mp3')
#         ac_summary = self._make_narration(self.page.summary[:self.cutoff], mp3path_summary)
#         self._add_ImageSequence(ac_summary)
#         print('Title and Summary complete')

        # create clips for rest of sections
#         self._flush_sections(self.page.sections)
        
        
                             
    def make_text_file(self):
        """
        List of text. All 
        """
        self._flush_sections(self.page.sections)
                      
        self.sent_path = self.dctts_in / f"{self.title}.txt"
                             
        with self.sent_path.open('w') as sf:
            sf.write(f"Script for Wikipedia article {self.title}\n")
                             
            for d in self.script[:None]:
                # {'title': section title, 'level': level, 'text': section text}
                # break section down into 180 character chunks            
                s = d['title'] + d['text']
                seg_text = wrap(s, hp.max_N) # Split script every 180 chars
                for i, seg in enumerate(seg_text):
                             
                    sf.write(f"{d['title']}:{i} {seg}\n")

                         
    def make_movie(self, cutoff=None):
        """
        Args:
            cutoff (int): Limit the length of the script. Used like script[:cutoff]
        Returns:
            None
        """
        print("Video Title: ", self.title)
        self.cutoff = cutoff
        self._create_paths()

        # Download and resize images
        master_download(main_keyword=self.title, url_dir=self.url_dir,
                        img_dir=self.imgdir, num_requested=100)
        self._resize_images()
        print('\n') 

        # Create Video Clips
        print("Creating clips. . .")
        self._flush_page()

        thanks = TextClip("Thanks for watching \n and listening",
                            color='white', fontsize=72, size=VIDEO_SIZE, method='caption').\
                            set_duration(2)

        subscribe = TextClip("Please Subscribe!",
                                color='white', fontsize=72, size=VIDEO_SIZE, method='caption').\
                                set_duration(2)

        self.video = concatenate_videoclips(self.cliplist + [thanks, subscribe],
                                            method='compose').\
                                            on_color(color=BLACK, col_opacity=1)
        # Encode Video
        start = datetime.now()
        self.video.write_videofile(str(self.vidpath) , fps=1, codec='mpeg4', 
                                    audio_codec="aac", preset='ultrafast')
        dur = datetime.now() - start
        print("Video Encoding completed in time: ", dur)

        # self.audio_clip.close()
        # title_text.close()
        thanks.close()
        subscribe.close()
        self.video.close()
        

In [146]:
WMM = WikiMovie(page)

In [147]:
WMM._create_paths()

creating paths...
/Users/jared/video-creater/images exists
/Users/jared/video-creater/images/Bokmål exists
/Users/jared/video-creater/images/Bokmål/resize exists
/Users/jared/video-creater/audio exists
/Users/jared/video-creater/audio/Bokmål exists
/Users/jared/video-creater/dc_tts exists
/Users/jared/video-creater/dc_tts/text_input exists
/Users/jared/video-creater/dc_tts/samples/Bokmål exists
/Users/jared/video-creater/url_files exists
/Users/jared/video-creater/videos exists


In [148]:
WMM._flush_sections(page.sections)
WMM.make_text_file()
hp.test_data = str(WMM.sent_path)
hp.sampledir = str(WMM.dctts_out)

exluding
exluding


In [149]:
hp.test_data

'/Users/jared/video-creater/dc_tts/text_input/Bokmål.txt'

In [150]:
hp.sampledir

'/Users/jared/video-creater/dc_tts/samples/Bokmål'

In [151]:
lines = codecs.open(WMM.sent_path, 'r', 'utf-8').readlines()[1:]

In [152]:
lines

['Bokmål:0 BokmålBokmål (UK: , US: ; literally "book tongue") is an official written standard for the Norwegian language, alongside Nynorsk. Bokmål is the preferred written standard of\n',
 'Bokmål:1 Norwegian for 85% to 90% of the population in Norway. Unlike, for instance, the Italian language, there is no nationwide standard or agreement on the pronunciation of Bokmål.\n',
 'Bokmål:2 Bokmål is regulated by the governmental Norwegian Language Council. A more conservative orthographic standard, commonly known as Riksmål, is regulated by the non-governmental\n',
 'Bokmål:3 Norwegian Academy for Language and Literature. The written standard is a Norwegianised variety of the Danish language. The first Bokmål orthography was officially adopted in 1907\n',
 'Bokmål:4 under the name Riksmål after being under development since 1879. The architects behind the reform were Marius Nygaard and Jacob Jonathan Aars. It was an adaptation of written\n',
 'Bokmål:5 Danish, which was commonly used sinc

In [153]:
sents = [text_normalize(line.split(" ", 1)[-1]).strip() + "E" for line in lines] # text normalization, E: EOS

In [154]:
sents

['bokmalbokmal uk us literally book tongue is an official written standard for the norwegian language alongside nynorsk. bokmal is the preferred written standard ofE',
 'norwegian for to of the population in norway. unlike for instance the italian language there is no nationwide standard or agreement on the pronunciation of bokmal.E',
 'bokmal is regulated by the governmental norwegian language council. a more conservative orthographic standard commonly known as riksmal is regulated by the non governmentalE',
 'norwegian academy for language and literature. the written standard is a norwegianised variety of the danish language. the first bokmal orthography was officially adopted inE',
 'under the name riksmal after being under development since . the architects behind the reform were marius nygaard and jacob jonathan aars. it was an adaptation of writtenE',
 'danish which was commonly used since the past union with denmark to the dano norwegian koine spoken by the norwegian urban elite

In [143]:
lines[0].split(" ", 1)[-1]

'Bokmål (UK: , US: ; literally "book tongue") is an official written standard for the Norwegian language, alongside Nynorsk. Bokmål is the preferred written standard of Norwegian for 85% to 90% of the population in Norway. Unlike, for instance, the Italian language, there is no nationwide standard or agreement on the pronunciation of Bokmål.\n'

In [130]:
text_normalize(lines[0])

'bokmal. bokmal uk us literally book tongue is an official written standard for the norwegian language alongside nynorsk. bokmal is the preferred written standard of norwegian for to of the population in norway. unlike for instance the italian language there is no nationwide standard or agreement on the pronunciation of bokmal. '

In [136]:
char2idx, idx2char = load_vocab()

In [137]:
texts = np.zeros((len(sents), hp.max_N), np.int32)
for i, sent in enumerate(sents):
    texts[i, :len(sent)] = [char2idx[char] for char in sent]


ValueError: cannot copy sequence with size 321 to array axis with dimension 180

In [113]:
synthesize()

ValueError: cannot copy sequence with size 2683 to array axis with dimension 180

In [None]:
vars(hp)