### Step 1: Installing necessary packages

In [60]:
# Installing the required libraries
# !pip install python-docx
# !pip install pytesseract
# !pip install gtts
# !pip install opencv-python-headless
# !pip install opencv-python
# !pip install moviepy
# !pip install googletrans==4.0.0-rc1
# !pip install docx2python
# !pip install tinytag
# !pip install Pillow
# !pip install pydub
# !pip install docx2txt


### Step 2: Importing modules

In [61]:
import os
import textwrap
from docx import Document
from googletrans import Translator
import moviepy.editor as mp
from gtts import gTTS
from moviepy.editor import *
from moviepy.editor import concatenate_videoclips
from moviepy.video.fx import resize
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip

### Step 3: Reading the Word document, extract paragraphs, and images

In [62]:
import docx
from PIL import Image
import uuid

def read_word_document(file_path):
    doc = docx.Document(file_path)
    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]

    images = []
    for idx, shape in enumerate(doc.inline_shapes):
        img_id = str(uuid.uuid4())
        if shape.type == docx.enum.shape.WD_INLINE_SHAPE.PICTURE:
            blip = shape._inline.graphic.graphicData.pic.blipFill.blip
            if blip.embed is not None:
                rId = blip.embed
            else:
                rId = blip.link
            document_part = doc.part
            image_part = document_part.related_parts[rId]
            img_data = image_part.blob
            extension = os.path.splitext(image_part.partname)[1]
            image_path = f'image_{img_id}{extension}'
            with open(image_path, 'wb') as f:
                f.write(img_data)
            img = Image.open(image_path)
            img.save(image_path)
            images.append(image_path)

    return paragraphs, images

file_path = '/Users/psumit/Documents/Career/Codespace/Projects/text-to-video/text-to-video-JP1/test4page-JP.docx'
paragraphs, images = read_word_document(file_path)



### Step 4: Generating audio files for each paragraph

In [63]:
from gtts import gTTS
import os

def generate_audio_files(paragraphs, language_code='ja'):
    audio_files = []
    for idx, paragraph in enumerate(paragraphs):
        tts = gTTS(text=paragraph, lang=language_code)
        audio_file = f"audio_{idx}.mp3"
        tts.save(audio_file)
        audio_files.append(audio_file)
    return audio_files

language_code = "ja"
audio_files = generate_audio_files(paragraphs, language_code)

### Step 5: Creating the video

In [64]:
def wrap_text(text, max_width=50):
    lines = textwrap.wrap(text, width=max_width)
    return '\n'.join(lines)

def resize_image_maintain_aspect_ratio(image, width, height):
    image.thumbnail((width, height), Image.ANTIALIAS)
    return image

def create_video(paragraphs, images, audio_files, output_path='output_video.mp4'):
    clips = []

    intro_text = wrap_text(paragraphs[0])
    outro_text = wrap_text(paragraphs[-1])

    intro_clip = TextClip(intro_text, fontsize=24, color='white', size=(1280, 720), bg_color='black', print_cmd=False).set_duration(5)
    clips.append(intro_clip.set_audio(mp.AudioFileClip(audio_files[0])))

    for i in range(1, len(paragraphs) - 1):
        image = Image.open(images[i - 1])
        image = resize_image_maintain_aspect_ratio(image, 1280, 720)
        image = image.convert('RGB')
        image.save(f"temp_{i}.jpg")
        audio_duration = mp.AudioFileClip(audio_files[i]).duration
        img_clip = ImageClip(f"temp_{i}.jpg", duration=audio_duration)
        img_clip = img_clip.set_audio(mp.AudioFileClip(audio_files[i]))
        clips.append(img_clip)

    outro_clip = TextClip(outro_text, fontsize=24, color='white', size=(1280, 720), bg_color='black', print_cmd=False).set_duration(5)
    clips.append(outro_clip.set_audio(mp.AudioFileClip(audio_files[-1])))

    final_clip = concatenate_videoclips(clips)
    final_clip.write_videofile(output_path, codec='libx264', fps=24)

create_video(paragraphs, images, audio_files)

Moviepy - Building video output_video.mp4.
MoviePy - Writing audio in output_videoTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
Moviepy - Writing video output_video.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready output_video.mp4


#### 2 problems?

- The images are still very blurry, almost as if a signal has gone off in a TV. I need to resolve this.

- The audios for every paragraph. If I have to describe it clearly, the video plays like this:
    
    a. The video starts playing with the black screen with the audio transcription.
    
    b. Although the introductory paragraph's audio transcript has not yet finished, paragraph 1's audio transcription begins.
    
    Big problem. Paragraph 1's audio and image should not be heard and visible until the introductory paragraph is done.

The code fails to consider when the audio transcription ends for a paragraph so that it can

go to the transcription for the following paragraph.

So, I need to find a way to consider the length of each paragraph so that the audio transcriptions

do not end up cluttering each other.