In [None]:
GEMINI_API_KEY = "YOUR_GEMINI_API_KEY"

In [None]:
!pip install python-pptx

In [None]:
!sudo apt-get update && sudo apt-get install libreoffice

In [None]:
!pip install --upgrade --no-deps --force-reinstall git+https://github.com/openai/whisper.git

In [None]:
!sudo apt update && sudo apt install ffmpeg

In [None]:
!pip install yt-dlp

In [None]:
!pip install tiktoken

In [6]:
import yt_dlp
import subprocess
import os
import glob
import whisper

audio_segments_folder = "audio_segments"
transcription_output = "transcription.txt"

# Create folders if they don't exist
os.makedirs(audio_segments_folder, exist_ok=True)


# Step 1: Download the video if a URL is provided
video_url = "https://www.youtube.com/live/D7BzTxVVMuw?t=19684s"  # Replace with your video URL

# or you might want to analyse the uploaded video file.

UPLOADED_FILE = None # Replace with the path of your uploaded file

# Global variable to capture the downloaded file name
downloaded_file = None

# Define a progress hook to capture the filename when download finishes
def progress_hook(d):
    global downloaded_file
    if d.get('status') == 'finished':
        downloaded_file = d.get('filename')
        print(f"Download finished, saved as {downloaded_file}")

# Step 1: Download the video in lower quality (limit resolution to 480p) and capture filename
ydl_opts = {
    'format': 'bestvideo[height<=480]+bestaudio/best[height<=480]',
    'outtmpl': 'downloaded_video.%(ext)s',
    'progress_hooks': [progress_hook]
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([video_url])

if downloaded_file is None:
    raise Exception("Download failed: no file was captured.")

print(downloaded_file)


[youtube] Extracting URL: https://www.youtube.com/live/D7BzTxVVMuw?t=19684s
[youtube] D7BzTxVVMuw: Downloading webpage
[youtube] D7BzTxVVMuw: Downloading tv client config
[youtube] D7BzTxVVMuw: Downloading player 5ae7d525
[youtube] D7BzTxVVMuw: Downloading tv player API JSON
[youtube] D7BzTxVVMuw: Downloading ios player API JSON
[youtube] D7BzTxVVMuw: Downloading m3u8 information
[info] D7BzTxVVMuw: Downloading 1 format(s): 243+251
[download] Destination: downloaded_video.f243.webm
[download] 100% of  416.05MiB in 00:00:20 at 20.44MiB/s  Download finished, saved as downloaded_video.f243.webm

[download] Destination: downloaded_video.f251.webm
[download] 100% of  397.36MiB in 00:00:09 at 43.77MiB/s  Download finished, saved as downloaded_video.f251.webm

[Merger] Merging formats into "downloaded_video.webm"
Deleting original file downloaded_video.f251.webm (pass -k to keep)
Deleting original file downloaded_video.f243.webm (pass -k to keep)
downloaded_video.f251.webm


In [None]:
# extract a short video to check if fine
!ffmpeg -ss 00:10:00 -to 00:13:00 -i downloaded_video.webm -c copy output.webm

In [None]:
if not downloaded_file and UPLOADED_FILE:
  downloaded_file = UPLOADED_FILE

In [None]:
# Step 1: Extract audio from the downloaded video (audio-only extraction)
original_audio = "original_audio.mp3"
ffmpeg_extract_audio_cmd = [
    'ffmpeg', '-i', downloaded_file,
    '-q:a', '0', '-map', 'a',
    original_audio
]
subprocess.run(ffmpeg_extract_audio_cmd)

## CompletedProcess(args=['ffmpeg', '-i', '/content/downloaded_video.mkv', '-q:a', '0', '-map', 'a', 'original_audio.mp3'], returncode=0)


In [9]:
# Step 2: Speed up the extracted audio by 1.2x using the atempo filter
spedup_audio = "spedup_audio.mp3"
ffmpeg_speedup_audio_cmd = [
    'ffmpeg', '-i', original_audio,
    '-filter:a', "atempo=1.2",
    spedup_audio
]
subprocess.run(ffmpeg_speedup_audio_cmd)

## CompletedProcess(args=['ffmpeg', '-i', 'original_audio.mp3', '-filter:a', 'atempo=1.2', 'spedup_audio.mp3'], returncode=1)

CompletedProcess(args=['ffmpeg', '-i', 'original_audio.mp3', '-filter:a', 'atempo=1.2', 'spedup_audio.mp3'], returncode=0)

In [None]:
# Step 3: Split the sped-up audio into 1-minute segments, saved in the dedicated folder
split_audio_cmd = [
    'ffmpeg', '-i', spedup_audio,
    '-f', 'segment', '-segment_time', '60', '-c', 'copy',
    os.path.join(audio_segments_folder, 'audio_segment%03d.mp3')
]
subprocess.run(split_audio_cmd)

## CompletedProcess(args=['ffmpeg', '-i', 'spedup_audio.mp3', '-f', 'segment', '-segment_time', '60', '-c', 'copy', 'audio_segments/audio_segment%03d.mp3'], returncode=0)


In [None]:
# Step 4: Load the Whisper model and transcribe each audio segment,
# writing the transcriptions (with timestamps) into a text file.
model = whisper.load_model("base")  # Options: "base", "small", "medium", "large"

with open(transcription_output, 'w') as outfile:
    # Get a sorted list of audio segment files from the folder
    audio_segments = sorted(glob.glob(os.path.join(audio_segments_folder, "audio_segment*.mp3")))

    for seg in audio_segments:
        outfile.write(f"Transcription for {seg}:\n")
        print(f"Transcribing {seg}...")
        result = model.transcribe(seg)

        # Write the overall transcription text
        outfile.write("Full Transcription:\n")
        outfile.write(result["text"] + "\n")

        # Write each segment with timestamps
        outfile.write("Timestamps and Text Segments:\n")
        for s in result.get("segments", []):
            start = s.get("start")
            end = s.get("end")
            text = s.get("text")
            outfile.write(f"{start:6.2f}s to {end:6.2f}s: {text}\n")
        outfile.write("-" * 50 + "\n\n")

print(f"Transcriptions have been written to {transcription_output}")

In [None]:
all_text = ""
capture = False
with open(transcription_output, 'r') as infile:
    for line in infile:
        stripped = line.strip()
        # Start capturing when we hit the "Full Transcription:" header.
        if stripped == "Full Transcription:":
            capture = True
            continue
        # Stop capturing when the timestamps section starts.
        if stripped == "Timestamps and Text Segments:":
            capture = False
        # Append captured lines.
        if capture and stripped:
            all_text += stripped + " "
# print(all_text)


In [4]:
# check num of words
len(all_text.split())

In [None]:
from openai import OpenAI

client = OpenAI(
    api_key=GEMINI_API_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

In [None]:
SEGMENT_EXTRACTION_PROMPT= """Your task is to analyze the provided text and divide it into coherent segments.
This text is transcribed from a video and may contain random sounds or incoherent phrases which could not be understood properly.
The number of segments to be made is left to your judgement but it should not exceed 20.
For each segment, please generate a JSON object containing the following keys:

1. "segment_number": Provide the serial number of the segment.
2. "short_title": Provide a short title for this segment . max 5 to 7 words
3. "summary": Provide a concise summary of the segment's key ideas. You must explain the tricks and ideas discussed in a concise manner such that a person reading the summary can understand the key elements discussed.
4. "justification": Explain why you believe this segment is valuable and worthy of attention.
5. "counter_intuitive": Identify any points that are counter-intuitive or surprising, and that are worth remembering.
6. "topics": List topics or tags related to this segment that could be explored further to enhance understanding.
7. "questions": Generate a set of questions that can test the user's understanding of the segment.

The final output should be a valid JSON object containing an array of segments. The JSON must strictly follow the format below:

{
  "segments": [
    {
      "segment_number": "<int num>",
      "short_title": "title of the segment",
      "summary": "A concise summary of the segment and the important ideas with a short explanation.",
      "justification": "The reason this segment is significant.",
      "counter_intuitive": "Key counter-intuitive or surprising points that are important to remember.",
      "topics": ["topic1", "topic2", "..."],
      "questions": ["Question 1?", "Question 2?", "..."]
    }
    // ... additional segments as needed
  ]
}

Ensure that the output is a valid JSON with no additional keys or formatting errors."""

In [None]:
messages_for_llm = []
messages_for_llm.append({"role": "system", "content": SEGMENT_EXTRACTION_PROMPT})
messages_for_llm.append({"role": "user", "content": f"The transcribed text is as follows: {all_text}"})

In [None]:
llm_kwargs = {"model": "gemini-2.0-flash",
                            "temperature": 0.2,
                            "response_format": {"type": "json_object"}
}

In [None]:
completion = client.chat.completions.create(
            messages=messages_for_llm,
            **llm_kwargs
        )

In [None]:
result_json = completion.choices[0].message.content

In [None]:
def trim_incomplete_json(json_str):
    stack = 0
    last_index = -1
    for i, char in enumerate(json_str):
        if char == '{':
            stack += 1
        elif char == '}':
            stack -= 1
            # When the stack is balanced, record this index
            if stack == 0:
                last_index = i
    # If we found a valid end, slice the string up to that point.
    if last_index != -1:
        return json_str[:last_index+1]
    return json_str  # return original if no valid end found

In [None]:
import json
with open('transcription_result.json', 'w') as fp:
    json.dump(json.loads(result_json), fp)

In [None]:
QUESTION_ANSWER_PROMPT = """You are provided with a list of questions related to the transcribed text from a video.
For each question, please provide a concise answer that captures the essence of the concept strictly from the content discussed in the text.
DO NOT make up answers on your own.
Your output must be valid JSON in the following format:

{
  "answers": [
    {
      "question": "Question 1?",
      "answer": "Concise answer for question 1."
    },
    {
      "question": "Question 2?",
      "answer": "Concise answer for question 2."
    }
    // ... more question-answer pairs as needed
  ]
}

Do not include any additional keys or commentary.
Make sure that each answer is short, clear, and directly addresses the question in an understandable way.
Please provide your answers in the specified JSON format.
"""

In [None]:
import json
import os
import time


# Placeholder function: Replace with your actual LLM API call
def call_llm_api(messages_for_llm: list[dict]) -> dict:
    """
    This function should call your LLM with the given prompt and return its response as a string.
    For demonstration, we simulate a response.
    """
    parsed_json = None
    result_json = None
    completion = client.chat.completions.create(
                messages=messages_for_llm,
                **llm_kwargs
            )
    result_json = completion.choices[0].message.content
    fixed_result = trim_incomplete_json(result_json)
    try:
        parsed_json = json.loads(fixed_result)
        print("Successfully parsed JSON!")
    except json.JSONDecodeError as e:
        print("Still not valid JSON:", e)
    if parsed_json:
      return parsed_json
    else:
      return result_json

# Define the prompt template for extracting answers
def build_answer_prompt(system_prompt, transcribed_text, questions):
    user_input = f"The transcribed text is as follows: {transcribed_text}\n\n"
    user_input += f"The Questions to be answered are as follows: {questions}"
    messages_for_llm = []
    messages_for_llm.append({"role": "system", "content": system_prompt})
    messages_for_llm.append({"role": "user", "content": user_input})

    return messages_for_llm

# List to hold final question-answer pairs
final_answers = []

parsed_json = json.loads(result_json)

# Iterate over each segment in the parsed JSON
for segment in parsed_json.get("segments", []):
    segment_num = segment.get("segment_number")
    questions = segment.get("questions", [])
    if not questions:
        continue

    # Build the prompt for the current set of questions.
    prompt = build_answer_prompt(QUESTION_ANSWER_PROMPT , all_text, questions)

    # Call the LLM API to get the answers for this segment
    llm_response = call_llm_api(prompt)
    if llm_response:

        try:

            # Iterate over the answers and add them to the final list
            for qa in llm_response.get("answers", []):
                final_answers.append({
                    "segment": segment_num,
                    "question": qa.get("question", ""),
                    "answer": qa.get("answer", "")
                })
        except json.JSONDecodeError as e:
            print("Error parsing LLM response:", e)
    time.sleep(3)

# Dump the final answers into a JSON file
output_file = "final_answers.json"
with open(output_file, "w") as f:
    json.dump(final_answers, f, indent=2)

print(f"All question-answer pairs have been saved in '{output_file}'.")

In [None]:
import json
import os
import textwrap
from pptx import Presentation
from pptx.util import Inches, Pt

# Load your JSON data (adjust file names as needed)
with open("/content/transcription_result.json", "r") as f:
    segment_data = json.load(f)["segments"]

with open("final_answers.json", "r") as f:
    answers_data = json.load(f)

# Group Q&A pairs by segment_number
answers_by_segment = {}
for qa in answers_data:
    seg_num = qa["segment"]
    answers_by_segment.setdefault(seg_num, []).append(qa)

# Create a new PowerPoint presentation
prs = Presentation()

# Helper function to add wrapped text to a textbox
def add_wrapped_paragraph(text_frame, text, font_size=Pt(20), bold=False, bullet=False):
    # Enable word wrapping
    text_frame.word_wrap = True
    p = text_frame.add_paragraph()
    p.text = text
    p.font.size = font_size
    p.font.bold = bold
    if bullet:
        p.level = 0
    return p

for seg in segment_data:
    seg_num = seg.get("segment_number")

    # --- Slide 1: Title Slide ---
    slide1 = prs.slides.add_slide(prs.slide_layouts[5])  # Blank layout
    title_box = slide1.shapes.add_textbox(Inches(0.5), Inches(0.5), Inches(9), Inches(1.5))
    tf_title = title_box.text_frame
    tf_title.word_wrap = True
    short_title = seg.get("short_title", f"Segment {seg_num}")
    add_wrapped_paragraph(tf_title, short_title, font_size=Pt(36), bold=True)

    # --- Slide 2: Summary, Counter-Intuitive Points & Topics ---
    slide2 = prs.slides.add_slide(prs.slide_layouts[5])
    summary_box = slide2.shapes.add_textbox(Inches(0.5), Inches(0.5), Inches(9), Inches(5))
    tf_summary = summary_box.text_frame
    tf_summary.word_wrap = True
    add_wrapped_paragraph(tf_summary, "Summary & Key Points", font_size=Pt(32), bold=True)

    summary_text = f"Summary: {seg.get('summary', '')}\n\n"
    summary_text += f"Counter-Intuitive Points: {seg.get('counter_intuitive', '')}\n\n"
    topics = seg.get('topics', [])
    summary_text += f"Topics to Explore: {', '.join(topics)}"
    add_wrapped_paragraph(tf_summary, summary_text, font_size=Pt(20))

    # --- Slide 3: Q&A Slide ---
    slide3 = prs.slides.add_slide(prs.slide_layouts[5])
    qa_box = slide3.shapes.add_textbox(Inches(0.5), Inches(0.5), Inches(9), Inches(5))
    tf_qa = qa_box.text_frame
    tf_qa.word_wrap = True
    add_wrapped_paragraph(tf_qa, "Q&A", font_size=Pt(32), bold=True)

    qa_list = answers_by_segment.get(seg_num, [])
    for qa in qa_list:
        question = qa.get("question", "")
        answer = qa.get("answer", "")
        qa_text = f"Q: {question}\nA: {answer}\n"
        add_wrapped_paragraph(tf_qa, qa_text, font_size=Pt(20), bullet=True)


# Save the presentation to a PPTX file
pptx_file = "presentation.pptx"
prs.save(pptx_file)
print(f"Presentation saved as {pptx_file}")

In [None]:
pdf_file = pptx_file.replace(".pptx", ".pdf")
try:
    # Run LibreOffice headless conversion
    conversion = subprocess.run(
        ["libreoffice", "--headless", "--convert-to", "pdf", pptx_file],
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    print("LibreOffice output:", conversion.stdout.decode("utf-8"))
    if os.path.exists(pdf_file):
        print(f"Presentation successfully converted to PDF: {pdf_file}")
    else:
        print("PDF conversion did not produce output. Please ensure LibreOffice is installed and in your PATH.")
except subprocess.CalledProcessError as e:
    print("PDF conversion failed:", e.stderr.decode("utf-8"))