<font color='#00AED3'>

# EduCreate | Minimum Viable Product - Extension Audio</font>



# Setup and Import Modules

In [None]:
%%capture
# Install the third-party packages this notebook imports (openai, yt_dlp,
# tiktoken, pydub). %%capture hides the installer output of this cell.
# NOTE(review): versions are unpinned and --upgrade pulls the latest release on
# every run; consider pinning versions so results stay reproducible.
%pip install --upgrade --quiet openai yt_dlp tiktoken pydub

In [None]:
# Import modules and packages
# Standard library
import locale
import os
from pprint import pprint

# Colab / third-party
from google.colab import drive, userdata
from openai import OpenAI
from pydub import AudioSegment
import tiktoken
import yt_dlp


def _utf8_preferred_encoding(do_setlocale=True):
  """Stand-in for locale.getpreferredencoding that always reports UTF-8.

  It accepts the same optional argument as the standard-library function, so
  callers that pass it (e.g. getpreferredencoding(False)) do not raise
  TypeError, which a zero-argument replacement would.
  """
  return "UTF-8"


# Make the session report UTF-8 as its preferred encoding from here on.
locale.getpreferredencoding = _utf8_preferred_encoding

# OpenAI client; the API key is read through google.colab's userdata store
# under the name "OPENAI_API_KEY_W210" instead of being hardcoded.
client = OpenAI(api_key=userdata.get("OPENAI_API_KEY_W210"))
openai_model = "gpt-4o-mini"      # chat model: token counting, summarising, final response
openai_audio_model = "whisper-1"  # speech-to-text model used for the transcription

# Mount Google Drive so a recorded lesson stored there can be read.
drive.mount('/content/drive')

Mounted at /content/drive


<font color='#00AED3'>

# INPUT: User Requirements </font>

In [None]:
## LESSON SOURCE - fill in whichever of the two options applies
# Option A: link to a YouTube video of the lesson
load_youtube_path = 'https://youtu.be/Q-mkVSasZIM?si=Qe5LI1aJKRfvPlTP'

# Option B: path to the saved file of the teacher's recorded lesson
load_video_path = '/content/drive/My Drive/Colab Notebooks/W210_Capstone/W205_Presentation_EO.mp4'


## INSTRUCTIONS - what the teacher wants produced from the lesson
user_prompt = """
Please summarise the key points from the lesson and in a useful order for students who missed this live lesson.
Please also suggest some questions (and answers) for follow up work by the students.
"""


# Source switch: True -> use the YouTube link, False -> use the recorded lesson file
is_load_youtube = True

# Path/URL handed to the audio extraction step
load_path = load_youtube_path if is_load_youtube else load_video_path

# Functions to Extract Audio from Video

In [None]:
# Download the audio track of a YouTube video and convert it to MP3
def extract_audio_from_yt(youtube_path, output_path):
  """Fetch the audio of a YouTube video with yt_dlp and convert it to MP3.

  Args:
    youtube_path: URL of the YouTube video.
    output_path: Value passed to yt_dlp as its output template ('outtmpl').
      The caller in this notebook looks for the converted file at
      output_path + '.mp3'.
  """
  # Post-processing step: re-encode the downloaded audio as 192-quality MP3
  mp3_postprocessor = {
      'key': 'FFmpegExtractAudio',
      'preferredcodec': 'mp3',
      'preferredquality': '192',
  }
  downloader_options = dict(
      format='bestaudio/best',
      postprocessors=[mp3_postprocessor],
      outtmpl=output_path,
  )

  with yt_dlp.YoutubeDL(downloader_options) as downloader:
    downloader.download([youtube_path])

# Function to extract audio from a video file
def extract_audio_from_video(video_path, audio_output_path, video_format="mp4"):
  """Read a video file with pydub and export its audio track as MP3.

  Args:
    video_path: Path of the video file to read.
    audio_output_path: Path the MP3 file is written to.
    video_format: Container format handed to AudioSegment.from_file.
      Defaults to "mp4" (the previously hard-coded value); pass another
      format name to read other kinds of video files.
  """
  audio = AudioSegment.from_file(video_path, format=video_format)
  audio.export(audio_output_path, format="mp3")


# Function to split audio into defined chunks with defined overlap
def split_audio(audio_path, chunk_length_minutes=5, overlap_seconds=5):
  """Split an MP3 file into overlapping chunk files and return their paths.

  Each chunk is exported next to the source file as
  "<audio_path without extension>_chunk<N>.mp3", with N counting from 0.
  Consecutive chunks share `overlap_seconds` of audio.

  Args:
    audio_path: Path of the MP3 file to split.
    chunk_length_minutes: Length of each chunk, in minutes. Must be > 0.
    overlap_seconds: Audio shared between consecutive chunks, in seconds.
      Must be >= 0 and shorter than the chunk length.

  Returns:
    List of the chunk file paths, in playback order (empty for empty audio).

  Raises:
    ValueError: If the chunk length is not positive, or the overlap is
      negative or not shorter than the chunk length. Without this check the
      start position would never advance and the loop would not terminate.
  """
  # Convert minutes to milliseconds
  chunk_length_ms = chunk_length_minutes * 60 * 1000
  # Convert seconds to milliseconds
  overlap_ms = overlap_seconds * 1000

  # Validate before touching the file so bad arguments fail fast
  if chunk_length_ms <= 0:
    raise ValueError("chunk_length_minutes must be greater than 0")
  if overlap_ms < 0 or overlap_ms >= chunk_length_ms:
    raise ValueError(
        "overlap_seconds must be non-negative and shorter than the chunk length")

  audio = AudioSegment.from_file(audio_path, format="mp3")
  total_ms = len(audio)
  base_name = os.path.splitext(audio_path)[0]

  chunks = []
  start = 0
  while start < total_ms:
    end = start + chunk_length_ms
    chunk_name = f"{base_name}_chunk{len(chunks)}.mp3"
    audio[start:end].export(chunk_name, format="mp3")
    chunks.append(chunk_name)
    # This chunk already reached the end of the audio: stop here instead of
    # emitting an extra chunk that would contain only the overlap again.
    if end >= total_ms:
      break
    start = end - overlap_ms

  return chunks

In [None]:
# Extract the lesson audio to an MP3 in the current working directory and keep
# its absolute path for the transcription step. The path is resolved from the
# working directory (where the file is actually written) rather than assuming
# that directory is '/content/'.
if is_load_youtube:
  audio_output_path = 'yt_audio'
  extract_audio_from_yt(load_path, audio_output_path)
  # The MP3 conversion step writes the result as <audio_output_path>.mp3
  load_audio_path = os.path.abspath(audio_output_path + '.mp3')
else:
  audio_output_path = 'vid_audio.mp3'
  extract_audio_from_video(load_path, audio_output_path)
  load_audio_path = os.path.abspath(audio_output_path)

[youtube] Extracting URL: https://youtu.be/Q-mkVSasZIM?si=Qe5LI1aJKRfvPlTP
[youtube] Q-mkVSasZIM: Downloading webpage
[youtube] Q-mkVSasZIM: Downloading ios player API JSON
[youtube] Q-mkVSasZIM: Downloading player 1f8742dc
[youtube] Q-mkVSasZIM: Downloading m3u8 information
[info] Q-mkVSasZIM: Downloading 1 format(s): 251
[download] Destination: yt_audio
[download] 100% of   10.27MiB in 00:00:00 at 26.33MiB/s  
[ExtractAudio] Destination: yt_audio.mp3
Deleting original file yt_audio (pass -k to keep)


# Transcription of Audio

In [None]:
# Transcribe the audio one chunk at a time
audio_chunks = split_audio(load_audio_path)

chunk_texts = []
for chunk_path in audio_chunks:
  with open(chunk_path, "rb") as chunk_file:
    # `transcription` is rebound on every pass, so after the loop it holds
    # the response for the LAST chunk only.
    transcription = client.audio.transcriptions.create(
        file=chunk_file,
        model=openai_audio_model)
  chunk_texts.append(transcription.text)

# Combined text of all chunks, in order, each followed by a newline
full_transcription = "".join(f"{text}\n" for text in chunk_texts)

In [None]:
# Inspect the combined transcription of all chunks; pprint wraps the long
# string over several lines so it is readable in the cell output.
pprint(full_transcription)

("Hi, I'm John Green. This is Crash Course World History, and today we're "
 'going to do some legitimate comp-civ, for those of you into that kind of '
 "thing. Stan, I can't help but feel that we have perhaps too many globes. "
 "That's better. Today we're going to learn about the horrible totalitarian "
 'Persians and the saintly, democracy-loving Greeks. But of course, we already '
 'know this story. There were some wars in which no one wore any shirts and '
 'everyone was reasonably fit. The Persians were bad, the Greeks were good, '
 "Socrates and Plato are awesome, the Persians didn't even philosophize, the "
 "West is the best. Go team! Yeah, well, no. Let's start with the Persian "
 'Empire, which became the model for pretty much all land-based empires '
 'throughout the world, except for, wait for it, the Mongols. Much of what we '
 'know about the Persians and their empire comes from an outsider writing '
 'about them, which is something we now call history. And one of the f

In [None]:
# Count how many tokens a text takes up for a given model
def count_tokens(text, model=openai_model):
  """Return the number of tokens `text` encodes to with `model`'s tokenizer.

  Args:
    text: The string to measure.
    model: Model name used to look up the tiktoken encoding; defaults to the
      notebook's chat model.
  """
  tokenizer = tiktoken.encoding_for_model(model)
  return len(tokenizer.encode(text))

# Check if we can pass entire transcription to model or summarization required first.
# The check must look at `full_transcription` (all chunks). `transcription` is
# only the response for the last audio chunk, so measuring it under-counts.
num_tokens = count_tokens(full_transcription)
print(f"Number of tokens: {num_tokens}")

# 128000 is the token budget assumed for `openai_model` - TODO confirm against
# the model's documented context window.
if num_tokens < 128000:
  print("Passing entire transcription to model.")
else:
  print("Summarized transcription will be passed to the model. This may take a while.")
  # NOTE(review): this request sends the whole over-budget text to the same
  # model, so it may itself exceed the context window; summarising in pieces
  # would be needed for very long lessons.
  summ_transcription = client.chat.completions.create(
      model=openai_model,
      messages=[
          {"role": "system", "content": """Summarize the text to be LESS THAN 25000 words."""},
          {"role": "user", "content": [{"type": "text", "text": f"Here is the text: {full_transcription}"}]}
          ],
      temperature=0)
  print(f"Summarization done. Number of tokens: {count_tokens(summ_transcription.choices[0].message.content)}")

Number of tokens: 460
Passing entire transcription to model.


# OUTPUT: Generation of Teacher Response

In [None]:
# System prompt: role, ground rules, and the teacher's own instructions appended at the end
system_context = """
You are a high school History teacher and an expert in a broad range of History topics.
You will be provided with an audio transcription of the lesson taught by your fellow teaching colleague.
You will also be given some instructions by your teaching colleague on the help required.
Please ONLY provide your responses to the questions or instructions. DO NOT add responses such as 'Of Course, Certainly etc.'
Here are the instructions from your teaching colleague:""" + user_prompt

# Send the whole lesson, not just its last audio chunk: `transcription` holds
# only the final chunk's response, while `full_transcription` has every chunk.
# `summ_transcription` exists only when the token check took its summarising
# branch (num_tokens >= 128000), so the same threshold is used to select it.
if num_tokens < 128000:
  lesson_text = full_transcription
else:
  lesson_text = summ_transcription.choices[0].message.content

response = client.chat.completions.create(
    model=openai_model,
    messages=[
    {"role": "system", "content": system_context},
    {"role": "user", "content": [{"type": "text", "text": f"Here is the audio transcription of the lesson: {lesson_text}"}]}
    ],
    temperature=0)

In [None]:
# Show the generated teacher response (message text of the first completion choice)
print(response.choices[0].message.content)

**Key Points from the Lesson:**

1. **Corruption in Athenian Government**: The Athenian government was described as notoriously corrupt, deriving its power from imperialist beliefs rather than the will of its citizens.

2. **Socrates' Death**: Athens is credited with giving us Socrates, but it is important to note that they ultimately forced him to commit suicide, highlighting the contradictions in Athenian democracy.

3. **Persian Rule and the Peloponnesian War**: The lesson suggests that under Persian rule, the Greeks might have avoided the Peloponnesian War, which significantly weakened the Greek city-states and made them vulnerable to conquest by Alexander the Great.

4. **Consequences of War**: The aftermath of the Peloponnesian War led to bloody conflicts with the Persians and a long absence of democracy in Greece, lasting nearly two millennia.

5. **Philosophical Questions**: The lesson raises fundamental questions about the purpose of life, the organization of society, and the 