In [2]:
#connection to my google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install yt-dlp

Collecting yt-dlp
  Downloading yt_dlp-2025.7.21-py3-none-any.whl.metadata (175 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/175.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m174.1/175.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.4/175.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading yt_dlp-2025.7.21-py3-none-any.whl (3.3 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━[0m [32m2.1/3.3 MB[0m [31m63.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: yt-dlp
Successfully installed yt-dlp-2025.7.21


In [4]:
#Import libraries
import cv2
import subprocess
import numpy as np
import os
from pathlib import Path
import json
from skimage.metrics import structural_similarity as ssim

In [5]:
#In this section, I will create a dictionary that indicates
#the intervals during which each slide appears in the presentation.


# Loading the slides
presentation_path = Path("/content/drive/My Drive/ProjectSemester1/slides")
slides = {}

slide_filenames = sorted(
    [f.name for f in presentation_path.iterdir() if f.name.startswith("slide_") and f.suffix == ".png"],
    key=lambda x: int(x.split("_")[1].split(".")[0])
)

for slide_filename in slide_filenames:
    slide = cv2.imread(presentation_path / slide_filename, cv2.IMREAD_GRAYSCALE)
    slides[slide_filename] = slide

slide_list = [slides[slide_filename] for slide_filename in slide_filenames]

#Retrieving the video streaming URL
video_url = "https://www.youtube.com/watch?v=e_a7C2KtJY4"
command = f'yt-dlp -f best -g "{video_url}"'
video_stream_url = subprocess.check_output(command, shell=True, encoding="utf-8").strip()

#Open the video
cap = cv2.VideoCapture(video_stream_url)
if not cap.isOpened():
    print("problem in the video")
    exit()

try:
  frame_rate = cap.get(cv2.CAP_PROP_FPS)
  if not frame_rate or frame_rate <= 0:
      frame_rate = 30

  ret, first_frame = cap.read()
  if not ret:
      exit()

  frame_height, frame_width = first_frame.shape[:2]

  # Resize slides
  resized_slide_list = [cv2.resize(slide, (frame_width, frame_height), interpolation=cv2.INTER_AREA)
                        for slide in slide_list]

  check_interval = int(frame_rate * 10)  #Check every 10 seconds
  frame_count = 0

  video_time = (35 * 60) + 17  #Start time in this lesson
  cap.set(cv2.CAP_PROP_POS_MSEC, video_time * 1000)

  #Let's say the lecture starts on the first page, that's a reasonable assumption.
  current_slide_index = 1
  slide_intervals = {slide_name: [] for slide_name in slide_filenames}
  current_interval_start = video_time

  while cap.isOpened():
      ret, frame = cap.read()
      if not ret:
          break
  #Every ten seconds we will check to see if the page has been updated.
  #If the page has changed, we will close the interval and open a new one for the new page.

      if frame_count % check_interval == 0:
          gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

          best_match_index = current_slide_index
          best_score = -1
  #Lets assume that the page currently displayed is relatively close to the page that was before it. This assumption is true
  # for the majority of lectures because it is unlikely that many pages were changed within 10 seconds.
  #This assumption will help shorten the processing time.
          start_index = max(0, current_slide_index - 3)
          end_index = min(len(resized_slide_list) - 1, current_slide_index + 5)

          for i in range(start_index, end_index + 1):
              #Resizing the images causes the images to not be exactly the same at the pixel level.
              #Probably happens because of interpolations.
              # As a result, MSE results in lower accuracy than SSIM.
              score = ssim(gray_frame, resized_slide_list[i])
              if score > best_score:
                  best_score = score
                  best_match_index = i

          if best_match_index != current_slide_index:
              # End of previous slide interval
              prev_slide = slide_filenames[current_slide_index]
              slide_intervals[prev_slide].append((current_interval_start, video_time))

              # Update
              current_slide_index = best_match_index
              current_interval_start = video_time

      frame_count += 1
      video_time += 1 / frame_rate

  # saving last interaval
  if current_interval_start is not None:
      final_slide = slide_filenames[current_slide_index]
      slide_intervals[final_slide].append((current_interval_start, video_time))
finally:
  cap.release()

output_path = Path("/content/drive/My Drive/ProjectSemester1/slide_intervals.json")
with output_path.open("w", encoding="utf-8") as f:
    json.dump(slide_intervals, f, ensure_ascii=False, indent=4)



In [6]:
#In this section, I decoded the subtitle file and turned it into a list
#of dictionaries containing the words, start time, end time, and what was said.
srt_file_name = "Intro to CS, Python Edition - Lecture 8a.iw.srt"
presentation_path = Path("/content/drive/My Drive/ProjectSemester1")
srt_file_path = presentation_path / srt_file_name

def srt_time_to_seconds(time_str: str) -> float:
    from datetime import datetime
    dt = datetime.strptime(time_str, "%H:%M:%S,%f")
    return dt.hour * 3600 + dt.minute * 60 + dt.second + dt.microsecond / 1e6

subtitles_intervals = []
if not srt_file_path.exists():
    print("why?!")
    exit()

with srt_file_path.open("r", encoding="utf-8") as f:
    srt_content = f.read()

transcription_blocks = srt_content.strip().split("\n\n")
for block in transcription_blocks:
    lines = block.splitlines()
    if len(lines) >= 3:
        time_line = lines[1]
        times = time_line.split(" --> ")
        if len(times) == 2:
            start_time = srt_time_to_seconds(times[0].strip())
            end_time = srt_time_to_seconds(times[1].strip())
            text = " ".join(lines[2:])
            subtitles_intervals.append({
                "start": start_time,
                "end": end_time,
                "text": text
            })
print(subtitles_intervals)

[{'start': 0.0, 'end': 30.0, 'text': "את זה פה ואם זה לא אז או שאני מגביר נעם או שאני מגביר שגיאה. שלב ג' נוסף, יהיו פה הרבה שלבי ג' זה להתחיל את המטריצה,סליחה, להתחיל את המכפלה, להכניס את המכפלה ב-AX וזה רק הסוסים. ועכשיו, שלב ג' הבא, אחרי השני שני שטרבעי ג' לנו את הדמים.זה את זה פה ואם זהלא אז או שאני מגביר נעם או שאני מגביר שגיאה. על גביע שני (2x2)"}, {'start': 30.0, 'end': 41.78, 'text': 'אני אפילו פשוט בלולאות לעסוק את הדבר הזה, כן, אני נברס בלולאות על כל העמודות והשורות ומחשב את התוצאה האפות.'}, {'start': 43.74, 'end': 54.36, 'text': 'אוקיי, אז אפשר לעשות את זה בלולאות ככה, אבל תרגיל טוב, בשבילכם לתרגל כמה שיותר ולעשות את זה עוד פעם עם ליסטומבר הןשן.'}, {'start': 54.36, 'end': 66.42, 'text': 'ואפשר לעשות את זה ככה, זו הדרך שאתם הייתם עושים את זה, אז בואו נכתוב את זה רגע לאט לאט בפה רגע.'}, {'start': 66.42, 'end': 83.32, 'text': 'או נדבר על זה לפחות. אז האיבר, האיי, רגע.'}, {'start': 83.32, 'end': 113.3, 'text': 'אוקיי, אז זה שווה לסכום, איי, איי, אוקיי, וכל מה שיש פה, כן?'}, {'st

In [7]:
#Match the intervals of the transcriptions to the intervals of the pages.

presentations_words = { slide_name: [] for slide_name in slides.keys() }

for subtitle in subtitles_intervals:
    sub_start = subtitle["start"]
    sub_end = subtitle["end"]
    words = (subtitle["text"])

    for slide_name, intervals in slide_intervals.items():
        for (int_start, int_end) in intervals:
            if sub_start < int_end and sub_end > int_start:
                presentations_words[slide_name].append(words)
                break

#for slide_name in presentations_words:
#    presentations_words[slide_name] = sorted(list(presentations_words[slide_name]))


output_directory = Path("/content/drive/My Drive/ProjectSemester1/words_to_presentationsNew")
output_directory.mkdir(parents=True, exist_ok=True)

output_file = output_directory / "presentations_words.json"
with output_file.open("w", encoding="utf-8") as f:
    json.dump(presentations_words, f, ensure_ascii=False, indent=4)
print("The dictionary is saved.", output_file)


The dictionary is saved. /content/drive/My Drive/ProjectSemester1/words_to_presentationsNew/presentations_words.json
