

---

# **Install Dependencies**

In [1]:
pip install streamlit youtube-transcript-api pytube sentence-transformers transformers torch scikit-learn numpy pandas nltk


Collecting streamlit
  Downloading streamlit-1.48.0-py3-none-any.whl.metadata (9.5 kB)
Collecting youtube-transcript-api
  Downloading youtube_transcript_api-1.2.2-py3-none-any.whl.metadata (24 kB)
Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl.metadata (5.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (fr

In [2]:
pip install keybert

Collecting keybert
  Downloading keybert-0.9.0-py3-none-any.whl.metadata (15 kB)
Downloading keybert-0.9.0-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keybert
Successfully installed keybert-0.9.0


In [3]:
# Download NLTK's "punkt" tokenizer data into the local nltk_data directory.
# The cell's last expression is the return value of nltk.download (shown as True in the output).
# NOTE(review): app.py as written in this notebook does not import nltk — confirm this is still needed.
import nltk
nltk.download("punkt")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

# **Main App.py**

In [4]:
%%writefile app.py
import streamlit as st
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import pipeline


@st.cache_resource
def _load_summarizer():
    """Build the BART summarization pipeline once per server process.

    Streamlit re-executes this script on every user interaction; caching the
    pipeline as a resource avoids reloading the model on each rerun.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn", device=-1)  # device=-1 means CPU


# Module-level handle used by summarize_chunks().
summarizer = _load_summarizer()


def get_video_id(url):
    """Extract the YouTube video ID from a URL.

    Recognised forms:
      * any URL carrying a ``v`` query parameter (``.../watch?v=<id>&t=1``),
      * ``youtu.be/<id>`` short links, with or without extra query parameters,
      * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Query strings and ``#fragments`` are never included in the returned ID.

    Args:
        url: URL text as entered by the user; the scheme may be omitted.

    Returns:
        The video ID string, or ``None`` when no ID can be found.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import parse_qs, urlparse

    if not url or not url.strip():
        return None

    candidate = url.strip()
    # urlparse only identifies the host when a scheme is present.
    if "://" not in candidate:
        candidate = "https://" + candidate

    try:
        parsed = urlparse(candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Malformed input (e.g. a broken IPv6 literal) is simply "not a video URL".
        return None

    # Standard watch URLs: the ID is the value of the "v" query parameter.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [part for part in parsed.path.split("/") if part]

    # Short links: the ID is the first path segment.
    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None

    # Path-based forms such as /shorts/<id> or /embed/<id>.
    if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
        return segments[1]

    return None

def fetch_transcript(video_id):
    """Fetch a video's transcript and flatten it into a single string.

    Each entry returned by ``YouTubeTranscriptApi().fetch`` contributes its
    ``text`` attribute; the pieces are joined with single spaces.

    Args:
        video_id: Identifier passed straight through to the transcript API.

    Returns:
        The joined transcript text, or ``None`` if anything fails. Failures
        are reported to the user through ``st.error`` rather than raised.
    """
    try:
        snippets = YouTubeTranscriptApi().fetch(video_id)
        pieces = []
        for snippet in snippets:
            pieces.append(snippet.text)
        return " ".join(pieces)
    except Exception as err:
        # Best-effort: surface the problem in the UI and signal failure with None.
        st.error(f"Error fetching transcript: {err}")
    return None

def segment_text(text, max_chunk_size=500):
    """Split text into consecutive chunks of whitespace-separated words.

    Args:
        text: The text to split; any run of whitespace separates words.
        max_chunk_size: Word count at which the current chunk is closed.

    Returns:
        A list of chunk strings with words re-joined by single spaces. The
        final chunk holds whatever words remain; empty input yields ``[]``.
    """
    segments = []
    buffer = []
    for token in text.split():
        buffer.append(token)
        # Close the chunk as soon as it reaches the size limit.
        if len(buffer) >= max_chunk_size:
            segments.append(" ".join(buffer))
            buffer = []
    # Flush the trailing partial chunk, if any.
    if buffer:
        segments.append(" ".join(buffer))
    return segments

def summarize_chunks(chunks):
    """Summarize each transcript chunk independently.

    Args:
        chunks: Iterable of text chunks (e.g. the output of ``segment_text``).

    Returns:
        A list with one summary string per chunk, in the same order.
    """
    summaries = []
    for chunk in chunks:
        # truncation=True clips inputs that tokenize to more than the model's
        # maximum input length instead of letting the model fail on them.
        result = summarizer(
            chunk,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        summaries.append(result[0]["summary_text"])
    return summaries

# ---- Streamlit page: URL input -> transcript -> per-segment summaries ----
st.title("🎥 YouTube Video Summarizer")

url = st.text_input("Enter YouTube Video URL")

if st.button("Generate Summary"):
    # Resolve the video ID up front so an empty or unrecognised URL gets a clear
    # warning instead of an opaque transcript-API error.
    video_id = get_video_id(url.strip()) if url else None
    if not video_id:
        st.warning("Please enter a valid YouTube URL.")
    else:
        with st.spinner("Fetching transcript..."):
            transcript = fetch_transcript(video_id)
        if transcript:
            # Summarization is the slow step; keep the user informed while it runs.
            with st.spinner("Summarizing transcript..."):
                chunks = segment_text(transcript)
                summaries = summarize_chunks(chunks)
            st.subheader("Transcript Segments & Summaries")
            for i, (chunk, summary) in enumerate(zip(chunks, summaries)):
                with st.expander(f"Segment {i+1}"):
                    st.write("**Original Transcript:**")
                    st.write(chunk)
                    st.write("**Summary:**")
                    st.write(summary)
        elif transcript is not None:
            # fetch_transcript reports its own errors and returns None; an empty
            # string means the fetch succeeded but there is nothing to summarize.
            st.warning("The transcript for this video is empty.")


Writing app.py


In [5]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [6]:
!pip install streamlit



In [7]:
# SECURITY: never hard-code the ngrok authtoken in the notebook. The token that was
# previously committed in this cell must be considered leaked and should be revoked
# in the ngrok dashboard.
import os
from getpass import getpass

from pyngrok import ngrok

# Prefer the NGROK_AUTHTOKEN environment variable; otherwise prompt without echoing.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [8]:
from pyngrok import ngrok
import socket
import subprocess
import threading
import time

STREAMLIT_PORT = 8503
STARTUP_TIMEOUT_S = 60

# Drop any tunnel left over from a previous run of this cell.
ngrok.kill()


def run_streamlit():
    """Run the Streamlit server for app.py; blocks until the server exits."""
    subprocess.call(["streamlit", "run", "app.py", f"--server.port={STREAMLIT_PORT}",
                     "--server.headless=true", "--server.enableCORS=false"])


# Daemon thread: the blocking subprocess call must not keep the kernel from shutting down.
thread = threading.Thread(target=run_streamlit, daemon=True)
thread.start()

# Wait until the server actually accepts connections instead of sleeping a fixed time.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while time.monotonic() < deadline:
    try:
        with socket.create_connection(("localhost", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        time.sleep(0.5)
else:
    raise RuntimeError(
        f"Streamlit did not start listening on port {STREAMLIT_PORT} "
        f"within {STARTUP_TIMEOUT_S} seconds"
    )

public_url = ngrok.connect(STREAMLIT_PORT)
print(f"Streamlit Application Link: {public_url}")

Open your app here: NgrokTunnel: "https://0bf3672c9a10.ngrok-free.app" -> "http://localhost:8503"


In [9]:
# Stop the ngrok process managed by pyngrok, closing the public tunnel.
# NOTE(review): this does not stop the Streamlit subprocess started by the launcher cell.
ngrok.kill()

In [None]:
!pip uninstall youtube-transcript-api -y
!pip install youtube-transcript-api



Found existing installation: youtube-transcript-api 1.2.2
Uninstalling youtube-transcript-api-1.2.2:
  Successfully uninstalled youtube-transcript-api-1.2.2
Collecting youtube-transcript-api
  Using cached youtube_transcript_api-1.2.2-py3-none-any.whl.metadata (24 kB)
Using cached youtube_transcript_api-1.2.2-py3-none-any.whl (485 kB)
Installing collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-1.2.2


In [None]:
# Debug aid: list the attributes of YouTubeTranscriptApi to see which methods the
# installed version exposes (the recorded output shows `fetch` and `list`).
from youtube_transcript_api import YouTubeTranscriptApi
print(dir(YouTubeTranscriptApi))


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'fetch', 'list']
