In [None]:
# Install the third-party packages this notebook imports.
# %pip (rather than "! pip") installs into the environment of the running kernel.
# codecs is part of the Python standard library, so it is not installed from PyPI.
%pip install --upgrade openai
%pip install --upgrade langchain
%pip install python-dotenv
%pip install srt
%pip install tiktoken
# pytube and pydub are imported by the download cell, so they must be present before it runs.
%pip install pytube
%pip install pydub

In [7]:
import os
import openai
import langchain as lc
from langchain.document_loaders import SRTLoader
from dotenv import load_dotenv
import srt
import codecs
import tiktoken
from IPython.display import clear_output

# Pull variables from a local .env file (if any) into the process environment
load_dotenv()

# Fail fast when the key is missing or empty, before any API call is attempted
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY not found in environment variables.")
openai.api_key = openai_api_key

# Model name passed to the chat-completion helper by the cells below
GPT_MODEL_NAME = "gpt-4"

In [14]:
from openai import OpenAI

# Shared API client; the key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def num_tokens_from_string(string: str) -> int:
    """Count how many tokens `string` encodes to under tiktoken's "cl100k_base" encoding."""
    tokenizer = tiktoken.get_encoding("cl100k_base")
    return len(tokenizer.encode(string))

def get_completion(prompt, model=GPT_MODEL_NAME):
    """Send `prompt` to the chat completions API and return the reply text.

    Args:
        prompt: Text sent as the single user message.
        model: Name of the chat model to query. Defaults to GPT_MODEL_NAME.

    Returns:
        The `content` of the first choice's message in the response.
    """
    response = client.chat.completions.create(
        temperature=0,
        # Honor the caller's choice; previously GPT_MODEL_NAME was always sent
        # and the `model` argument was silently ignored.
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant with expertise in english language reading and writing. You are an expert copywriter."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

def write_to_file(content: str, file_path: str):
    """Save `content` as UTF-8 text at `file_path`.

    The file is opened in write mode, so an existing file at that path is
    overwritten. Nothing is returned.
    """
    with open(file_path, mode='w', encoding='utf-8') as out_file:
        out_file.write(content)

def read_srt_file(file_path: str):
    """Read an SRT file and return its parsed subtitles as a list.

    Args:
        file_path: Path of the SRT file to read.

    Returns:
        A list built from everything `srt.parse` yields for the file's text.
    """
    # The built-in open() replaces codecs.open(); the 'utf-8-sig' encoding still
    # strips a leading byte-order mark, and text mode additionally normalizes
    # "\r\n" line endings to "\n" before parsing.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        return list(srt.parse(f.read()))

### Video download from YouTube

In [None]:
from pytube import YouTube
from pydub import AudioSegment
import subprocess
import os

# Downloads the source video; errors are caught and printed instead of raised.

def download_video(youtube_link: str, output_filename: str = "stream"):
    """Download a YouTube video and save it as ``<output_filename>.mp4``.

    Args:
        youtube_link: URL of the YouTube video.
        output_filename: Base name of the saved file; ".mp4" is appended.

    Any exception raised along the way is caught and reported with ``print``;
    nothing is returned.
    """
    try:
        youtube = YouTube(youtube_link)
        # Restrict the choice to progressive MP4 streams. The unfiltered first
        # stream is not guaranteed to be MP4 or to carry an audio track, yet the
        # file is saved with an .mp4 extension and its audio is extracted later.
        video_stream = youtube.streams.filter(
            progressive=True, file_extension="mp4"
        ).first()
        if not video_stream:
            raise Exception("No video stream found in the YouTube video.")

        # Downloading and saving the stream
        video_stream.download(filename=f"{output_filename}.mp4")

    except Exception as e:
        print(f"Error downloading video: {e}")

# YouTube video URL
url = 'https://www.youtube.com/watch?v=LUk7INpXM5M'

# No output_filename is passed, so the default "stream" is used and the file is saved as "stream.mp4".
download_video(url)

### Extract Audio from Video

In [None]:
# Stay on the 1.x line: the next cell imports from moviepy.editor, which MoviePy 2.x no longer provides.
%pip install "moviepy<2"

In [None]:
# Base name (without extension) of the video file; ".mp4" is appended where it is passed to extract_audio.
video_file_name = "stream"

In [None]:
from moviepy.editor import VideoFileClip

def extract_audio(video_file: str, audio_file: str):
    """Extract the audio track of `video_file` and write it to `audio_file`.

    Args:
        video_file: Path of the video file to open.
        audio_file: Path of the audio file to write.

    Any exception raised along the way is caught and reported with ``print``;
    nothing is returned.
    """
    try:
        with VideoFileClip(video_file) as video:
            audio = video.audio
            if audio is None:
                # Report a missing track explicitly instead of letting it surface
                # as an opaque AttributeError on None.
                raise ValueError(f"no audio track found in {video_file!r}")
            audio.write_audiofile(audio_file)
    except Exception as e:
        print(f"Error extracting audio: {e}")

# Usage: read "<video_file_name>.mp4" and write its audio track to "output_audio.mp3"
extract_audio(f"{video_file_name}.mp4", "output_audio.mp3")


### Whisper Transcription

In [None]:

# -y answers pip's confirmation prompt, which would otherwise wait for input the cell cannot provide.
%pip uninstall -y whisper
%pip install git+https://github.com/openai/whisper.git
%pip install pydub
# NOTE(review): this installs a PyPI package named "ffmpeg"; Whisper's README asks for the
# ffmpeg command-line tool to be installed on the system — confirm it is available.
%pip install ffmpeg

In [None]:
import torch

# Report whether PyTorch can see a CUDA device in this environment.
is_cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {is_cuda_available}")


In [None]:
import whisper
from whisper.utils import get_writer
from pydub import AudioSegment

# Load the "medium" Whisper model and transcribe the audio file written by the extraction step.
model = whisper.load_model("medium")
audio = "output_audio.mp3"
result = model.transcribe(audio)
output_directory = "./"

# Save as an SRT file
# The writer receives the audio path along with the transcription result; presumably the
# output file name is derived from it (later cells read "output_audio.srt") — TODO confirm.
try:
    srt_writer = get_writer("srt", output_directory)
    srt_writer(result, audio)
    print(f"SRT file saved.")
except Exception as e:
    # Failures are reported but not re-raised, so the cell finishes even if no SRT was written.
    print(f"Error saving SRT file: {e}")


### Creating video chapters

In [25]:
def process_subtitles(subtitles, model_name: str, max_chunk_tokens: int = 7000):
    """Split a subtitle sequence into chunks and collect the chapters for each chunk.

    Args:
        subtitles: Iterable of subtitle objects exposing `start`, `end`
            (with a `total_seconds()` method) and `content`.
        model_name: Model name forwarded to `split_into_chapters`.
        max_chunk_tokens: A chunk is sent for chaptering as soon as its token
            count exceeds this value. Defaults to 7000.

    Returns:
        The chapter text returned for every chunk, one chunk's text per
        newline-separated section; an empty string when there are no subtitles.
    """
    chapter_lists = []
    chunk = ""
    for sub in subtitles:
        # The trailing newline keeps this cue's text from running straight into
        # the next cue's timestamp.
        chunk += f"{sub.start}>{sub.end.total_seconds()}\n{sub.content}\n"
        if num_tokens_from_string(chunk) > max_chunk_tokens:
            chapter_lists.append(split_into_chapters(chunk, model_name))
            chunk = ""
    # Flush whatever is left after the last full chunk.
    if chunk:
        chapter_lists.append(split_into_chapters(chunk, model_name))
    # Join with a newline so the last chapter of one chunk and the first chapter
    # of the next do not end up on the same line.
    return "\n".join(chapter_lists)

def split_into_chapters(chunk: str, model_name: str):
    """Ask the chat model to turn one transcript chunk into a list of chapters.

    A fixed instruction text is placed in front of `chunk`, and the combined
    prompt is passed to `get_completion` together with `model_name`. Whatever
    that call returns is returned unchanged.
    """
    instructions = "".join([
        "Below is a part of a video transcript. ",
        "You need to split the video into five topic chapters. ",
        "The chapters will be used to navigate in the larger video timeline ",
        "to let watchers switch between topics. ",
        "Read the entire transcript. ",
        "Once done reading, split it into chapters. ",
        "Provide the list of chapters in this format [HH:MM:SS Chapter Name in Turkish]. ",
        "Put each chapter in a separate line in plain text using the transcript language. \n\n",
    ])
    return get_completion(instructions + chunk, model_name)


# Parse the SRT transcript, request chapters for it chunk by chunk, and save the combined text.
subtitles = read_srt_file("output_audio.srt")
final_output = process_subtitles(subtitles, GPT_MODEL_NAME)
write_to_file(final_output, 'chapters-iac.txt')

### Creating video summary

In [26]:
def create_summary(transcript: str, model_name: str):
    """Ask the chat model for a short Turkish summary of `transcript`.

    A fixed instruction text is placed in front of `transcript`, and the
    combined prompt is passed to `get_completion` together with `model_name`.
    Whatever that call returns is returned unchanged.
    """
    # The double space after "as possible" is part of the prompt as sent today
    # and is kept as-is.
    instructions = "".join([
        "Below is a video transcript. ",
        "Your goal is to summarize the entire video. ",
        "You need to create the shortest summary as possible  that will help a reader ",
        "understand the information given in the video. ",
        "Your summary should be in Turkish.\n\n",
    ])
    return get_completion(instructions + transcript, model_name)

# Re-read the SRT file and keep only the spoken text of each subtitle, separated by blank lines.
subtitles = read_srt_file("output_audio.srt")
transcript_text = "\n\n".join([f"{sub.content}" for sub in subtitles])
# NOTE(review): the whole transcript goes into a single prompt here, unlike the chapter step,
# which splits its input at 7000 tokens — confirm it fits the model's context window.
final_output = create_summary(transcript_text, GPT_MODEL_NAME)
write_to_file(final_output, 'summaries.txt')

### Title Creator

In [27]:
def process_srt_file(file_path: str, model_name: str):
    """Ask the chat model for Turkish titles and a summary based on a text file.

    The file at `file_path` is read as UTF-8 text and appended verbatim to a
    fixed instruction text; no SRT parsing happens here despite the function
    name. The combined prompt is passed to `get_completion` together with
    `model_name`, and whatever that call returns is returned unchanged.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        summaries_text = source.read()

    instructions = "".join([
        "Below is a series of summaries created out of different sections of a video recording. ",
        "The video is published on YouTube. ",
        "Provide 10 Turkish title alternatives and a single Turkish summary for the video. ",
        "Both title and summary should be inviting and helpful to watchers.\n\n",
    ])
    return get_completion(instructions + summaries_text, model_name)

# Generate titles and a description from the saved summary text.
final_output = process_srt_file("summaries.txt", GPT_MODEL_NAME)
# Reuse the shared helper, as the chapter and summary cells do, instead of an inline open()/write().
write_to_file(final_output, 'title-description.txt')