In [8]:
import os
import openai
import langchain as lc
from langchain.document_loaders import SRTLoader
from dotenv import load_dotenv
import srt
import codecs
import tiktoken
from IPython.display import clear_output

# Load environment variables
load_dotenv()

openai.api_key = os.environ.get("OPENAI_API_KEY")

#model_name = "gpt-3.5-turbo-16k"
model_name = "gpt-4"

In [9]:
def num_tokens_from_string(string: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.encoding_for_model(model_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

### Creating video chapters

In [15]:
def get_completion(prompt):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model= model_name,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

def process_srt_file(file_path):
    # Read and parse SRT file
    with codecs.open(file_path, 'r', encoding='utf-8-sig') as f:
        file_content = f.read()
        subtitle_generator = srt.parse(file_content)
        subtitles = list(subtitle_generator)
    
    result = ""
    chunk = ""
    for sub in subtitles:
        chunk += str(sub.start) + ">" + str(sub.end.total_seconds) + "\\n" + sub.content
        if num_tokens_from_string(chunk) > 7000:
            prompt = "Below is a part of a video transcript. You need to split the video into five topic chapters. The chapters will be used to navigate in the larger video timeline to let watchers switch between topics. Read the entire transcript. Once done reading, split it into chapters. Provide the list of chapters in this format [HH:MM:SS Chapter Name]. Put each chapter in a separate line in plain text. Match the transcript language in the output.\\n\\n" + chunk
            completion = get_completion(prompt)
            result += completion
            clear_output(wait=True)
            print(completion)
            chunk = ""        

    return result

final_output = process_srt_file("C:\\Users\\daronyondem\\Downloads\\2023-09-09_12.13.srt")
with open('chapters.txt', 'w', encoding='utf-8') as f:
    f.write(final_output)

0:43:15.619000 Yeni İcatlar ve Roket Sayısı
0:43:34.270000 Yapay Zeka ve İnsan İlişkisi
0:44:00.469000 Mülakat Soruları ve Cevapları
0:44:37.310000 Teknoloji ve İnsan Farkı
0:45:53.290000 Siri ve Apple Güncellemeleri


### Creating video summary

In [16]:
def get_completion(prompt):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model= model_name,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

def process_srt_file(file_path):
    # Read and parse SRT file
    with codecs.open(file_path, 'r', encoding='utf-8-sig') as f:
        file_content = f.read()
        subtitle_generator = srt.parse(file_content)
        subtitles = list(subtitle_generator)
    
    result = ""
    chunk = ""
    for sub in subtitles:
        chunk += str(sub.start) + ">" + str(sub.end.total_seconds) + "\\n" + sub.content
        if num_tokens_from_string(chunk) > 7000:
            prompt = "Below is a part of a video transcript. Your goal is to summarize the entire video. You need to create the shortest summary of this section as possible that you will combine with other sections to create the full summary of the entire video.\\n\\n" + chunk
            completion = get_completion(prompt)
            result += completion
            clear_output(wait=True)
            print(completion)
            chunk = ""        

    return result

final_output = process_srt_file("C:\\Users\\daronyondem\\Downloads\\2023-09-09_12.13.srt")
with open('summaries.txt', 'w', encoding='utf-8') as f:
    f.write(final_output)

The speaker discusses the potential risks and ethical issues associated with artificial intelligence and data sharing. They mention the importance of data isolation and caution when using AI, especially in a business context. They also discuss the potential for misuse of data, such as personal or financial information, if it falls into the wrong hands. The speaker suggests that companies like Apple and Samsung need to be careful with how they use and share data.


In [19]:
def get_completion(prompt):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model= model_name,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

def process_srt_file(file_path):
    content = ""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    result = ""
    prompt = "Below is a series of summaries created out of different section of a video recording. The video is published on Youtube. Provide 10 TUrkish title alternatives and a single Turkish summary for the video. Both title and summary should be inviting and helpful to watchers. \\n\\n" + content
    result = get_completion(prompt)

    return result

final_output = process_srt_file("summaries.txt")
with open('title-description.txt', 'w', encoding='utf-8') as f:
    f.write(final_output)