In [5]:
import pandas as pd
data = pd.read_csv("metaclass.csv")
texts = [text for text in data["text"]]


In [24]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

template = """You are a helpful AI assistant that helps to produce video timestamps along with the topics. 
Instructions:
You need to look into the provided data and summarize the main topics. Then you need to specify the timestamps.
The timestamps given to you is in seconds, you need to convert them into minutes. You should use the format as 
HM:SEC -- where H is the hourly timestamp and the M is the Minute timestamp.

Below are the texts and timestamps for the video.
{video_transcript} """ 

prompt = PromptTemplate(template=template,
                        input_variables=["video_transcript"])
llm = OpenAI()
chain = LLMChain(
    llm=llm, 
    prompt=prompt
)



In [7]:
import os
import chromadb
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_transformers import (
    LongContextReorder,
)
from langchain.chains import StuffDocumentsChain, LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Get embeddings.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


# Create a retriever
retriever = Chroma.from_texts(texts, embedding=embeddings).as_retriever(
    search_kwargs={"k": 10}
)
query = "What can you tell me about the metaclass?"

# Get relevant documents ordered by relevance score
docs = retriever.get_relevant_documents(query)
docs

[Document(page_content=' So what are meta classes actually?', metadata={}),
 Document(page_content=' So if you just want to check what is the Metaclass here?', metadata={}),
 Document(page_content=' Metaclasses and how type has been used. So, these things will be covering in this previous video. So,', metadata={}),
 Document(page_content=' the Metaclass as the Metaclass, as we have defined the meta class in the definition of the class.', metadata={}),
 Document(page_content=' instances of meta. Now, let us see. So, as you can see the meta is the meta class and they have inherited the', metadata={}),
 Document(page_content=' So same way, a meta class is a class of a class. So, this meta class is', metadata={}),
 Document(page_content=' We have just used the Metaclass.', metadata={}),
 Document(page_content=' how we are going to see this how the like we are saying that type is a meta class of every class that we create,', metadata={}),
 Document(page_content=' a meta class. So, this is h

In [8]:
# Reorder the documents:
# Less relevant document will be at the middle of the list and more
# relevant elements at beginning / end.
reordering = LongContextReorder()
reordered_docs = reordering.transform_documents(docs)

# We prepare and run a custom Stuff chain with reordered docs as context.

# Override prompts
document_prompt = PromptTemplate(
    input_variables=["page_content"], template="{page_content}"
)
document_variable_name = "context"
llm = OpenAI()
stuff_prompt_override = """Given this text extracts:
-----
{context}
-----
Please answer the following question:
{query}"""
prompt = PromptTemplate(
    template=stuff_prompt_override, input_variables=["context", "query"]
)

# Instantiate the chain
llm_chain = LLMChain(llm=llm, prompt=prompt)
chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_prompt=document_prompt,
    document_variable_name=document_variable_name,
)
chain.run(input_documents=reordered_docs, query=query)

'\n\nMetaclasses are classes of classes, and they are used to define the behavior of a class. They are also used to create instances of classes. Type is a metaclass of every class that is created, and it is used to define the attributes and methods of the class. Meta classes can be used to create instances of classes, and they are used to control the behavior of the class.'

In [27]:
#try summarize
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders.csv_loader import CSVLoader

loader = CSVLoader("metaclass.csv")
data = loader.load()

template = """Below are the texts generted from an youtube video. 
You need to create Chapters from the transcript. 
{transcript}
CREATE CHAPTERS"""

prompt = PromptTemplate(template=template,
                        input_variables=["transcript",],
                         )




In [33]:
from langchain.text_splitter import SpacyTextSplitter

text_splitter = SpacyTextSplitter(chunk_size=1000)

texts = text_splitter.split_documents(data)

Created a chunk of size 1238, which is longer than the specified 1000


In [35]:
from langchain.chains import MapReduceDocumentsChain

# Override prompts
document_prompt = PromptTemplate(
    input_variables=["page_content"], template="{page_content}"
)
document_variable_name = "context"
llm = OpenAI()
stuff_prompt_override = """Given this video transcript generate Video Chapters.
The chapters should be precise to each topic covered in the video. First list all the unique
things discussed in the video, then consise the summary of them. Then create the chapter names.
-----
{context}
-----
GENERATE THE CHAPTERS:
"""
prompt = PromptTemplate(
    template=stuff_prompt_override, input_variables=["context"]
)

# Instantiate the chain
llm_chain = LLMChain(llm=llm, prompt=prompt)

chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_prompt=document_prompt,
    document_variable_name=document_variable_name,
)
chain.run(input_documents=texts)

InvalidRequestError: This model's maximum context length is 4097 tokens, however you requested 23552 tokens (23296 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.

In [55]:
map_prompt_template = """
                      This particular text is a transcript of a video. The text contains the 
                      english transcript along with the timestamps. The documents would have both start time and
                      end time. Your task is to arrange the topics serially in such a way that chapters could be created. 
                      Also note the timestamps(start time:end time) for each chapters.
                      {text}
                      """

map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])

combine_prompt_template = """
                      You need to return the chapters precisely arranging them in order along with the timestamps.
                      Note, that the timestamps are in seconds, you need to arrange them in minutes. The timestamp format
                      would be HM:SS (H is Hour, M is Minutes and SS is seconds)
                      ```{text}```
                      BULLET POINT SUMMARY:
                      """

combine_prompt = PromptTemplate(
    template=combine_prompt_template, input_variables=["text"]
)

In [56]:


map_reduce_chain = load_summarize_chain(
    OpenAI(),
    chain_type="map_reduce",
    map_prompt=map_prompt,
    combine_prompt=combine_prompt,
    return_intermediate_steps=True,
)

In [60]:
output = map_reduce_chain({"input_documents": texts})

In [61]:
print(output["output_text"])


• Chapter 1: Introduction (0:00:00 - 0:37:68.14): Overview of the video and topics discussed. 
• Chapter 2: Dynamic Inheritance with Type Planter Class (36:64.86 - 36:67.06) 
• Chapter 3: Dynamic Inheritance with Type Meta Class (36:67.06 - 36:92.8) 
• Chapter 4: Creating a Tuple with a Single Element (36:94.72 - 37:19.03)
• Chapter 5: Understanding Python Tuples (37:19.03 - 37:20.55)
• Chapter 6: Syntactical Requirement (37:20.55 - 37:53.72)
• Chapter 7: Second Part (37:53.72 - 37:57.31)
• Chapter 8: Introduction (37:57.31 - 37:66.38)
• Chapter 9: Assignment of Custom Class Name (37:72.6 - 37:77.47)
• Chapter 10: Introduction to Experience Two (37:77.47 - 37:84.62)
• Chapter 11: Error Analysis (37:84.62 - 37:86.
