In [1]:
import configparser, os, re
# Load API credentials from a local INI file and export them as environment
# variables for the client libraries used later in the notebook.
KEYS_PATH = './keys.ini'
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means the key file was not found.
# Fail here with a clear message instead of a later KeyError on 'GOOGLE'.
if not config.read(KEYS_PATH):
    raise FileNotFoundError(
        f"could not read {KEYS_PATH!r}; it must provide [GOOGLE] GOOGLE_API_KEY / "
        "GOOGLE_CSE_ID and [OPENAI] OPENAI_API_KEY"
    )
os.environ['GOOGLE_API_KEY'] = config['GOOGLE']['GOOGLE_API_KEY']
os.environ['GOOGLE_CSE_ID'] = config['GOOGLE']['GOOGLE_CSE_ID']
openai_api_key = config['OPENAI']['OPENAI_API_KEY']
os.environ['OPENAI_API_KEY'] = openai_api_key

In [2]:
from typing import Any, List, Mapping, Optional
from langchain.llms.base import LLM
from langchain.callbacks.manager import CallbackManagerForLLMRun
import requests

class CustomLLM2(LLM):
    """LangChain LLM wrapper around a local chat-completions HTTP endpoint.

    Each call POSTs the prompt as a single ``user`` message to
    ``http://localhost:5000/v1/chat/completions`` (with ``mode="instruct"`` and
    the ``Alpaca`` instruction template) and returns the content of the first
    choice in the JSON reply.
    """

    @property
    def _llm_type(self) -> str:
        """Return the LLM type label."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the local server and return the generated text.

        Args:
            prompt: Text sent as the content of the single user message.
            stop: Optional stop sequences. When a list is given, a few extra
                delimiters are appended; the combined list is forwarded to the
                server and also applied to the reply, so the returned text
                never includes a stop sequence or anything after it.
            run_manager: Accepted for compatibility with the LangChain
                ``_call`` signature; not used here.
            **kwargs: Accepted for compatibility; ignored.

        Returns:
            The generated message content, cut at the earliest stop sequence.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        if isinstance(stop, list):
            # Build a new list so the caller's list is not mutated.
            stop = stop + ["\n###","\nObservation:","\n问题","\nQuestion:"]
        HOST = 'localhost:5000'
        URI = f'http://{HOST}/v1/chat/completions'

        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "mode": "instruct",
            "instruction_template": "Alpaca",
        }
        if stop:
            # Previously the stop list was computed but never used; forward it
            # so the server can end generation early.
            payload["stop"] = stop

        response = requests.post(URI, json=payload)
        response.raise_for_status()
        text = response.json()['choices'][0]['message']['content']

        if stop:
            # Enforce the stop sequences locally as well, in case the server
            # does not honor them: cut at the earliest occurrence of any one.
            positions = [text.find(seq) for seq in stop if seq]
            hits = [pos for pos in positions if pos != -1]
            if hits:
                text = text[:min(hits)]
        return text

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {}

#use a single "stuff" chain to summarize the subtitle text passed in as documents
def story_summary_stuff(docs, prompt_template = "", llm = None):
    """Summarize ``docs`` with one LLM call through a ``StuffDocumentsChain``.

    Args:
        docs: Documents whose content is inserted into the prompt's
            ``{subtitle}`` placeholder.
        prompt_template: Prompt text containing a ``{subtitle}`` placeholder.
            When empty, a default prompt is used that asks for an overall
            summary plus time-stamped section summaries.
        llm: Model used for the chain. When omitted, the module-level ``llm``
            variable is used, as before.

    Returns:
        The value returned by ``StuffDocumentsChain.run(docs)``.

    Raises:
        NameError: If ``llm`` is not passed and no module-level ``llm`` exists.
    """
    from langchain.chains.llm import LLMChain
    from langchain.prompts import PromptTemplate
    from langchain.chains.combine_documents.stuff import StuffDocumentsChain

    # Resolve the model explicitly rather than silently relying on a global
    # that may only be defined in a later notebook cell.
    if llm is None:
        llm = globals().get("llm")
        if llm is None:
            raise NameError(
                "name 'llm' is not defined; pass llm=... or define a "
                "module-level `llm` before calling story_summary_stuff"
            )

    # Define prompt
    if prompt_template == "":
        prompt_template = """The text below is the subtitle of a video about US stock market. Summarize it. 
        List the major sections of the video with time stamp.
        Example output:
        Overall summary of the video
        Time Stamp1: Summary1
        Time Stamp2: Summary2
        ...
        "{subtitle}"
        Output:"""
    prompt = PromptTemplate.from_template(prompt_template)

    # Define LLM chain
    llm_chain = LLMChain(llm=llm, prompt=prompt)

    # Define StuffDocumentsChain; the variable name must match the prompt placeholder
    stuff_chain = StuffDocumentsChain(
        llm_chain=llm_chain, document_variable_name="subtitle"
    )

    return stuff_chain.run(docs)

#YouTube loader: summarize a video's time-stamped transcript and store the summary in a Chroma DB
def loader(link:str, language=["zh"], db_loc = "./cache/YTDBT", overwrite = False):
    """Load a YouTube video, summarize its SRT transcript, and store the summary.

    Args:
        link: URL of the YouTube video.
        language: Transcript language codes passed to the YouTube loader and
            to the transcript API. The default list is never mutated here.
        db_loc: Persist directory of the Chroma database.
        overwrite: When false and the video id is already stored in the
            database, nothing is added.

    Returns:
        ``None`` if the video could not be loaded or yielded no documents;
        a message string if the video is already in the database and
        ``overwrite`` is false; otherwise the summary ``Document`` that was
        written to the database.
    """
    import os
    from langchain.docstore.document import Document
    from langchain.document_loaders import YoutubeLoader
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.vectorstores import Chroma
    from youtube_transcript_api import YouTubeTranscriptApi
    from youtube_transcript_api.formatters import SRTFormatter

    #load video info from link
    try:
        yt_loader = YoutubeLoader.from_youtube_url(
            link, add_video_info=True, language=language
        )
        docs = yt_loader.load()
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        print(e)
        return None

    # An empty result would otherwise crash on docs[0] below; treat it like
    # any other load failure.
    if not docs:
        print("no transcript documents loaded for " + link)
        return None

    video_meta = docs[0].metadata
    video_id = video_meta['source']

    #check whether the vid is already in the DB
    db = Chroma(persist_directory=db_loc, embedding_function=OpenAIEmbeddings())
    stored_ids = set(db.get()['ids'])
    if video_id in stored_ids and not overwrite:
        return link + " link already in the db, skip"
    print("adding link " + link)

    #format the transcript into SRT
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=language)
    srt_formatted = SRTFormatter().format_transcript(transcript)
    srt_path = "./cache/Output.srt"
    # The SRT dump goes to ./cache regardless of db_loc, so make sure it exists.
    os.makedirs(os.path.dirname(srt_path), exist_ok=True)
    with open(srt_path, "w", encoding="utf-8") as text_file:
        text_file.write(srt_formatted)

    #summarize the SRT with time stamps
    output_srt = Document(
        page_content="Title: " + video_meta['title'] + " \nSRT: " + srt_formatted,
        metadata=video_meta,
    )
    res = story_summary_stuff([output_srt])

    #write the summary into DB, keyed by the video id
    output_doc = Document(page_content=res, metadata=video_meta)
    db.add_documents([output_doc], ids=[video_id])
    return output_doc


In [3]:
from langchain.chat_models import ChatOpenAI
# Model used by story_summary_stuff; switch to the commented line to use the local server instead.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
#llm = CustomLLM2()
res = loader(
    "https://www.youtube.com/watch?v=8Y6k3z-c3oc&list=PLs8TZNEg0Ubi06d-yWj88kC8Wk5zsMANU&index=3&ab_channel=TwilledWave",
    language=["en","zh-Hant"],
    db_loc = "./cache/test",
    overwrite = True,
)
# loader returns None on failure (or a message string when it skips a video),
# so only read page_content from an actual document.
if res is None or isinstance(res, str):
    print(res)
else:
    print(res.page_content)

adding link https://www.youtube.com/watch?v=8Y6k3z-c3oc&list=PLs8TZNEg0Ubi06d-yWj88kC8Wk5zsMANU&index=3&ab_channel=TwilledWave
**Overall summary of the video:**
The video appears to be a dramatic narrative involving a doctor who visits a remote village to provide medical assistance. The villagers, suffering from various ailments and hardships, view the doctor as a savior. The doctor, however, struggles with the villagers' expectations and the harsh realities of their situation. The story explores themes of hope, disillusionment, and the limitations of medical intervention in the face of systemic issues.

**Major sections of the video with time stamps:**

**00:00:00 - 00:00:02:** Introduction and gratitude expressed to the doctor for saving a life.

**00:00:02 - 00:00:05:** The doctor is advised to keep quiet and let a child rest to avoid recurring illness.

**00:00:05 - 00:00:08:** Discussion about payment for the doctor's services, which the doctor refuses.

**00:00:08 - 00:00:10:** T

In [17]:
from pytube import Playlist
# Summarize every video of the playlist into the default database.
p = Playlist('https://www.youtube.com/playlist?list=PLweTl9OQEsJhOM8W6ZAigP7eJxBU_yQM6')
for url in p.video_urls:
    res = loader(url,["en"])
    # loader returns None on failure and a message string when the video is
    # already stored; neither has .metadata, so print those as-is.
    if res is None or isinstance(res, str):
        print(res)
    else:
        print(res.metadata['title'])

adding link https://www.youtube.com/watch?v=-iHseGn2LhQ


HTTPError: 500 Server Error: Internal Server Error for url: http://localhost:5000/v1/chat/completions

In [16]:
# Display the video URLs of the playlist currently bound to `p`
# (`p` is assigned in the playlist cells of this notebook, so run one of them first).
p.video_urls

['https://www.youtube.com/watch?v=zCf87ABhqpU', 'https://www.youtube.com/watch?v=-iHseGn2LhQ', 'https://www.youtube.com/watch?v=Oh3QdmodNK8']

In [4]:
#llm = CustomLLM2()
# Summarize the first video of each channel playlist and collect the summaries in `text`.
db_loc = "./cache/stock"
Channels = ["https://www.youtube.com/watch?v=0p4EGTMBGoM&list=UULFH-_Z7YSk4QTFmXbYSSBB4w&ab_channel=MikeSwartz",
            "https://www.youtube.com/watch?v=PY2dh6RdfIU&list=UULFoMzWLaPjDJBbipihD694pQ&ab_channel=StockMoe",
            "https://www.youtube.com/watch?v=hE-k_SUKGCs&list=UULFFCEuCsyWP0YkP3CZ3Mr01Q&ab_channel=ThePlainBagel",
            "https://www.youtube.com/watch?v=t1Ic3f8q-o4&list=UULFFhJ8ZFg9W4kLwFTBBNIjOw&ab_channel=NaNa%E8%AF%B4%E7%BE%8E%E8%82%A1",
            "https://www.youtube.com/watch?v=tLLzdsLTDPs&list=UULF7kCeZ53sli_9XwuQeFxLqw&ab_channel=TickerSymbol%3AYOU",
            "https://www.youtube.com/watch?v=7BnzPu6LbMY&list=UULFyqlbzLoYtpqDXwRI9Yh5LA&ab_channel=BWB-BusinessWithBrian",
           ]

from pytube import Playlist
text = ""
for c in Channels:
    p = Playlist(c)
    # Index only the first entry so the rest of the playlist is not enumerated;
    # skip the channel if the playlist has no videos.
    try:
        url = p.video_urls[0]
    except IndexError:
        print("no videos found for " + c)
        continue
    # Use the db_loc defined above instead of repeating the path literal.
    res = loader(url,language=["en","zh-Hant"],db_loc = db_loc)
    # loader returns None on failure and a message string when the video is already stored.
    if isinstance(res, str) or res is None:
        print(res)
    else:
        print(res.metadata['title'])
        print(res.page_content)
        text = text +"\n" + res.page_content


adding link https://www.youtube.com/watch?v=bjBs9QDz_XI
My AlgoBox Indicators And Settings
### Overall summary of the video
The video provides a detailed overview of the speaker's AlgoBox indicators and settings used for trading in the US stock market. It covers various charts, indicators, and settings, explaining how they are configured and utilized to optimize trading performance.

### Major sections of the video with time stamps

**00:00:00 - 00:00:01**: Introduction to the video and common questions from traders.

**00:00:01 - 00:00:04**: Overview of the AlgoBox settings and charts used.

**00:00:04 - 00:00:06**: Explanation of the 89 range chart and its significance.

**00:00:06 - 00:00:08**: Introduction to the data series and loading bars.

**00:00:08 - 00:00:11**: Discussion on keeping the machine light for better performance.

**00:00:11 - 00:00:13**: Indicators used on the 89 range chart.

**00:00:13 - 00:00:15**: Details on the golden ropes indicator.

**00:00:15 - 00:00:18*

KeyboardInterrupt: 