In [1]:
import os
from dotenv import load_dotenv

# Load environment variables from a .env file, if one is found.
load_dotenv()

# OpenAI credential read from the environment; None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Video whose audio track is downloaded and transcribed later in the notebook.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=BblV6AQsd2s&ab_channel=RiseAgainstVEVO"

### Setting up the model
Define the LLM to use.

In [2]:
from langchain_openai.chat_models import ChatOpenAI

# Chat model shared by every chain built in this notebook.
model = ChatOpenAI(
  model="gpt-3.5-turbo",
  openai_api_key=OPENAI_API_KEY,
)

# Smoke test: calling the model directly returns an AIMessage object.
model.invoke("Are we in a simulation?")

AIMessage(content='There is currently no scientific evidence to definitively prove or disprove that we are living in a simulation. The idea of a simulated reality is a philosophical concept that has been explored in various theories, but it remains a topic of debate and speculation. Ultimately, whether or not we are in a simulation is a question that may never be fully answered.', response_metadata={'finish_reason': 'stop', 'logprobs': None})

### Using the magic of Chains :)

In [3]:
from langchain_core.output_parsers import StrOutputParser

# Output parser placed at the end of the chain; with it the chain returns a
# plain string rather than an AIMessage.
parser = StrOutputParser()

# Minimal chain: the model's output is piped straight into the parser.
chain = model | parser

chain.invoke("Are we in a simulation?")

'There is currently no scientific evidence to definitively prove or disprove the idea that we are living in a simulation. Some researchers and philosophers have proposed the concept of a simulated reality, but it remains a speculative and philosophical question. Ultimately, it is up to individual belief and interpretation.'

### Using prompt templates

In [4]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text with two placeholders, {context} and {question}, filled in at
# invocation time.
template = """
  Answer the question based on the context below. If you can't
  answer the question, just say "I don't know".

  Context: {context}

  Question: {question}
  """

prompt = ChatPromptTemplate.from_template(template)

# Render the prompt once with sample values to inspect the text it produces.
print(
  prompt.format(
    context="The sky is blue.",
    question="What color is the sky?",
  )
)

print("Did it work?")

# Rebinds `chain`: the prompt now runs first, then the model, then the parser.
chain = prompt | model | parser
chain.invoke(
  {
    "context": "I have an appointment tomorrow at 3pm.",
    "question": "Can I go for lunch tomorrow at 3:00PM?",
  }
)

Human: 
  Answer the question based on the context below. If you can't
  answer the question, just say "I don't know".

  Context: The sky is blue.

  Question: What color is the sky?
  
Did it work?


'No, you have an appointment at 3pm tomorrow so you cannot go for lunch at that time.'

### Combining Chains

In [5]:
from operator import itemgetter

# Prompt that asks the model to translate {answer} into {language}.
translation_template = """
  Translate the 

  answer: {answer}

  to

  language: {language}
  """

translation_prompt = ChatPromptTemplate.from_template(translation_template)

# The leading dict maps the input onto the prompt variables: "answer" is
# produced by running the question-answering `chain`, while "language" is
# picked out of the input dict unchanged.
translation_chain = (
  {
    "answer": chain,
    "language": itemgetter("language"),
  }
  | translation_prompt
  | model
  | parser
)

translation_chain.invoke(
  {
    "context": "I have an appointment tomorrow at 3pm.",
    "question": "Can I go for lunch tomorrow at 3:00PM?",
    "language": "es",
  }
)


'Respuesta: No, tienes una cita a las 3pm mañana.'

### Transcribing the YouTube Video

In [6]:
import tempfile
import whisper
from pytube import YouTube

# Transcribing needs an audio download plus a Whisper run, so the text is
# cached on disk and reused on later runs.
TRANSCRIPTION_FILE = "transcription.txt"

if os.path.exists(TRANSCRIPTION_FILE):
  # Cache hit: load the saved text so `transcription` is defined on every run,
  # not only on the run that created the file.
  with open(TRANSCRIPTION_FILE) as f:
    transcription = f.read()
else:
  youtube = YouTube(YOUTUBE_VIDEO)
  audio = youtube.streams.filter(only_audio=True).first()
  if audio is None:
    # .first() returns None when the filter matches no stream; fail with a
    # clear message instead of an AttributeError on `audio.title`.
    raise RuntimeError(f"No audio-only stream found for {YOUTUBE_VIDEO}")

  whisper_model = whisper.load_model("base")

  # The audio file is only needed while transcribing, so it lives in a
  # temporary directory that is removed when the block exits.
  with tempfile.TemporaryDirectory() as temp_dir:
    print(f"** Downloading {audio.title} to {temp_dir}")
    file = audio.download(temp_dir, filename="transcription_audio.mp4")

    # fp16=False asks Whisper not to use half precision.
    transcription = whisper_model.transcribe(file, fp16=False)["text"].strip()

  with open(TRANSCRIPTION_FILE, "w") as f:
    f.write(transcription)

# Top-level expression so the notebook actually displays a short preview.
transcription[:100]

### Splitting into documents

In [7]:
from langchain_community.document_loaders import TextLoader

# Read the cached transcription from disk as LangChain Document objects.
loader = TextLoader("transcription.txt")
text_documents = loader.load()

# Display the loaded documents for inspection.
text_documents


[Document(page_content="This video is only under Stunde Oh Am I loud and clear I'm not breaking up, am I still your charm? I'm not just that luck, are we getting closer? Are we just getting more lost? I'll show you mine if you show me yours first Let's compare scars, I'll tell you who's It is worse, let's unlight these pages and replace them with the wrong ones We live on front porches and swim like a boy We get by just fine here, I've been in the way The flow is a labor, I'll sleep till the end I'll cross these streets until you hold my hand I've been here so long, I think that it's time to move The winter's so cold, summer's over too soon Let's pack our bags and settle down where palm trees grow I've got some friends, some that I hardly know But we've had some times, I wouldn't trade for the world We chase these days down with talks of the places that we will go We live on front porches and swim like a boy We get by just fine here, I've been in the way The flow is a labor, I'll sleep

### Setting up the splitter

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# NOTE(review): the output saved with this cell shows chunks of roughly 100
# characters, which does not match chunk_size=1000, and the execution count
# jumps from 7 to 10. The cell appears to have been edited after it was last
# run; re-run it to refresh the output.
text_splitter = RecursiveCharacterTextSplitter(
  chunk_overlap=20,
  chunk_size=1000,
)

# Preview at most the first five chunks.
text_splitter.split_documents(text_documents)[:5]

[Document(page_content="This video is only under Stunde Oh Am I loud and clear I'm not breaking up, am I still your charm?", metadata={'source': 'transcription.txt'}),
 Document(page_content="I still your charm? I'm not just that luck, are we getting closer? Are we just getting more lost?", metadata={'source': 'transcription.txt'}),
 Document(page_content="getting more lost? I'll show you mine if you show me yours first Let's compare scars, I'll tell you", metadata={'source': 'transcription.txt'}),
 Document(page_content="I'll tell you who's It is worse, let's unlight these pages and replace them with the wrong ones We", metadata={'source': 'transcription.txt'}),
 Document(page_content="the wrong ones We live on front porches and swim like a boy We get by just fine here, I've been in", metadata={'source': 'transcription.txt'})]