In [1]:
# Gerekli kütüphaneleri yükleme
!pip install beyondllm youtube_transcript_api llama-index-readers-youtube-transcript llama_index.embeddings.huggingface

Collecting openai==1.20.0 (from beyondllm)
  Using cached openai-1.20.0-py3-none-any.whl.metadata (21 kB)
Collecting google-generativeai<0.5.0,>=0.4.1 (from llama-index-embeddings-gemini==0.1.6->beyondllm)
  Using cached google_generativeai-0.4.1-py3-none-any.whl.metadata (6.2 kB)
Collecting google-ai-generativelanguage==0.4.0 (from google-generativeai<0.5.0,>=0.4.1->llama-index-embeddings-gemini==0.1.6->beyondllm)
  Using cached google_ai_generativelanguage-0.4.0-py3-none-any.whl.metadata (5.1 kB)
Using cached openai-1.20.0-py3-none-any.whl (292 kB)
Using cached google_generativeai-0.4.1-py3-none-any.whl (137 kB)
Using cached google_ai_generativelanguage-0.4.0-py3-none-any.whl (598 kB)
Installing collected packages: openai, google-ai-generativelanguage, google-generativeai
  Attempting uninstall: google-ai-generativelanguage
    Found existing installation: google-ai-generativelanguage 0.6.6
    Uninstalling google-ai-generativelanguage-0.6.6:
      Successfully uninstalled google-ai-

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
crewai 0.51.1 requires pydantic<3.0.0,>=2.4.2, but you have pydantic 1.10.17 which is incompatible.
crewai 0.51.1 requires regex<2024.0.0,>=2023.12.25, but you have regex 2024.4.16 which is incompatible.
crewai-tools 0.8.3 requires pydantic<3.0.0,>=2.6.1, but you have pydantic 1.10.17 which is incompatible.
embedchain 0.1.120 requires tiktoken<0.8.0,>=0.7.0, but you have tiktoken 0.6.0 which is incompatible.
instructor 1.3.3 requires pydantic<3.0.0,>=2.7.0, but you have pydantic 1.10.17 which is incompatible.
langchain-google-genai 1.0.8 requires google-generativeai<0.8.0,>=0.7.0, but you have google-generativeai 0.4.1 which is incompatible.
langchain-openai 0.1.21 requires openai<2.0.0,>=1.40.0, but you have openai 1.20.0 which is incompatible.
langchain-openai 0.1.21 requires tiktoken<1,>=0.7, but you have tikto

In [23]:
# Python kodu: API Anahtarlarını al ve ortam değişkenlerine ata
from getpass import getpass
import os

# Hugging Face ve Google API anahtarlarını güvenli bir şekilde alma ve ayarlama
hf_token = getpass('Enter Your HuggingfaceHub Token')
google_api_key = getpass('Enter Your Google API Key')

# Çevresel değişkenler olarak ayarlama
os.environ['HF_TOKEN'] = hf_token
os.environ['GOOGLE_API_KEY'] = google_api_key

In [24]:
from beyondllm import source, embeddings, retrieve, llms, generator

# YouTube videosundan veriyi yükleme
data = source.fit(
    path="https://www.youtube.com/watch?v=ZM1bdh2mDJQ",  # Video linki
    dtype="youtube",
    chunk_size=1024,  # Veriyi parçalara ayırma boyutu
    chunk_overlap=0   # Parçalar arasında üst üste binme miktarı
)


['https://www.youtube.com/watch?v=ZM1bdh2mDJQ']


In [25]:
# Veriyi vektörlere dönüştürmek için embedding modeli seçme
model_name = 'BAAI/bge-small-en-v1.5'  # Hugging Face üzerinde bulunan bir model
embed_model = embeddings.HuggingFaceEmbeddings(
    model_name=model_name
)

In [26]:
# Gelişmiş retriever'ı ayarlama
retriever = retrieve.auto_retriever(
    data=data,
    embed_model=embed_model,
    type="cross-rerank",  # Gelişmiş retriever türü
    mode="OR",            # 'OR' modunda çalışacak
    top_k=2               # En iyi iki eşleşmeyi getir
)


In [27]:
# Örnek bir sorgu ile veri getirme ve sonuçları gösterme
query = "Which tool is mentioned in the video?"
retrieved_nodes = retriever.retrieve(query)
print("Sorgu sonucu bulunan veriler:", retrieved_nodes)

Sorgu sonucu bulunan veriler: [NodeWithScore(node=TextNode(id_='36748b5c-0bf2-448d-b0d1-5900691da625', embedding=None, metadata={'video_id': 'ZM1bdh2mDJQ'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='ZM1bdh2mDJQ', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'video_id': 'ZM1bdh2mDJQ'}, hash='615e7d33180855d5371f6b37c8848fe6cb5ae545e1ba29af48aae860c75a6437')}, text="hi everyone welcome to the part seven\nvideo of building llm applications using\ngen stack in this video we are going to\nbuild a CSV agent using this tack so the\nCSV file will be the data source for the\nentire R pipeline so if you do not know\nwhat rag is then in short it's retrieval\naugmented generation it streamlines the\nprocess of retrieving data from a data\nsource then pre-processing it and\nstoring it in a certain way in a vector\nstore and then retri it when a query is\npassed and then presenting it with the\nhelp of a

In [28]:
# Hugging Face'den bir dil modeli ayarlama
llm = llms.HuggingFaceHubModel(
    model="mistralai/Mistral-7B-Instruct-v0.2",  # Hugging Face üzerinden bir LLM
    token=os.environ.get('HF_TOKEN')            # Hugging Face API anahtarını kullanarak
)

In [29]:
# System prompt ekleme
system_prompt = f"""
<s>[INST]
You are an AI Assistant.
Please provide direct answers to questions.
[/INST]
</s>
"""

In [31]:
# Sorgu oluşturma ve pipeline'dan yanıt alma
pipeline = generator.Generate(
    question=query,            # Sorgu olarak "query" değişkeni kullanılıyor
    retriever=retriever,
    system_prompt=system_prompt,  # System prompt'u ekledik
    llm=llm
)

In [32]:
# Yanıtı al ve göster
response = pipeline.call()
print("Model yanıtı:", response)

Model yanıtı: 
        ANSWER: The tools mentioned in the video are Titanic dataset (for reference), RAG (Retrieval Augmented Generation), Tinybird, Hugging Face Inference API, Azure Chat Openi, Chroma Vector Store, and Combine Docs Chain.


In [33]:
# RAG Triad değerlendirme metriklerini al ve göster
rag_evals = pipeline.get_rag_triad_evals()
print("RAG Triad Değerlendirmesi:", rag_evals)

Executing RAG Triad Evaluations...
RAG Triad Değerlendirmesi: Context relevancy Score: 10.0
This response meets the evaluation threshold. It demonstrates strong comprehension and coherence.
Answer relevancy Score: 10.0
This response meets the evaluation threshold. It demonstrates strong comprehension and coherence.
Groundness score: 7.0
This response does not meet the evaluation threshold. Consider refining the structure and content for better clarity and effectiveness.
