In [None]:
pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain

In [None]:

pip install -U langchain langchain-openai

In [None]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = 'api_key' #kicseréltem, hogy a githubon ne legyen benne a kulcs

In [None]:
os.environ['OPENAI_API_KEY'] = 'api_key'

In [None]:
pip install PyGithub

In [None]:
pip install faiss-cpu


In [None]:
pip install faiss-cpu

## Github repo feldolgozása és indexelés

In [9]:
import requests
from github import Github
from bs4 import BeautifulSoup
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
import faiss
import numpy as np

In [10]:
#GitHub Repoból kiszedi a szükséges dolgokat (kód, dokumentáció)
def clone_repo(github_token, repo_name):
    g = Github(github_token)
    repo = g.get_repo(repo_name)
    
    contents = repo.get_contents("")
    
    files = []
    while contents:
        file_content = contents.pop(0)
        if file_content.type == "dir":
            contents.extend(repo.get_contents(file_content.path))
        else:
            if file_content.name.endswith(('.md', '.py', '.txt', '.json', '.yml', '.c', '.h', '.cpp', '.hpp')): # Ezeket a fájlokat fogadja el egyenlőre
                file_data = {
                    "file_name": file_content.name,
                    "file_content": requests.get(file_content.download_url).text
                }
                files.append(file_data)
    
    return files

In [11]:
#A Issue-k kiszedése a repo-ból
def fetch_issues(github_token, repo_name):
    g = Github(github_token)
    repo = g.get_repo(repo_name)
    
    issues = []
    for issue in repo.get_issues(state='open'):
        issue_data = {
            "file_name": f"issue_{issue.number}",
            "file_content": f"Title: {issue.title}\nDescription: {issue.body}"
        }
        issues.append(issue_data)
    
    return issues

In [12]:
#A fájlok kisebb darabokra bontása
def preprocess_files(files):
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    documents = []
    
    for file in files:
        content = file['file_content']
        
        #Markdown fájlok esetén HTML kód eltávolítása
        if file['file_name'].endswith(('.md', '.html')):
            soup = BeautifulSoup(content, 'html.parser')
            content = soup.get_text()
        
        chunks = text_splitter.split_text(content)
        
        for chunk in chunks:
            documents.append(chunk)
    
    return documents

In [13]:
#Embeddingek generálása  OpenAI embeddings-el és FAISS indexeléssel
def create_embeddings(documents):
    embeddings = OpenAIEmbeddings()
    
    doc_embeddings = embeddings.embed_documents(documents)
    doc_embeddings_np = np.array(doc_embeddings)
    
    index = faiss.IndexFlatL2(doc_embeddings_np.shape[1])
    index.add(doc_embeddings_np)
    
    return index, doc_embeddings_np, documents

In [None]:
#Egy konkrét repora alkalmazva a fenti funkciókat

github_token = 'api_key'
repo_name = "pydantic/pydantic"
    
files = clone_repo(github_token, repo_name)
    
issues = fetch_issues(github_token, repo_name)
    
all_documents = files + issues

In [None]:
documents = preprocess_files(all_documents)
    
index, doc_embeddings_np, documents = create_embeddings(documents)

## Retrieval

In [16]:
#A kérdés (Query) feldolgozása és a legrelevánsabb dokumentumok visszaadása
def retrieve_relevant_document(query, index, documents, embeddings, k=5):
    query_embedding = embeddings.embed_query(query)
    query_embedding_np = np.array(query_embedding).reshape(1, -1)
    D, I = index.search(query_embedding_np, k)
    
    relevant_docs = [documents[i] for i in I[0]]
    return relevant_docs

In [17]:
#Példa kérdés
query = "Does this program support Python 3.14?"
relevant_docs = retrieve_relevant_document(query, index, documents, OpenAIEmbeddings())
    
print(f"5 válasz legrevelánsabb dokumentum:")
for doc in relevant_docs:
        print(f"- {doc}")

5 válasz legrevelánsabb dokumentum:
- Title: Add support for Python 3.14
Description: First 3.14 beta release is [planned on 2025-05-06](https://peps.python.org/pep-0745/#release-schedule) and PEP 649/749 is almost fully implemented.

Considering the significant changes it provides to the runtime evaluation of type hints, we should add support to 3.14 and report any bugs/issues.
- ### Python, Pydantic & OS Version

```Text
sqlalchemy 1.4
pydantic 2.7.1
python 3.11.5
```
- ```Python

```

### Python, Pydantic & OS Version

```Text
2.10
```
- ```Text
Python 3.10.9
pydantic 2.0a4
Windows 10
```
- As it is being proposed as an official standard for Python, other editors can also easily add support for it.

And authors of other libraries similar to Pydantic can also easily adopt the standard right away (using the "Alternate Form") and get the benefits of these additional editor features.


## Generation

In [18]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [19]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [20]:
chain = prompt | llm

In [21]:
chain.invoke({"context":relevant_docs,"question":"Does this program support Python 3.14?"})

AIMessage(content='Yes, the program supports Python 3.14 as it mentions adding support for Python 3.14 and reporting any bugs/issues related to it.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 287, 'total_tokens': 318, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BJdCrULdfptHFuwh3PBOoiLFVYfmq', 'finish_reason': 'stop', 'logprobs': None}, id='run-1ad26a7a-5aae-4ad7-a8f0-d234536711b3-0', usage_metadata={'input_tokens': 287, 'output_tokens': 31, 'total_tokens': 318, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

## Kiértékelés RAGAs-sal

In [None]:
pip install ragas

In [None]:
from datasets import Dataset

questions = ["Does this program support Python 3.14?", 
             "What is the purpose of the REDIRECT_TO_V1 dictionary?",
             "What is the purpose of the is_root_model function",
            ]
ground_truths = ["Yes, this program supports Python 3.14.",
                "The REDIRECT_TO_V1 dictionary automatically redirects certain utility functions from the main Pydantic namespace to their v1 implementations, generating a warning message that informs users about the change while maintaining backward compatibility. This helps manage the transition between major versions by ensuring old code continues to work while encouraging updates to newer patterns.",
                "The is_root_model function determines whether a given TypeInfo represents a RootModel subclass or the RootModel class itself. It does this by checking if the type has a base class matching the ROOT_MODEL_FULLNAME constant, which is defined as 'pydantic.root_model.RootModel'."]
answers = []
contexts = []

for query in questions:
  context = retrieve_relevant_document(query, index, documents, OpenAIEmbeddings())
  answers.append(chain.invoke({"context": context,"question":query}).content)
  contexts.append(context)

data = {
    "question": questions,
    "answer": answers,
    "contexts": contexts,
    "reference": ground_truths
}

dataset = Dataset.from_dict(data)

  from .autonotebook import tqdm as notebook_tqdm


In [24]:
data

{'question': ['Does this program support Python 3.14?',
  'What is the purpose of the REDIRECT_TO_V1 dictionary?',
  'What is the purpose of the is_root_model function'],
 'answer': ['Yes, the program supports Python 3.14 as it mentions adding support for Python 3.14 and reporting any bugs/issues related to it.',
  "The purpose of the REDIRECT_TO_V1 dictionary is to map certain objects from the 'pydantic.utils' module to their equivalents in the 'pydantic.v1.utils' module.",
  'The purpose of the is_root_model function is to determine whether the type info is a root model subclass (or the `RootModel` class itself).'],
 'contexts': [['Title: Add support for Python 3.14\nDescription: First 3.14 beta release is [planned on 2025-05-06](https://peps.python.org/pep-0745/#release-schedule) and PEP 649/749 is almost fully implemented.\n\nConsidering the significant changes it provides to the runtime evaluation of type hints, we should add support to 3.14 and report any bugs/issues.',
   '### P

In [25]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)

result = evaluate(
    dataset = dataset, 
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
    ],
)

df = result.to_pandas()

Evaluating: 100%|██████████| 12/12 [00:10<00:00,  1.12it/s]


In [26]:
df

Unnamed: 0,user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy
0,Does this program support Python 3.14?,[Title: Add support for Python 3.14\nDescripti...,"Yes, the program supports Python 3.14 as it me...","Yes, this program supports Python 3.14.",1.0,0.0,0.666667,0.989978
1,What is the purpose of the REDIRECT_TO_V1 dict...,[REDIRECT_TO_V1 = {\n f'pydantic.utils:{obj...,The purpose of the REDIRECT_TO_V1 dictionary i...,The REDIRECT_TO_V1 dictionary automatically re...,1.0,0.0,1.0,1.0
2,What is the purpose of the is_root_model function,"[::: pydantic.root_model, Note that for [root ...",The purpose of the is_root_model function is t...,The is_root_model function determines whether ...,0.25,1.0,1.0,0.987499
