In [1]:
import os

import streamlit as st
from dotenv import load_dotenv

In [2]:
# !pip install llama-cpp-python

In [3]:
# !pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir

In [4]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp

In [5]:
# Path to the GGUF model file. Set the LLAMA_MODEL_PATH environment variable to
# point at a local copy; the fallback below is the original author's location.
my_model_path = os.environ.get("LLAMA_MODEL_PATH") or (
    r"D:\_programlama\ML\LangChainTutorial_1\models\chat\llama-2-7b-chat.Q3_K_M.gguf"
)

# Load and split document

In [6]:
from langchain.document_loaders import TextLoader

from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [7]:
# Chunk size in tokens; it is also embedded in the vector store's directory
# name, so a different chunk size maps to a different store.
token_size = 250
original_document_path = "./data/yeni_sss.txt"
vector_store_persist_directory = "./vector-store/asu_ai_db_tr_{}_llama_en".format(token_size)

## Embedding function

In [8]:
from langchain_community.embeddings import LlamaCppEmbeddings

In [9]:
# Embeddings are computed by llama.cpp from the model file at my_model_path.
embedding_function = LlamaCppEmbeddings(
    model_path=my_model_path,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [10]:
# Reopen the persisted Chroma store when it already holds data; build it from
# the source document only when the directory is missing or empty. Building on
# every run would re-embed the document and append duplicate chunks.
vector_store_ready = os.path.isdir(vector_store_persist_directory) and bool(
    os.listdir(vector_store_persist_directory)
)

if vector_store_ready:
    db = Chroma(
        persist_directory=vector_store_persist_directory,
        embedding_function=embedding_function,
    )
else:
    # Load documents
    loader = TextLoader(original_document_path, encoding="UTF-8")
    documents = loader.load()
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=token_size)

    # Split and persist documents
    docs = text_splitter.split_documents(documents)
    db = Chroma.from_documents(
        docs, embedding_function, persist_directory=vector_store_persist_directory
    )
    db.persist()

## Callbacks support token-wise streaming

In [11]:
# Print generated tokens to stdout as they are streamed.
callback_manager = CallbackManager(
    handlers=[StreamingStdOutCallbackHandler()],
)

## LLM

In [12]:
from langchain.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate, ChatPromptTemplate

In [13]:
# Tuning knobs: adjust both to the model and to the VRAM available on the GPU.
n_gpu_layers = 40
n_batch = 512  # must stay within 1..n_ctx

# Local llama.cpp model; double-check that my_model_path is valid on this machine.
llm = LlamaCpp(
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
    model_path=my_model_path,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [14]:

from langchain.retrievers.multi_query import MultiQueryRetriever

In [15]:
# Multi-query retriever built from the vector store's default retriever and the local LLM.
retriever_from_llm = MultiQueryRetriever.from_llm(llm=llm, retriever=db.as_retriever())


from langchain.prompts import PromptTemplate

def answer_my_question(question):
    """Answer ``question`` using context retrieved through ``retriever_from_llm``.

    The retrieved chunks are joined into a single context string, inserted
    into the Turkish prompt template together with the question, and sent to
    the module-level ``llm`` through an ``LLMChain``.

    Args:
        question: The user's question as plain text.

    Returns:
        Whatever ``LLMChain.run`` returns for the filled-in prompt (the
        generated answer), so callers can use it instead of relying on the
        streamed stdout output alone.
    """
    unique_docs = retriever_from_llm.get_relevant_documents(query=question)

    # Pass only the chunk texts to the prompt; interpolating the Document list
    # directly would embed its repr (metadata included) in the prompt.
    baglam = "\n\n".join(doc.page_content for doc in unique_docs)

    template = """{question} sorusuna aşağıda verilen bağlamdaki bilgilere ugun şekilde cevap ver.\n{baglam}.
    Cevapta sadece sorunun en kısa cevabını döndür. Soru yeterince açık değilse soru alternatifleri vererek tekrar sorulmasını iste."""
    prompt = PromptTemplate(template=template, input_variables=["question", "baglam"])

    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return llm_chain.run(question=question, baglam=baglam)

In [16]:
# Example query to exercise the retrieval + generation pipeline end to end.
answer_my_question("Which master's programs are free?")


Version 1: What master's programs have zero cost and are available in real-time?
Version 2: Can you provide a list of fully funded master's degrees that I can access immediately?
Version 3: How do I find graduate programs with no tuition fees and immediate availability?

Llama.generate: prefix-match hit



Which master's programs are free?

The following master's programs are free:

1. Master of Arts in Teaching (MAT) - This program is offered by the University of Georgia and is designed for students who want to become teachers in grades 4-12. The program is fully online and does not charge tuition.
2. Master of Science in Environmental Science (MSES) - This program is offered by the University of Denver and focuses on environmental science, policy, and management. The program is completely free, thanks to a grant from the National Science Foundation.
3. Master of Public Health (MPH) - Many universities offer MPH programs that are free or low-cost, including the University of California, Berkeley, and the University of Michigan. These programs are designed for students who want to pursue careers in public health, epidemiology, and health policy.
4. Master of Business Administration (MBA) - Some universities offer MBA programs that are free or low-cost, including the Massachusetts Instit

In [17]:
# # Callbacks support token-wise streaming
# callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [18]:
# template = """Question: {question}

# Answer: Let's work this out in a step by step way to be sure we have the right answer."""

# prompt = PromptTemplate(template=template, input_variables=["question"])

In [19]:
# llm_chain = LLMChain(prompt=prompt, llm=llm)
# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
# llm_chain.run(question)