<a href="https://colab.research.google.com/github/davinfalahtama/AI-Dikti/blob/main/playground.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install optimum
!pip install accelerate
!pip install auto-gptq
!pip install PyPDF2
!pip install langchain
!pip install langchain_google_genai
!pip install faiss-gpu

In [1]:
import requests
import torch
from transformers import AutoModelForCausalLM, AutoConfig,AutoTokenizer
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
import transformers

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
model_name= "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"

config = AutoConfig.from_pretrained(model_name)
config.quantization_config["disable_exllama"] = True

model = AutoModelForCausalLM.from_pretrained(model_name,
                                             config=config,
                                             revision="main").to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

text_generation_pipeline = transformers.pipeline(
            model=model,
            tokenizer=tokenizer,
            task="text-generation",
            temperature=0.2,
            repetition_penalty=1.1,
            return_full_text=True,
            max_new_tokens=300,
)
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [None]:
prompt = "Tell me about AI"
prompt_template=f'''<s>[INST] {prompt} [/INST]
'''

print("*** Pipeline:")
pipe = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

print(pipe(prompt_template)[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


*** Pipeline:


In [6]:
def extract_text_from_pdf(file):
      pdf_text = ""
      pdf_reader = PdfReader(file)
      for page in pdf_reader.pages:
          pdf_text += page.extract_text()
      return pdf_text

file = extract_text_from_pdf("MTA023401.pdf")

In [7]:
def split_text_into_chunks(text):
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
        return text_splitter.split_text(text)

splited_text = split_text_into_chunks(file)

In [8]:
def create_vector_store(text_chunks):
      embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",google_api_key="AIzaSyBYgcagyUPWzHFRyTZO3o8r85oZqmC25E8")
      vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
      return vector_store

vector_store = create_vector_store(splited_text)

In [10]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

def get_conversational_chain(llm,vector_chain):
    prompt_template = """
        ### [INST]
        Instruction: Answer the question based on your knowledge. Here is context to help:

        {context}

        ### QUESTION:
        {question}

        [/INST]
    """
    retriever = vector_chain.as_retriever(search_type="similarity", search_kwargs={"k": 6})


    custom_rag_prompt = PromptTemplate.from_template(prompt_template)

    rag_chain = (
      {"context": retriever | format_docs, "question": RunnablePassthrough()}
      | custom_rag_prompt
      | llm
      | StrOutputParser()
    )

    return rag_chain


rag_chain =  get_conversational_chain(mistral_llm,vector_store)

In [None]:
rag_chain.invoke("What is Task Decomposition?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
