In [81]:
import shutil
import requests
from urllib.parse import urlparse
import sys
from tqdm.notebook import tqdm
import pandas as pd
from typing import Optional, List, Tuple
from langchain_core.language_models import BaseChatModel
import json
import datasets
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document as LangchainDocument
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import HuggingFaceHub
from langchain_community.chat_models import ChatHuggingFace
import os
import random
import time
from datasets import Dataset, DatasetDict
import gradio as gr

In [82]:
# PDF loader for the knowledge-base document. The path is relative, so
# "finance1.pdf" must be reachable from the notebook's working directory.
loader = PyPDFLoader("finance1.pdf")

In [83]:
# Chunking configuration: chunks of at most 512 units (as measured by the
# splitter's length function) with a 30-unit overlap between neighbours.
# Separators are tried in order, from paragraph breaks down to single characters.
# add_start_index=True asks the splitter to record each chunk's start offset.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=30,
    add_start_index=True,
    separators=["\n\n", "\n", ".", " ", ""],
)

try:
    # Load the PDF and split it into chunks in a single call.
    langchain_docs = loader.load_and_split(text_splitter=text_splitter)
except Exception as e:
    print("An error occurred:", e)
    # Re-raise: swallowing the error would leave `langchain_docs` undefined and
    # the index-building cell would then fail with an unrelated NameError.
    raise

# An empty chunk list cannot be indexed; fail here with a clear message.
if not langchain_docs:
    raise ValueError("No text chunks were produced from the PDF; cannot build the vector index.")


In [84]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Embed every chunk with the BGE base English model and index the resulting
# vectors in a FAISS store used for retrieval.
embedding_model = HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5')
db = FAISS.from_documents(langchain_docs, embedding_model)

In [85]:
from langchain_core.runnables import RunnablePassthrough

In [86]:
# Model setup: load the quantized base model, then attach the PEFT adapter.
# Tailor `model_name` (and the quantization settings) for your own purpose.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

# Hub id of the PEFT adapter; its config names the base model it was trained on.
model_name = 'Ubaidbhat/Finance'
config = PeftConfig.from_pretrained(model_name)
# 4-bit NF4 quantization with double quantization, computing in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
# Device map: the "" key is mapped to the current CUDA device when CUDA is
# available; otherwise None is passed and placement is left to the library.
# NOTE(review): 4-bit bitsandbytes loading presumably requires a GPU — confirm
# whether the None branch is actually usable.
d_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None

# Base model and tokenizer both come from the path recorded in the adapter config.
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map=d_map)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Wrap the base model with the adapter weights; `model` is rebound to the PEFT model.
model = PeftModel.from_pretrained(model, model_name)
# Optional (disabled): merge the adapter into the base weights.
# model = model.merge_and_unload()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [96]:
# Prompt used when no retrieved context is supplied. `{query}` is filled with
# the user's question via str.format; extract_answer() later keys off the
# '### Answer:' marker that ends the template.
prompt_template = """
  Below is an instruction that describes a task. Write a response that appropriately completes the request.
  ### Instruction:
  {query}
  
  ### Answer:
  """

# Prompt used for retrieval-augmented questions. `{query}` is the user's
# question and `{input}` is the retrieved passage; it ends with the same
# '### Answer:' marker as the template above.
prompt_template_with_context = """
  Below is an instruction that describes a task, paired with an input that provides further context. Write a response that 
  appropriately completes the request.
  ### Instruction:
  {query}
  ### Input:
  {input}
  
  ### Answer:
  """



def extract_answer(message):
    """Return the answer portion of a decoded generation.

    The text following the first '### Answer:' marker is taken, surrounding
    whitespace is stripped, and anything after the last full stop is dropped
    so a trailing incomplete sentence is not shown. When the text contains no
    full stop it is returned whole.

    Args:
        message: Decoded model output (prompt followed by the completion).

    Returns:
        The cleaned answer string, or a fixed fallback sentence when the
        marker is not present in ``message``.
    """
    marker = '### Answer:'
    _, found, tail = message.partition(marker)

    # Guard clause: no marker means there is nothing to extract.
    if not found:
        return "I don't have the answer to this question....."

    answer = tail.strip()
    cut = answer.rfind('.')
    if cut >= 0:
        # Keep everything up to and including the final full stop.
        answer = answer[:cut + 1]
    return answer.strip()


# Inference helper: generate a completion for a prompt and extract the answer text.
def inferance(prompt: str, model, tokenizer, temp = 1.0, limit = 400, input = False) -> str:
  """Generate a completion for ``prompt`` and return the extracted answer.

  Args:
      prompt: Fully formatted prompt, expected to end with '### Answer:'.
      model: Causal language model exposing ``generate`` and ``parameters``.
      tokenizer: Tokenizer matching ``model``.
      temp: Sampling temperature. Values <= 0 switch to greedy decoding,
          because sampling requires a strictly positive temperature.
      limit: Maximum number of new tokens to generate.
      input: Unused; kept so existing callers keep working.

  Returns:
      The answer text produced by ``extract_answer`` on the decoded output.
  """
  # Follow the model's actual placement instead of assuming "cuda:0", so the
  # inputs land on the right device on CPU-only or differently mapped setups.
  device = next(model.parameters()).device
  model_inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=True).to(device)

  generation_kwargs = {
      "max_new_tokens": limit,
      "pad_token_id": tokenizer.eos_token_id,
  }
  if temp is not None and temp > 0:
    # Previously `temp` was accepted but never forwarded to generate().
    generation_kwargs["do_sample"] = True
    generation_kwargs["temperature"] = temp
  else:
    generation_kwargs["do_sample"] = False

  generated_ids = model.generate(**model_inputs, **generation_kwargs)
  # Drop special tokens (e.g. EOS) so they cannot leak into an answer that
  # contains no full stop for extract_answer() to truncate at.
  decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

  return extract_answer(decoded[0])


In [100]:
def predict(type, text):
    """Answer ``text`` with the fine-tuned model, optionally using retrieved context.

    Args:
        type: Mode chosen in the UI. "Base" asks the model directly; any other
            value retrieves the best-matching chunk from ``db`` and passes it
            as context.
        text: The user's question.

    Returns:
        The answer string produced by ``inferance``.
    """
    # Start from the context-free prompt; it is also the fallback when
    # retrieval yields nothing.
    prompt = prompt_template.format(query=text)

    if type != "Base":
        retrieved = db.as_retriever().invoke(text)
        # Guard against an empty result instead of raising IndexError on [0].
        if retrieved:
            context = retrieved[0].page_content
            prompt = prompt_template_with_context.format(query=text, input=context)

    return inferance(prompt, model, tokenizer, temp = 1.0, limit = 400, input = False)

# Input widgets, listed in the positional order predict(type, text) expects.
mode_selector = gr.Radio(['Base', 'Context'], label="Select One")
question_box = gr.Textbox(
    label="Question",
    lines=2,
    value="Tell me Anything about Finance...",
)

# Gradio app wiring the two inputs to predict() with a plain-text output.
pred = gr.Interface(
    fn=predict,
    inputs=[mode_selector, question_box],
    outputs='text',
    title="Finance",
)

# share=True also publishes a temporary public URL in addition to the local one.
pred.launch(share=True)

Running on local URL:  http://127.0.0.1:7876
Running on public URL: https://0c243a494bc322dc0f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


