In [None]:
!pip install bitsandbytes accelerate langchain arxiv fitz sentence_transformers faiss-gpu pymongo

In [1]:
import torch
import time
from io import BytesIO
from langchain.prompts import PromptTemplate # for custom prompt specification
from langchain.text_splitter import RecursiveCharacterTextSplitter # splitter for chunks
from langchain.embeddings import HuggingFaceEmbeddings # embeddings
from langchain.vectorstores import FAISS # vector store database
from langchain.chains import RetrievalQA # qa and retriever chain
from langchain.memory import ConversationBufferMemory # for model's memory on past conversations
from langchain.llms.huggingface_pipeline import HuggingFacePipeline # pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain.docstore.document import Document

In [2]:
# Transformer model configuration.
# Load the weights in 4-bit NF4 with double quantization (compute dtype: bfloat16).
# This massively reduces the model's precision in exchange for memory efficiency,
# so the model's accuracy is reduced.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "bigcode/starcoder2-3b" # Hugging Face model repo id
# NOTE(review): `device` is not referenced by the other visible cells (the embeddings
# cell hard-codes "cuda") — confirm whether it is still needed.
device = 'cuda'

# Load the tokenizer and the quantized causal-LM weights for `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             trust_remote_code=True,
                                             device_map = "auto",
                                          quantization_config=quant_config)

# Create a text-generation pipeline that returns only the newly generated text
# (return_full_text=False) and produces at most 2048 new tokens per call.
# NOTE(review): `do_sample` is not set here — confirm whether `temperature` has any
# effect under the pipeline's default decoding settings.
pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                temperature=1e-3,
                return_full_text = False,
                max_new_tokens=2048)

# Wrap the pipeline so it can be passed as the `llm` of a LangChain chain.
llm = HuggingFacePipeline(pipeline=pipe)

# Replace locale.getpreferredencoding so it always reports "UTF-8" for the rest of the
# session. NOTE(review): presumably a workaround for an encoding error in the hosted
# runtime — confirm it is still required.
import locale
locale.getpreferredencoding = lambda: "UTF-8"

model-00002-of-00003.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# Sentence-embedding model used to vectorise documents for the FAISS store.
embeddings_model_id = "BAAI/bge-large-en-v1.5"

embeddings_model = HuggingFaceEmbeddings(
    model_name=embeddings_model_id,
    model_kwargs=dict(device="cuda"),  # embeddings are computed on the GPU
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [4]:
import zipfile
import os
from pymongo import MongoClient
from gridfs import GridFS
from bson.objectid import ObjectId

# The MongoDB connection string (which contains the user name and password) is read from
# the MONGODB_URI environment variable so that credentials never live in the notebook.
# Provide it through the host's secret manager or the process environment before running
# this cell. The password that used to be embedded here must be treated as leaked and
# rotated.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "MONGODB_URI environment variable is not set; "
        "export the MongoDB connection string before running this cell"
    )

client = MongoClient(mongodb_uri)
db = client["db_test"]
# GridFS bucket (default "fs" collections) that stores the uploaded zip archives.
fs = GridFS(db)

def unzip_file(zip_file_path, extract_to_folder='extracted_zip'):
    """Extract a zip archive into a freshly emptied folder.

    The target folder is deleted and recreated before extraction so that files
    left over from a previously extracted archive can never be mixed with the
    contents of this one.

    Args:
        zip_file_path: path of the ``.zip`` file to extract.
        extract_to_folder: destination folder. WARNING: any existing content of
            this folder is removed. Defaults to ``'extracted_zip'`` (relative to
            the current working directory).

    Raises:
        zipfile.BadZipFile: if ``zip_file_path`` is not a valid zip archive.
        FileNotFoundError: if ``zip_file_path`` does not exist.
    """
    import shutil  # local import: only this helper needs it

    # Open the archive first so an invalid or missing zip fails before anything is wiped.
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # Pure-Python replacement for the `!mkdir` shell magic: works in any shell/OS
        # and does not complain when the folder already exists.
        shutil.rmtree(extract_to_folder, ignore_errors=True)
        os.makedirs(extract_to_folder, exist_ok=True)
        zip_ref.extractall(extract_to_folder)

def read_files_in_directory(directory):
    """Read every UTF-8 text file under ``directory`` into a LangChain ``Document``.

    The tree is walked recursively in sorted order so the returned list is
    deterministic. Files that cannot be decoded as UTF-8 (for example binaries)
    or cannot be read are reported on stdout and skipped.

    Args:
        directory: root folder to walk.

    Returns:
        A list of ``Document`` objects, one per readable file. ``page_content``
        holds the file text and ``metadata["source"]`` holds the file path.
    """
    file_contents = []
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for filename in sorted(files):
            filepath = os.path.join(root, filename)
            if not os.path.isfile(filepath):
                continue
            # Catch only the failures we expect; anything else (including
            # KeyboardInterrupt) must propagate instead of being swallowed.
            try:
                with open(filepath, 'r', encoding='utf-8') as file:
                    text = file.read()
            except UnicodeDecodeError:
                print(f"unable to decode {filepath}")
                continue
            except OSError as err:
                print(f"unable to read {filepath}: {err}")
                continue
            # Keep the path so retrieved chunks can be traced back to their file.
            file_contents.append(
                Document(page_content=text, metadata={"source": filepath})
            )
    return file_contents

In [22]:
def generate(files_content,query):
  """Answer ``query`` about a set of source files using retrieval-augmented generation.

  Builds an in-memory FAISS index over ``files_content`` with the module-level
  ``embeddings_model``, retrieves the most relevant documents for ``query`` and
  passes them to the module-level ``llm`` through a ``RetrievalQA`` "stuff" chain.
  The answer is also written to ``response.txt`` in the current working directory.

  Args:
    files_content: non-empty list of ``Document`` objects to search.
    query: the user's question or instruction.

  Returns:
    The generated answer text (``response['result']``).

  Raises:
    ValueError: if ``files_content`` is empty, because there is nothing to index
      or retrieve from.
  """
  if not files_content:
    raise ValueError("generate() needs at least one document to build the vector store")

  # create vector db for similarity search
  vectorstore_db = FAISS.from_documents(files_content, embeddings_model)

  # Maximal-marginal-relevance search returning the top K (K=5) documents
  # for the question's embedding.
  retriever = vectorstore_db.as_retriever(search_type="mmr", search_kwargs={"k": 5})
  custom_prompt_template = """
  ### System:
  You are an AI assistant that follows instructions extremely well. Help as much as you can and do not repeat yourself.do not generate repetitive content.
  ### User:
  You are a Senior developer with multi language coding skills and follow the coding patterns of the existing code base. Use only the following information to answer user queries:
  Context= {context}
  History = {history}
  Question= {question}
  ### Assistant:
  """

  prompt = PromptTemplate(template=custom_prompt_template,
                          input_variables=["question", "context", "history"])
  # A new memory object is created on every call, so `history` starts empty each time.
  memory = ConversationBufferMemory(input_key="question",
                                   memory_key="history",
                                   return_messages=True)

  qa_chain = RetrievalQA.from_chain_type(
                        llm=llm, chain_type='stuff',
                        # Use the MMR retriever configured above; previously it was built
                        # but a default retriever was passed to the chain instead.
                        retriever = retriever,
                        return_source_documents = True,
                        chain_type_kwargs = {"verbose": False,
                                            "prompt": prompt,
                                            "memory": memory
                                            })
  response = qa_chain({"query": query})
  answer = response['result']
  # Explicit UTF-8 so model output with non-ASCII characters is written correctly
  # regardless of the platform's default encoding.
  with open('response.txt','w', encoding='utf-8') as f:
    f.write(answer)
  return answer
# !mkdir downloads

In [None]:
import shutil

# Seconds to wait between two polls of GridFS; avoids hammering the database
# with back-to-back queries when no request is pending.
POLL_INTERVAL_SECONDS = 5
DOWNLOAD_DIR = "downloads"
EXTRACT_DIR = "extracted_zip"

# Worker loop: runs until the kernel is interrupted.
while True:
    # Every GridFS file whose "flag" field is the string "False" is a pending request.
    for grid_file in fs.find({"flag": "False"}):
        # Keep only the final path component of the stored name so a crafted filename
        # (e.g. "../../x") cannot write outside DOWNLOAD_DIR; fall back to the id
        # when no usable name is stored.
        safe_name = os.path.basename(grid_file.filename or "") or f"{grid_file._id}.zip"
        zip_path = os.path.join(DOWNLOAD_DIR, safe_name)
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)
        try:
            # Save the uploaded archive to the downloads folder.
            with open(zip_path, "wb") as f:
                f.write(grid_file.read())

            unzip_file(zip_path)
            files_content = read_files_in_directory(EXTRACT_DIR)
            response = generate(files_content, grid_file.prompt)

            # Mark the request as done and store the answer on the GridFS file document.
            db.fs.files.update_one(
                {"_id": grid_file._id},
                {"$set": {"flag": "True", "response": response}},
            )
            print(response)
        finally:
            # Always clean up, even on failure, so the next request never sees the
            # archive or the extracted files of this one. Errors still propagate and
            # leave the request flagged "False".
            shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True)
            shutil.rmtree(EXTRACT_DIR, ignore_errors=True)
    time.sleep(POLL_INTERVAL_SECONDS)

In [27]:
# Create the downloads folder; unlike `!mkdir`, this is shell-independent and does not
# fail when the folder already exists, so the cell can be re-run safely.
os.makedirs("downloads", exist_ok=True)