In [None]:
import json
# Load the persisted chunk store from disk (plain path: no f-string needed, nothing is interpolated).
with open("database/technical_engineering_vector_store.json", "r") as f:
    chunks_data = json.load(f)

# Peek at a single entry. The store is indexed with the id in string form.
# `chunk_id` is used instead of `id` so the built-in id() is not shadowed
# for every later cell that runs in the same kernel.
chunk_id = 0
chunks_data[str(chunk_id)]

In [None]:
#This python script contains functions that collect data in form of .txt,.doc and parse them into paragraphs or full text fr

#Imports
from annotated_types import doc
from docx import Document
import os

# Take a document and return its list of paragraphs
def obtain_paragraphs(doc):
    """Return the ``paragraphs`` attribute of ``doc``.

    This is the extension point for any extra per-paragraph logic
    (e.g. collecting metadata for each paragraph or converting the result).
    """
    paragraphs = doc.paragraphs
    return paragraphs

# Function for loading documents and collecting their paragraphs
def get_document(filenames: list, doc_dir: str = '../data/doc/'):
    """Read each named document file and collect its paragraphs.

    Args:
        filenames: File names to load from ``doc_dir``. A name that occurs more
            than once is read only the first time it is seen.
        doc_dir: Directory the file names are joined onto. Defaults to
            ``'../data/doc/'``, the location that used to be hard-coded.

    Returns:
        A list with one entry per distinct file name, in input order. Each entry
        is whatever ``obtain_paragraphs`` returns for that document.
    """
    seen_names = set()  # names already loaded; set membership keeps the duplicate check cheap
    overall_paragraphs = []

    for file_name in filenames:
        # Skip files that were already read
        if file_name in seen_names:
            continue
        seen_names.add(file_name)

        file_path = os.path.join(doc_dir, file_name)
        document = Document(file_path)  # read the document file
        overall_paragraphs.append(obtain_paragraphs(document))

    return overall_paragraphs

In [None]:
'''
This Python Script contains logic to chunk the documents / Text Parsed information using Langchains and other methods
'''

#import langchain with overlaps / size
import langchain
from langchain_text_splitters import RecursiveCharacterTextSplitter

def do_chunking(paragraph, chunk_size=100, chunk_overlap=20):
    """Split one piece of text into overlapping chunks.

    Args:
        paragraph: The text to split.
        chunk_size: Passed to the splitter as ``chunk_size``; lengths are
            measured with ``len``. Defaults to 100, the value that used to be
            hard-coded.
        chunk_overlap: Passed to the splitter as ``chunk_overlap``. Defaults
            to 20, the value that used to be hard-coded.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.create_documents`` for
        the single input text.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.create_documents([paragraph])

In [None]:

# Data collection: load every listed document into per-file paragraph lists
filenames = [
    'new.docx',
]  # all document names to load
documents = get_document(filenames=filenames)
documents


In [None]:
# Do data chunking
# `documents` holds one list of paragraph objects per file. The splitter works on
# plain text, so each paragraph's `.text` is chunked individually (previously the
# whole paragraph list was passed on every inner iteration).
chunked_documents = []
for paragraphs in documents:
    for paragraph in paragraphs:
        paragraph_text = paragraph.text
        if not paragraph_text.strip():
            continue  # blank paragraphs have nothing to chunk
        chunked_documents.append(do_chunking(paragraph_text))
print(chunked_documents)


In [None]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model
model = SentenceTransformer('bert-base-nli-mean-tokens')

# Sample sentences to embed
text = [
    "Your text to embed goes here",
    "dzdfasfdsa",
]
embeddings = model.encode(text)

# One embedding is produced per input sentence; show how many came back
len(embeddings)

In [None]:
# Rebind `model` to a different sentence-embedding checkpoint.
# Alternative checkpoint, currently disabled:
# model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Embed `text` again with this model; this overwrites the previous `embeddings`.
# NOTE: `text` is not defined in this cell - it must already exist in the kernel.
embeddings = model.encode(text)

In [None]:
# Length of the first embedding in `embeddings`
len(embeddings[0])

In [None]:
from transformers import AutoTokenizer, MistralForCausalLM

# Model and tokenizer are loaded from the same checkpoint id
mistral_repo = "mistralai/Mistral-7B-v0.1"
model = MistralForCausalLM.from_pretrained(mistral_repo)
tokenizer = AutoTokenizer.from_pretrained(mistral_repo)

prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate from the prompt's token ids, then decode and show the first sequence
generate_ids = model.generate(inputs.input_ids, max_length=30)
decoded_batch = tokenizer.batch_decode(
    generate_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)
decoded_batch[0]

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

# Conversation so far; the final user turn is the one the model should answer
messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

# The model is placed by device_map="auto", so send the inputs to the device it
# actually landed on instead of assuming a CUDA device is present.
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]

In [None]:
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain_experimental.chat_models import Llama2Chat
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage

# Prompt layout: a fixed system instruction, then the "chat_history" placeholder,
# then the newest human message filled from the {text} variable.
system_turn = SystemMessage(content="You are a helpful assistant.")
history_slot = MessagesPlaceholder(variable_name="chat_history")
human_turn = HumanMessagePromptTemplate.from_template("{text}")

template_messages = [system_turn, history_slot, human_turn]
prompt_template = ChatPromptTemplate.from_messages(template_messages)

from langchain_community.llms import HuggingFaceTextGenInference

# Generation settings forwarded to the text-generation-inference client
generation_settings = {
    "max_new_tokens": 512,
    "top_k": 50,
    "temperature": 0.1,
    "repetition_penalty": 1.03,
}
llm = HuggingFaceTextGenInference(
    inference_server_url="http://127.0.0.1:8080/",
    **generation_settings,
)

# Wrap the raw LLM as a chat model
model = Llama2Chat(llm=llm)

# Memory is stored under "chat_history", the same name the prompt's placeholder uses
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
chain = LLMChain(llm=model, prompt=prompt_template, memory=memory)

answer = chain.run(
    text="What can I see in Vienna? Propose a few locations. Names only, no details."
)
print(answer)

In [1]:
#searching faiss
#imports
from elasticsearch import Elasticsearch
import faiss
import numpy as np
import json

def search_faiss(index_name, question_vector, k=3):
    """Return the stored texts of the nearest neighbours of a query vector.

    Args:
        index_name: File name of the FAISS index inside ``database/``. The chunk
            texts are read from ``database/<part of index_name before the first '.'>.json``.
        question_vector: A single query embedding.
        k: Number of top results to request from the index. Defaults to 3, the
            value that used to be hard-coded.

    Returns:
        The chunk texts for the returned vector ids, in the order the index
        returned them. The list is shorter than ``k`` when the index reports
        fewer matches.
    """
    # Read the FAISS index file
    index = faiss.read_index(f'database/{index_name}')

    # Load the id -> text dictionary from the companion JSON file
    with open(f"database/{index_name.split('.')[0]}.json", "r") as f:
        chunks_data = json.load(f)

    # FAISS searches a 2-D float32 matrix of queries; force the dtype so plain
    # Python lists or float64 arrays are accepted as well.
    query_matrix = np.asarray([question_vector], dtype=np.float32)

    # Perform the search (distances are not used here)
    _distances, vector_ids = index.search(query_matrix, k)

    top_results = []
    for vector_id in vector_ids[0]:
        # FAISS fills missing neighbours with -1 when fewer than k results exist;
        # those ids have no entry in the JSON store.
        if vector_id < 0:
            continue
        top_results.append(chunks_data[str(int(vector_id))])
    return top_results


In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import datetime

# NOTE: the recorded run of this cell failed because "google/gemma-2b-it" is a gated
# repository (authenticated access is required) and no PyTorch install was found.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", device_map="auto")

input_text = "Write me a poem about Machine Learning."
# device_map="auto" decides where the model lives, so the tokenized inputs are
# moved to that same device before generation.
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)

# Print wall-clock timestamps around generation to see how long it takes
print(f'generate start: {datetime.datetime.now()}')
outputs = model.generate(**input_ids, max_length=200)
print(tokenizer.decode(outputs[0]))
print(f'END: {datetime.datetime.now()}')

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/google/gemma-2b-it.
401 Client Error. (Request ID: Root=1-66013321-6ca254b44051a131073c2bf5;07b63c98-d72a-4a29-8718-c50397671ea5)

Cannot access gated repo for url https://huggingface.co/google/gemma-2b-it/resolve/main/config.json.
Repo model google/gemma-2b-it is gated. You must be authenticated to access it.

In [3]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Tokenizer and model are both read from the same local checkpoint directory.
# NOTE: the recorded run failed with "No package metadata was found for auto-gptq".
local_model_dir = "../Llama-2-7B-Chat-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(local_model_dir)
model = AutoModelForCausalLM.from_pretrained(local_model_dir)

PackageNotFoundError: No package metadata was found for auto-gptq

In [2]:
'''
This python script contains logic for getting the hugging face model for 
'''
#imports
from embedding import sentence_embeddings
# from search_vector_creation import search_faiss
from langchain import PromptTemplate, LLMChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# Ask the user for a query; `question` is kept as an alias of `user_input`
# so either name can be used later.
question = user_input = input("Please enter the query")

# FUTURE SCOPE: query expansion (letting the LLM itself generate additional queries).

# Fetch the search retrievals from the database.
# The query is converted to an embedding first because FAISS searches vectors only
# (it stores and compares vectors regardless of what kind of data they represent).
index_name = 'technical_engineering_vector_store.faiss'
embedded_queries = sentence_embeddings([question])
question_vector = embedded_queries[0]
retreived_results = search_faiss(index_name, question_vector)
print(retreived_results)  # show what was retrieved


# Prompt text sent to the model; {context} and {question} are the two template variables
prompt_template = '''
Your an assitant of technical engineer. 
You will take the context from the user as context
also you will have option to look on what question  you are trying to answer based on the context provided.

Restrictions :  Your restricted to use your own knowledge or your restricted to hallucinate , Try to answer from the context for question

answer should include similarity score between context and answer generated from chatgpt by having 
(answer similarity_score : score) at end of answer
context:{context}
Question : {question}
'''

# Wrap the raw text in a PromptTemplate that declares both variables
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


#Convey prompt to model and input the obtained 3 retreivals
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Tokenizer and model come from the same checkpoint id
instruct_repo = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(instruct_repo)
model = AutoModelForCausalLM.from_pretrained(instruct_repo)

def load_model(model, max_new_tokens=256):
    """Wrap ``model`` as a callable that can be used as a step in the chain.

    Args:
        model: Causal language model used for generation.
        max_new_tokens: Upper bound on the number of tokens generated per call.
            The earlier ``max_length=30`` limit counted the prompt tokens too,
            and the prompt template alone is longer than that.

    Returns:
        A function that takes the rendered prompt (a prompt value exposing
        ``to_string()``, or a plain string), runs generation with the
        module-level ``tokenizer`` and returns the first decoded sequence.
    """
    def _generate(prompt_value):
        # The prompt step emits a prompt value object; the tokenizer needs its text.
        if hasattr(prompt_value, "to_string"):
            prompt_text = prompt_value.to_string()
        else:
            prompt_text = str(prompt_value)

        inputs = tokenizer(prompt_text, return_tensors="pt")

        # Generate
        generate_ids = model.generate(inputs.input_ids, max_new_tokens=max_new_tokens)
        # NOTE(review): for decoder-only models the decoded text usually starts with
        # the prompt itself - confirm whether it should be stripped.
        return tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    return _generate


# Join the retrieved chunks once; every value in the chain's input mapping must be
# a runnable or a callable, so the constant context is supplied through a lambda.
retrieved_context = 'Result : '.join(retreived_results)

rag_chain = (
    {"context": lambda _question: retrieved_context, "question": RunnablePassthrough()}
    | prompt
    | load_model(model)
    | StrOutputParser()
)

# Answer the query the retrieval above was run for
rag_chain.invoke(question)

#result the created Answer from model 



  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Example: reuse your existing OpenAI setup
from openai import OpenAI

# Point the OpenAI client at the local server
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

# Use the prompt template text as the system message.
# NOTE(review): `prompt_template` is passed as-is; nothing in this cell fills in its
# {context} / {question} placeholders - confirm whether that is intended.
system_message = {"role": "system", "content": prompt_template}

completion = client.chat.completions.create(
    model='Phi-2',
    messages=[system_message],
    temperature=0.7,
)

# `return` is only valid inside a function; ending the cell with the expression
# displays the reply text as the cell output instead of raising a SyntaxError.
completion.choices[0].message.content

ChatCompletionMessage(content=" I am a cat, I have fur so soft and black.My name is Mittens, you can call me that.I like to chase mice, and sometimes eat rice.But don't be afraid, I'm just here for fun.You can pet me, but not too rough.Or else I might scratch your face or pull a muscle.So please treat me with care, and we'll both have a great day!\n", role='assistant', function_call=None, tool_calls=None)


" I am a cat, I have fur so soft and black.My name is Mittens, you can call me that.I like to chase mice, and sometimes eat rice.But don't be afraid, I'm just here for fun.You can pet me, but not too rough.Or else I might scratch your face or pull a muscle.So please treat me with care, and we'll both have a great day!\n"