# Importing Libraries

In [1]:
import langchain
import langchain_openai
import pinecone
import langchain_huggingface

  from tqdm.autonotebook import tqdm


# Video about VectorDB

https://www.youtube.com/watch?v=klTvEwg3oJ4

# Load Environment Variables

In [2]:
from dotenv import load_dotenv
load_dotenv()

True

# Methods

In [3]:
from langchain.document_loaders import PyPDFDirectoryLoader

def read_doc(directory):
    file_loader = PyPDFDirectoryLoader(directory)
    documents = file_loader.load()
    return documents

In [4]:
doc = read_doc('documents/')
len(doc)

15

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter 

def chunk_data(docs, chunk_size = 800, chunk_overlap = 50):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = chunk_size, chunk_overlap = chunk_overlap)
    text_splitter.split_documents(docs)
    return docs

In [6]:
documents = chunk_data(docs = doc)
documents

[Document(page_content='The Traveling Salesman problem\nAmanur Rahman Saiyed\nIndiana State University\nTerre Haute, IN 47809 , USA\nasaiyed@sycamores.indstate.edu\nApril 11, 2012\nAbstract\nThe Traveling Salesman Problem, deals with creating the ideal path\nthat a salesman would take while traveling between cities. The solution\nto any given TSP would be the Shortest way to visit a ﬁnite number\nof cities, visiting each city only once, and then returning to the starting\npoint. We also must assume that if there are two cities, city A and city\nB for example, it costs the same amount of money to travel from A to\nB as it does from B to A. For the most part, the solving of a TSP is\nno longer executed for the intention its name indicates. Instead, it is a\nfoundation for studying general methods that are applied to a wide range\nof optimization problems.\nContents\n1 Statement Of The Problem 2\n2 History of The TSP 2\n3 Solution methods of TSP 3\n3.1 Exact Solutions . . . . . . . . . . 

In [20]:
def retrieve_query(query, index, k=2):
    matching_results = index.similarity_search(query, k=k)
    return matching_results

In [21]:
def retrieve_answers(query, chain, index):
    doc_search = retrieve_query(query, index)
    print(doc_search)
    response = chain.run(input_documents = doc_search, question = query)
    return response

# Converting document to embeddings

In [22]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000002A4A398DF10>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000002A4A39BCB50>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [23]:
vectors = embeddings.embed_query("How are you?")
len(vectors)

1536

# Initializing VectorDB

In [24]:
from langchain_pinecone import PineconeVectorStore

index_name = "langchainvector"

index = PineconeVectorStore.from_documents(doc, embeddings, index_name = index_name)

# Prompt

In [33]:
problem = """

Given a set of cities and the distances between each pair of cities, find the route 
that maximizes the total distance traveled by the car, such that each city is 
visited exactly once and the car returns to the original starting point. This 
is a modified version of travelling salesman problem.

"""

prompt = """Take a deep breath and solve the question delimited by delimited by ''' step by step.

'''{problem}'''

""".format(problem = problem)

# Models

### OpenAI

In [37]:
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import ChatOpenAI

llm_openai = ChatOpenAI(model_name = "gpt-3.5-turbo", temperature=0)
chain_openai = load_qa_chain(llm_openai, chain_type = "stuff")

### HuggingFace

In [46]:
from langchain import HuggingFaceHub

llm_hf = HuggingFaceHub(repo_id="microsoft/Phi-3-vision-128k-instruct", model_kwargs={"temperature": 0, "max_length": 64})
chain_hf = load_qa_chain(llm_hf, chain_type = "stuff")

# Run The LLM

In [50]:
llm_openai.predict(prompt)

  warn_deprecated(


'Step 1: Understand the problem\n- We are given a set of cities and the distances between each pair of cities.\n- We need to find the route that maximizes the total distance traveled by the car.\n- Each city must be visited exactly once.\n- The car must return to the original starting point.\n\nStep 2: Plan a solution\n- This problem is a variation of the traveling salesman problem, which is NP-hard.\n- One approach to solve this problem is to use a brute force algorithm to generate all possible routes and calculate the total distance for each route.\n- We can then compare the total distances of all routes and find the route that maximizes the total distance traveled.\n\nStep 3: Implement the solution\n- Write a function that takes the set of cities and distances as input.\n- Generate all possible permutations of the cities to represent different routes.\n- Calculate the total distance for each route by summing the distances between consecutive cities.\n- Keep track of the route with t

## RAG

In [51]:
our_query = prompt
answer = retrieve_answers(our_query, chain_openai, index)
print(answer)

[Document(page_content='THE TRAVELING SALESMAN PROBLEM 4\nStep 3. calculate the distance of each tour.\nStep 4. choose the shortest tour, this is the optimal solution.\n3.1.2 Example for Brute Force Technique\nA B\nC D3\n5\n2 910\n1\nHere, there are 4 nodes. There is a possibility of the following 3 paths\nA B\nC D3\n5\n2 9\n10\n1A B\nC D3\n5\n2 9\n10\n1\nA→B→C→D→A= 15 A→B→D→C→A= 19', metadata={'page': 3.0, 'source': 'documents\\Travelling Salesman.pdf'}), Document(page_content='THE TRAVELING SALESMAN PROBLEM 4\nStep 3. calculate the distance of each tour.\nStep 4. choose the shortest tour, this is the optimal solution.\n3.1.2 Example for Brute Force Technique\nA B\nC D3\n5\n2 910\n1\nHere, there are 4 nodes. There is a possibility of the following 3 paths\nA B\nC D3\n5\n2 9\n10\n1A B\nC D3\n5\n2 9\n10\n1\nA→B→C→D→A= 15 A→B→D→C→A= 19', metadata={'page': 3.0, 'source': 'documents\\Travelling Salesman.pdf'})]
To solve this modified version of the Traveling Salesman Problem where we want 