In [1]:
import os

# OpenAI
# The key is taken from the process environment so it is never hard-coded in
# the notebook. Fail right here with an actionable message when it is missing:
# writing None back into os.environ would raise an opaque
# "TypeError: str expected, not NoneType". The write-back itself is dropped
# because the value was just read from that same environment variable.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before "
        "starting the Jupyter kernel."
    )

In [20]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the H-1B filing-instructions PDF from the local data folder.
loader = PyPDFLoader("./data/H-1B Filing Instructions.pdf")
documents = loader.load()

# Split the loaded documents into smaller chunks (chunk_size=1000,
# chunk_overlap=10); `docs` is what gets embedded and indexed further down.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=10,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents)


In [21]:
# Number of chunks produced by the text splitter.
len(docs)

61

In [22]:
# Preview only the first few chunks: echoing the whole list floods the
# notebook output with every chunk's full text and metadata.
docs[:3]

[Document(page_content='1 \n  H-1B FILING INSTRUCTIONS AND GENERAL INFORMATION  \nWhat is an H -1B Petition?  \nH-1B is a nonimmigrant status that permits professional employment.  The employer must file an H -1B \npetition with the U .S. Citizenship and Immigration Services (US CIS) and obtain a pproval for it; the “alien \nbeneficiary” must obtain H -1B status either by applying for an H -1B visa abroad and entering the U.S. in H -1B \nstatus, or by changing to H -1B status from another nonimmigrant status in the U.S.  H-1B employment may  not \nbegin until  the office of International Student and Scholar Services ( ISSS ) informs the hiring department \nthat all necessary approvals have been received.    \n \nRutgers policy restricts H -1B sponsorship to full time faculty appointments only.  For teaching faculty, \n“Assistant Professor” is the lowest range title for which Rutgers will sponsor an H -1B.  For research faculty, the', metadata={'source': './data/H-1B Filing Instructions

In [23]:
from langchain_community.vectorstores import FAISS

# Embedding model used to vectorise each chunk.
embeddings = OpenAIEmbeddings()

# Build the FAISS vector store from the chunks held in `docs`.
db = FAISS.from_documents(documents=docs, embedding=embeddings)

# Report how many vectors the underlying index holds.
print(db.index.ntotal)

61


In [24]:
# Persist the vector store to the local "db" folder.
db.save_local(folder_path="db")

In [27]:
# Free-text question to look up in the vector store.
query = "give me Email ISSS"

# NOTE(review): this rebinds `docs`, which until now held the split chunks used
# to build `db`; re-running the indexing cell after this one would index the
# search hits instead. Consider a separate name once the display cells below
# are updated to match.
docs = db.similarity_search(query=query)

In [30]:
# Display `docs`; after the query cell this name refers to the
# similarity-search results, not the original split chunks.
docs

[Document(page_content='2 \n The First Step:  E -mail ISSS  for an H -1B Request Packet  \n \nE-mail ISSS  at gantonatos@global.rutgers.edu  for a request packet. In  the subject line , write, “H -1B Packet \nneeded .”  In the body of the message,  provide all of the following information :  \n\uf0b7 contact in sponsoring unit: name, phone number & e-mail address  to which H1B packet should be sent  \n\uf0b7 the name of the alien beneficiary for whom H -1B petition will be filed  \n\uf0b7 type of appointme nt (position title) you are offering the alien  \n\uf0b7 geographic location where actual work will occur (“on campus” or, if off -campus, provide city and state)  \n\uf0b7 name of sponsoring unit   \n\uf0b7 please check all that apply to the alien beneficiary:  \n____current Rutgers emplo yee \n____not yet a Rutgers employee  \n____currently in the U.S. in H -1B status  \n____currently in the U.S. in a nonimmigrant status other than H -1B (What status ? ____________)', metadata={'so

In [29]:
# Inspect the first entry of `docs` on its own.
docs[0]

Document(page_content='2 \n The First Step:  E -mail ISSS  for an H -1B Request Packet  \n \nE-mail ISSS  at gantonatos@global.rutgers.edu  for a request packet. In  the subject line , write, “H -1B Packet \nneeded .”  In the body of the message,  provide all of the following information :  \n\uf0b7 contact in sponsoring unit: name, phone number & e-mail address  to which H1B packet should be sent  \n\uf0b7 the name of the alien beneficiary for whom H -1B petition will be filed  \n\uf0b7 type of appointme nt (position title) you are offering the alien  \n\uf0b7 geographic location where actual work will occur (“on campus” or, if off -campus, provide city and state)  \n\uf0b7 name of sponsoring unit   \n\uf0b7 please check all that apply to the alien beneficiary:  \n____current Rutgers emplo yee \n____not yet a Rutgers employee  \n____currently in the U.S. in H -1B status  \n____currently in the U.S. in a nonimmigrant status other than H -1B (What status ? ____________)', metadata={'sou