In [4]:
import torch
from transformers import pipeline

pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")



In [10]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.indexes import VectorstoreIndexCreator
from sentence_transformers import SentenceTransformer
from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceBgeEmbeddings

##Load and split documents
bs4_strainer = bs4.SoupStrainer(class_=("main-content"))
loader = WebBaseLoader(
    web_paths=("https://deltek.com/en","https://www.deltek.com/en/about/contact-us", "https://www.deltek.com/en/small-business", "https://www.deltek.com/en/customers",
               "https://www.deltek.com/en/support", "https://www.deltek.com/en/partners"),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()



text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400, chunk_overlap=50, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)
all_splits_text = [split.page_content for split in all_splits]

print(all_splits_text)









["Powering Project Success\nBetter software means better projects.\n\n\n\nContact Us\n\n\nWhat We Do\n\n\n\n\n\n\nWhat's new at Deltek?\n\n\n\n\n\n\n\n\n\n\n\n\nProject-Based ERP Leader\nDeltek is a G2 leader for 15th consecutive quarter.\n\n\n\n\n\n\n\n\n\n\nDeltek Delivers Value\nNamed vertical-based ERP expert in SMB & Enterprise.\n\n\n\n\n\n\n\n\n\n\nGovCon Study Unveiled\nGet critical insight on trends shaping GovCon.", 'Registration is Now Open\nJoin thousands of Deltek customers, partners and industry leaders at our annual customer conference, Deltek ProjectCon. Explore our latest tech innovations and discover new ways to power your success over three days of learning, networking and fun with Deltek Project Nation.\n\n\n\n\n\n\nRegister Now to Save', 'Register Now to Save\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDeltek Powers Projects\nDeltek delivers software and information solutions that enable superior levels of project intelligence, management and collaboration. Our industry-focused 

In [23]:
length = [len(all_splits_text[i]) for i in range(len(all_splits_text))]
print(sum(length)) ##characters in all the text splits

35027


In [24]:
##open source embedding model
model_name = "BAAI/bge-large-en-v1.5"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
    query_instruction= 'Generate a representation for this sentence that can be used to retrieve related sentences:'
)
query = "Why is Deltek trusted?"
db = FAISS.from_documents(all_splits, embeddings)

##get relevant docs from vectorstore
relevant_docs = db.similarity_search(query, k = 20)
formatted_docs = '\n'.join(doc.page_content for doc in relevant_docs)
print(formatted_docs)


Learn More about the Project Lifecycle








AI-Powered Business Companion
We are transforming project delivery with our AI-powered intelligent business companion, Deltek Dela™. Through innovative technology, Dela helps businesses generate smart content, make informed decisions, automate routine tasks and predict project success. 






Meet Dela
Deltek Professional Services
*






Message












Yes, I would like to receive email about news, products and research from Deltek.



Submit


If you need support for your Deltek solution, please open a case in the Deltek Support Center.


Your personal information, including your email address, will be held in the strictest of confidence and will never be shared with anyone as stipulated in our Privacy Policy.
as a single point of contact to help manage the product lifecycle and enhance your Deltek investment.
Explore Deltek Customer Success Stories






















Product






Ajera
                                



ArchiS

In [20]:
print(len(formatted_docs))

2678


In [25]:
# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
    {
        "role": "system",
        "content": f"You are a friendly chatbot who responds about queries related to Deltek. You will answer questions from {formatted_docs}. You will not make up answers."
    },
    {"role": "user", "content": query},
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
start_out = len(prompt)
print(outputs[0]["generated_text"][start_out:])

Deltek is trusted by a wide range of small businesses and organizations due to its innovative software solutions and industry-specific solutions that adapt and evolve with the needs of growing companies. Deltek has been recognized with accolades for outstanding customer satisfaction, including user adoption, meeting business needs, and seamless setup. The company's commitment to providing solutions that connect and automate the project lifecycle that fuels business growth has earned it high praise from customers and industry analysts alike. Deltek's easy-to-use products, including project information management (PIM), specification solutions, cost management, and more, help small businesses increase efficiency, manage projects more effectively, and achieve better performance, all while safeguarding their data and minimizing costs.
