In [1]:
# Importing the necessary Python libraries
import json
import time
import yaml

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datasets import Dataset
from langchain.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_community.document_loaders import DataFrameLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Reading the knowledge-item CSV and the validation CSV from the shared data folder
df_kis = pd.read_csv(filepath_or_buffer = '../data/synthetic_knowledge_items.csv')
df_validation = pd.read_csv(filepath_or_buffer = '../data/rag_sample_qas_from_kis.csv')

In [8]:
# Instantiating both OpenAI embedding models in one step:
# the small model as the default and the large model as the "advanced" option
embedding_model, adv_embedding_model = (
    OpenAIEmbeddings(model = 'text-embedding-3-small'),
    OpenAIEmbeddings(model = 'text-embedding-3-large'),
)

In [14]:
# Taking both wordings (ki_text and alt_ki_text) of the first knowledge item
text_1 = df_kis['ki_text'].iloc[0]
text_2 = df_kis['alt_ki_text'].iloc[0]

In [15]:
# Embedding each wording with the large embedding model
embedding_1 = adv_embedding_model.embed_query(text = text_1)
embedding_2 = adv_embedding_model.embed_query(text = text_2)

In [16]:
def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Computed as the dot product of ``a`` and ``b`` divided by the product
    of their Euclidean norms.

    Args:
        a: First vector (any sequence or array accepted by ``np.dot``).
        b: Second vector, with the same length as ``a``.

    Returns:
        The cosine similarity of ``a`` and ``b``. When either vector has
        zero magnitude (including empty input) the ratio is undefined, so
        ``0.0`` is returned instead of ``nan``.
    """
    # Product of the two magnitudes; zero if either vector is all zeros or empty
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Avoid 0/0, which would emit a RuntimeWarning and evaluate to nan
        return 0.0
    return np.dot(a, b) / denominator

# Comparing the embeddings of the two wordings of the first knowledge item
cosine_similarity(embedding_1, embedding_2)

0.8453851956200121

In [17]:
# Stacking ki_text and alt_ki_text into a single 'text' column (one row per topic and variant),
# then relabelling the variant names in 'text_type' for better readability
df_kis_melted = (
    df_kis
    .melt(
        id_vars = ['ki_topic'],
        value_vars = ['ki_text', 'alt_ki_text'],
        var_name = 'text_type',
        value_name = 'text',
    )
    .assign(
        text_type = lambda melted: melted['text_type'].replace(
            {'ki_text': 'normal text', 'alt_ki_text': 'alternative text'}
        )
    )
)

print(df_kis_melted)

                                              ki_topic         text_type  \
0         Setting Up a Mobile Device for Company Email       normal text   
1                            Resetting a Forgotten PIN       normal text   
2            Configuring VPN Access for Remote Workers       normal text   
3         Troubleshooting Issues with Microsoft Office       normal text   
4          Setting Up a Conference Call on Cisco Webex       normal text   
..                                                 ...               ...   
195         Setting Up a New User's Account in Dropbox  alternative text   
196                          Creating a New IT Problem  alternative text   
197  Troubleshooting Issues with a Slow-Performing ...  alternative text   
198  Setting Up a Secure Connection to a Company-Is...  alternative text   
199            Resetting a Forgotten Network Username.  alternative text   

                                                  text  
0    **Setting Up a Mobile Dev

In [18]:
# Displaying the reshaped dataframe as a rich table
df_kis_melted

Unnamed: 0,ki_topic,text_type,text
0,Setting Up a Mobile Device for Company Email,normal text,**Setting Up a Mobile Device for Company Email...
1,Resetting a Forgotten PIN,normal text,**Resetting a Forgotten PIN**\n\nIf you have f...
2,Configuring VPN Access for Remote Workers,normal text,**Configuring VPN Access for Remote Workers**\...
3,Troubleshooting Issues with Microsoft Office,normal text,**Troubleshooting Issues with Microsoft Office...
4,Setting Up a Conference Call on Cisco Webex,normal text,"To set up a conference call on Cisco Webex, fo..."
...,...,...,...
195,Setting Up a New User's Account in Dropbox,alternative text,"To set up a new user's account in Dropbox, fol..."
196,Creating a New IT Problem,alternative text,To create a new IT problem in Widgetco's IT se...
197,Troubleshooting Issues with a Slow-Performing ...,alternative text,**Troubleshooting Issues with a Slow-Performin...
198,Setting Up a Secure Connection to a Company-Is...,alternative text,To set up a secure connection to a company-iss...


In [20]:
# Turning every row of the melted dataframe into a document:
# the 'text' column becomes the page content
kis_loader = DataFrameLoader(df_kis_melted, page_content_column = 'text')
documents = kis_loader.load()

In [22]:
# Checking how many documents were loaded from the melted dataframe
len(documents)

200

In [24]:
# Building a FAISS index over the loaded documents using the small embedding model
# NOTE(review): documents are indexed whole here; no chunking step is visible before this cell - confirm whether one is intended
faiss_index = FAISS.from_documents(
    documents = documents,
    embedding = embedding_model,
)

In [30]:
# Exposing the FAISS index as a retriever configured with k = 5 in its search arguments
retriever_search_kwargs = dict(k = 5)
retriever = faiss_index.as_retriever(search_kwargs = retriever_search_kwargs)

In [31]:
# Question used to exercise the retriever
input_query = 'How do I set up my company email on my mobile device?'

# Asking the retriever for the documents that best match the question
relevant_documents = retriever.invoke(input = input_query)

In [32]:
# Inspecting the documents returned by the retriever (configured with k = 5)
relevant_documents

[Document(metadata={'ki_topic': 'Setting Up a Mobile Device for Company Email', 'text_type': 'normal text'}, page_content='**Setting Up a Mobile Device for Company Email**\n\n**Prerequisites:**\n\n* Mobile device with a supported operating system (iOS, Android, or Windows)\n* Company email account credentials\n* Mobile device management (MDM) profile installed (if required by company policy)\n\n**Step 1: Ensure Mobile Device Management (MDM) Profile is Installed (if required)**\n\nIf your company requires MDM for mobile devices, ensure that the profile is installed on your device. This profile will allow your device to connect to the company network and access company email. If you are unsure whether MDM is required, contact your IT department for assistance.\n\n**Step 2: Set Up Email Account on Mobile Device**\n\n1. Go to the Settings app on your mobile device.\n2. Select "Mail" or "Email" (depending on your device\'s operating system).\n3. Tap "Add Account" or "Create a new account".