# Building Semantic Memory with Embeddings

 

In [17]:
# %pip install semantic-kernel==0.4.5.dev0  # uncomment to install the pinned version into this kernel's environment

In [1]:
import os
import json
from typing import Tuple
import semantic_kernel as sk
from semantic_kernel.connectors.ai.open_ai import (
    OpenAIChatCompletion,
    OpenAITextEmbedding,
    AzureChatCompletion,
    AzureTextEmbedding,
)
# --- Azure OpenAI settings (a missing variable raises KeyError right here) ---
endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
key = os.environ["AZURE_OPENAI_API_KEY"]
# Deployment used for chat completion.
deployment_name = os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"]
# Passed as the deployment name of the embedding service when the kernel is set up.
embeddings = os.environ["AZURE_OPENAI_EMBEDDINGS_MODEL_NAME"]

# --- Azure AI Search settings (used for the memory store) ---
azure_ai_search_url = os.environ["AZURE_AISEARCH_ENDPOINT"]
azure_ai_search_api_key = os.environ["AZURE_AISEARCH_API_KEY"]

In [2]:
kernel = sk.Kernel()

# Chat completion service, backed by the Azure OpenAI chat deployment.
azure_chat_service = AzureChatCompletion(
    deployment_name=deployment_name,
    endpoint=endpoint,
    api_key=key,
)
kernel.add_chat_service("chat_completion", azure_chat_service)

# Embedding service. Its deployment name comes from the
# AZURE_OPENAI_EMBEDDINGS_MODEL_NAME environment variable (`embeddings`), so
# point that variable at your embedding deployment, not at the chat deployment.
azure_text_embedding = AzureTextEmbedding(
    deployment_name=embeddings,
    endpoint=endpoint,
    api_key=key,
)
# No memory store is registered here; that happens in a later cell.
kernel.add_text_embedding_generation_service("ada", azure_text_embedding)

<semantic_kernel.kernel.Kernel at 0x7f993c1f9a90>

### Manually adding memories

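Create some initial memories. In this notebook the memories are chunks of a financial filing rather than "About Me" facts, and they are stored in an `AzureCognitiveSearchMemoryStore` (instead of the in-memory `VolatileMemoryStore`). We add memories to the store by calling `kernel.memory.save_information_async`.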

In [3]:
from semantic_kernel.connectors.memory.azure_cognitive_search import (
    AzureCognitiveSearchMemoryStore,
)

# Memory store backed by Azure AI Search (Azure Cognitive Search).
# text-embedding-ada-002 produces 1536-dimensional vectors, hence vector_size=1536.
azure_search_store = AzureCognitiveSearchMemoryStore(
    vector_size=1536,
    search_endpoint=azure_ai_search_url,
    admin_key=azure_ai_search_api_key,
)
kernel.register_memory_store(memory_store=azure_search_store)

In [4]:
# Load the pre-chunked documents and flatten each one into a single flat dict.
# The file is read as UTF-8 explicitly: the text contains non-ASCII characters
# (for example "☒"), and the platform default encoding is not UTF-8 everywhere.
with open('docVectors.json', 'r', encoding='utf-8') as file:
    documents = json.load(file)

# Metadata fields that are joined, in this order, into 'AdditionalMetadata'.
metadata_fields = ['doc_year', 'doc_type', 'page', 'start_index']

for i, document in enumerate(documents):
    # Lift the entries of the nested 'metadata' dict to the top level.
    document.update(document.pop('metadata'))
    # The position in the file becomes the document id (stored as a string).
    document['id'] = str(i)
    # Drop the stored embedding vector, if the document carries one.
    document.pop('contentVector', None)
    # A missing field contributes an empty string; a result looks like '2023_10K_0_2'.
    document['AdditionalMetadata'] = '_'.join(
        str(document.get(field, '')) for field in metadata_fields
    )

In [5]:
# Display the first document to check the flattened structure.
documents[0]

{'page_content': 'UNITED STATES\nSECURITIES AND EXCHANGE COMMISSION\nWashington, D.C. 20549\n \nFORM \n10-K\n \n \n☒\nANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n \n \n \nFor the Fiscal Year Ended\n June 30, \n2023\n \n \n \nOR\n \n \n☐\nTRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n \n \n \nFor the Transition Period From                  to\nCommission File Number \n001-37845\n \n \nMICROSOFT CORPORATION\n \n \nWASHINGTON\n \n91-1144442\n(STATE OF INCORPORATION)\n \n(I.R.S. ID)\nONE MICROSOFT WAY\n, \nREDMOND\n, \nWASHINGTON\n \n98052-6399\n(\n425\n) \n882-8080\nwww.microsoft.com/investor\n \n \n \n \n \nSecurities registered pursuant to Section 12(b) of the Act:\n \n \n \n \n \n \n \n \n \nTitle of each class\n \nTrading Symbol\n \nName of exchange on which registered\n \n \n \n \n \nCommon stock, $\n0.00000625\n par value per share\n \nMSFT\n \nNASDAQ\n3.125% Notes due 2028\n \nMSFT\n \nNASDAQ\n2.6

In [6]:
# Remove the individual metadata keys from every document.
# pop(..., None) ignores keys that are absent, so this cell can be re-run safely.
for item in documents:
    for field in ('doc_quarter', 'doc_type', 'page', 'start_index', 'doc_year'):
        item.pop(field, None)

In [7]:
# Rename the document keys (old name -> new name).
field_renames = {
    'id': 'Id',
    'page_content': 'Text',
    'source': 'ExternalSourceName',
    'company_name': 'Description',
}

for item in documents:
    for old_key, new_key in field_renames.items():
        if old_key not in item and new_key in item:
            # Already renamed by an earlier run of this cell: skip, so that
            # re-running the cell does not raise KeyError.
            continue
        # Still raises KeyError when the field is genuinely missing.
        item[new_key] = item.pop(old_key)

In [8]:
# Preview the first document; any list value is shortened to its first two elements.
first_document = documents[0]
{
    name: (content[:2] if isinstance(content, list) else content)
    for name, content in first_document.items()
}


{'AdditionalMetadata': '2023_10K_0_2',
 'Id': '0',
 'Text': 'UNITED STATES\nSECURITIES AND EXCHANGE COMMISSION\nWashington, D.C. 20549\n \nFORM \n10-K\n \n \n☒\nANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n \n \n \nFor the Fiscal Year Ended\n June 30, \n2023\n \n \n \nOR\n \n \n☐\nTRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n \n \n \nFor the Transition Period From                  to\nCommission File Number \n001-37845\n \n \nMICROSOFT CORPORATION\n \n \nWASHINGTON\n \n91-1144442\n(STATE OF INCORPORATION)\n \n(I.R.S. ID)\nONE MICROSOFT WAY\n, \nREDMOND\n, \nWASHINGTON\n \n98052-6399\n(\n425\n) \n882-8080\nwww.microsoft.com/investor\n \n \n \n \n \nSecurities registered pursuant to Section 12(b) of the Act:\n \n \n \n \n \n \n \n \n \nTitle of each class\n \nTrading Symbol\n \nName of exchange on which registered\n \n \n \n \n \nCommon stock, $\n0.00000625\n par value per share\n \nMSFT\n \nNASDAQ\n3.1

In [9]:
# Keep the first 100 documents as a small subset.
document_small = documents[:100]
# Only the last expression of a cell is displayed, so print the sizes explicitly.
print(f"{len(documents)} documents in total, {len(document_small)} in the small subset")
# Show the last document of the subset. [-1] is index 99 when there are at
# least 100 documents and still works when there are fewer.
document_small[-1]


{'AdditionalMetadata': '2023_10K_22_2951',
 'Id': '99',
 'Text': 'were performed by personal computers. Even if many users view these devices as complementary to a personal computer, the prevalence of\n \nthese devices may make it more difficult to attract application developers to our PC operating system platforms. Competing with operating\n \nsystems licensed at low or no cost may decrease our PC operating system margins. Popular products or services offered on competing\n \nplatforms could increase their competitive strength. In addition, some of our devices compete with products made by our original equipment\n \nmanufacturer (“OEM”) partners, which may affect their commitment to our platform.\n•\nCompeting platforms have content and application marketplaces with scale and significant installed bases. The variety and utility of content and\n \napplications available on a platform are important to device purchasing decisions. Users may incur costs to move data and buy new content an

In [10]:
# Save every document into the memory collection named by index_name.
# Each call is awaited before the next one starts, so documents are saved one at a time.
# NOTE(review): the collection name says "small" and an earlier cell builds a
# 100-document `document_small` subset, yet this loop iterates over all of
# `documents` - confirm which of the two was intended.
# NOTE(review): document['ExternalSourceName'] is not passed to the call below - confirm that is intended.
index_name = 'finance-bench-small-sk'
for document in documents:
    await kernel.memory.save_information_async(
        collection=index_name, 
        id=document['Id'], 
        text= document['Text'],         
        description= document['Description'],
        additional_metadata=document['AdditionalMetadata']
    )

hnsw_parameters is not a known attribute of class <class 'azure.search.documents.indexes._generated.models._models_py3.HnswVectorSearchAlgorithmConfiguration'> and will be ignored


In [37]:
async def search_memory_examples(
    kernel: sk.Kernel,
    collection: str = "finance-bench-small-sk",
    questions=None,
) -> None:
    """Query the kernel's memory with example questions and print the best hit.

    Args:
        kernel: Kernel whose ``memory`` is searched.
        collection: Name of the memory collection to search. Defaults to the
            collection name this function originally had hard-coded.
        questions: Iterable of question strings. When ``None``, two built-in
            example questions are used.

    Returns:
        None. For each question, the question and the text of the first search
        result are printed.
    """
    if questions is None:
        questions = [
            "what's Microsoft",
            "what's Income Statement?",
        ]

    for question in questions:
        print(f"Question: {question}")
        results = await kernel.memory.search_async(collection, question)
        if results:
            print(f"Answer: {results[0].text}\n")
        else:
            # Guard against an empty result list; indexing [0] would raise IndexError.
            print("Answer: <no matching memory found>\n")

In [38]:
# Run the example questions against the kernel's memory and print the answers.
await search_memory_examples(kernel)

Question: what's Microsoft
Answer: PART I
Item 1
 
Our growth depends on securely delivering continuous innovation and advancing our leading productivity and collaboration tools and services, including
 
Office 365, Dynamics 365, and LinkedIn. Microsoft 365 brings together Office 365, Windows, and Enterprise Mobility + Security to help organizations
 
empower their employees with AI-backed tools that unlock creativity, increase collaboration, and fuel innovation, all the while enabling compliance
 
coverage and data protection. Microsoft Teams is a comprehensive platform for work, with meetings, calls, chat, collaboration, and business process
 
automation. Microsoft Viva is an employee experience platform that brings together communications, knowledge, learning, resources, and insights.
 
Microsoft 365 Copilot combines next-generation AI with business data in the Microsoft Graph and Microsoft 365 applications.

Question: what's Income Statement?
Answer: PART I
Item 1
 
Note About Forw