In [None]:
using System.IO;
using System.Collections.Generic;

// Reads KEY=VALUE settings from a .env-style text file.
//
// filePath: path of the file to read; File.ReadAllLines throws if it cannot be read.
// Returns:  a dictionary mapping each key to its value. When a key appears more than
//           once, the last occurrence wins.
//
// Parsing rules:
//   - Blank lines and lines whose first non-whitespace character is '#' are ignored,
//     so a commented-out setting such as "# KEY=value" is not loaded.
//   - Each remaining line is split on the first '=' only, so values may contain '='.
//   - Lines without '=' or with an empty key are ignored.
//   - Keys and values are trimmed; double quotes around the value are removed.
Dictionary<string, string> ReadEnvFile(string filePath)
{
    var settings = new Dictionary<string, string>();
    foreach (var rawLine in File.ReadAllLines(filePath))
    {
        var line = rawLine.Trim();

        // Nothing to parse on empty lines or comment lines.
        if (line.Length == 0 || line[0] == '#')
        {
            continue;
        }

        // Limit the split to two parts so that '=' inside the value is preserved.
        var parts = line.Split('=', 2);
        if (parts.Length != 2)
        {
            continue;
        }

        var key = parts[0].Trim();
        if (key.Length == 0)
        {
            // "=value" has no usable name; storing it under "" would only hide a typo.
            continue;
        }

        settings[key] = parts[1].Trim().Trim('"'); // Remove any double quotes
    }
    return settings;
}

// Load every KEY=VALUE setting from the local .env file into memory.
Dictionary<string, string> envVars = ReadEnvFile(".env");

// Look up the OpenAI endpoint URL and API key; the dictionary indexer throws
// if either entry is missing from the .env file.
var endpoint = envVars["OPENAI_API_BASE"];
var apiKey = envVars["OPENAI_API_KEY"];

Text Chunking with Semantic Kernel

In [None]:
#r "nuget: Microsoft.SemanticKernel, 1.0.1" 

In [None]:
using Microsoft.SemanticKernel.Text;

// Load the full text of the sample update notes that will be chunked for retrieval.
string filePath = "azure-functions-June-2023-Updates.txt";
string updateText = await File.ReadAllTextAsync(filePath);

// TextChunker is flagged as evaluation-only (diagnostic SKEXP0055) and may change or be
// removed in future updates, so the diagnostic is suppressed just around its use.
#pragma warning disable SKEXP0055

// Stage 1: break the raw text into lines of at most 128 tokens each.
List<string> chunkedLines = TextChunker.SplitPlainTextLines(updateText, 128);

// Stage 2: regroup those lines into paragraphs of at most 250 tokens each.
List<string> paragraphs = TextChunker.SplitPlainTextParagraphs(chunkedLines, 250);

// From here on the diagnostic is reported again.
#pragma warning restore SKEXP0055

Console.WriteLine($"Number of chunks: {paragraphs.Count}");

Create Embedding

In [None]:
// Import required packages.
#r "nuget: Azure.AI.OpenAI, 1.0.0-beta.12" 
#r "nuget: Azure"

In [None]:
using Azure;
using Azure.AI.OpenAI;
using System.Linq;

// Create the OpenAI client from the endpoint and API key loaded from the .env file.
AzureKeyCredential credentials = new (apiKey);
OpenAIClient openAIClient = new (new Uri(endpoint), credentials);

// One dictionary per text chunk; each holds the "id", "content" and "contentVector"
// values for that chunk.
List<Dictionary<string, object>> inputDocuments = new();

// Request an embedding for every paragraph produced by the chunking step.
foreach (var paragraph in paragraphs)
{
    EmbeddingsOptions embeddingOptions = new()
    {
        // Specify the deployment name for the embedding model
        DeploymentName = "text-embedding-ada-002",
        Input = { paragraph },
    };

    // A single input is sent per request, and its embedding is read from Data[0].
    var returnValue = openAIClient.GetEmbeddings(embeddingOptions);
    float[] embeddingVector = returnValue.Value.Data[0].Embedding.ToArray();

    // Store the paragraph together with its embedding under a freshly generated id.
    inputDocuments.Add(new Dictionary<string, object>
    {
        ["id"] = Guid.NewGuid().ToString(),
        ["content"] = paragraph,
        ["contentVector"] = embeddingVector,
    });
}

// Print the embedding of the first document as a quick sanity check.
// If no paragraphs were produced, fall back to an empty vector instead of throwing.
float[] firstDocumentVector = inputDocuments.Count > 0
    ? (float[])inputDocuments[0]["contentVector"]
    : Array.Empty<float>();
string embeddingString = String.Join(", ", firstDocumentVector);
Console.WriteLine(embeddingString);

Create Vector Index on Azure AI Search

In [8]:
#r "nuget: Azure.Search.Documents, 11.5.1"
#r "nuget: Azure.Identity, 1.10.4"

In [9]:
using Azure.Search.Documents;
using Azure.Search.Documents.Indexes;
using Azure.Search.Documents.Indexes.Models;
using Azure.Search.Documents.Models;
using Azure;

// Connection settings for the Azure AI Search service, taken from the .env values.
var serviceEndpoint = envVars["AZURE_SEARCH_SERVICE_ENDPOINT"];
var searchAdminKey = envVars["AZURE_SEARCH_ADMIN_KEY"];

// Clients: indexClient manages the index definition, searchClient works with the
// documents of the index named below.
string indexName = "vectorindex";
AzureKeyCredential searchCredential = new(searchAdminKey);
SearchIndexClient indexClient = new(new Uri(serviceEndpoint), searchCredential);
SearchClient searchClient = indexClient.GetSearchClient(indexName);

// Names that tie the vector field to its search profile and the profile to its
// HNSW algorithm configuration. The HNSW configuration keeps its default values.
string vectorSearchProfile = "my-vector-profile";
string vectorSearchHnswConfig = "my-hnsw-vector-config";

// Index schema: a string key, the searchable chunk text, and the embedding vector.
SearchIndex searchIndex = new(indexName)
{
    VectorSearch = new()
    {
        Profiles =
        {
            new VectorSearchProfile(vectorSearchProfile, vectorSearchHnswConfig)
        },
        Algorithms =
        {
            new HnswAlgorithmConfiguration(vectorSearchHnswConfig)
        }
    },
    Fields =
    {
        // Document key.
        new SimpleField("id", SearchFieldDataType.String)
        {
            IsKey = true,
            IsFilterable = true,
            IsSortable = true
        },
        // Text of the chunk.
        new SearchableField("content")
        {
            IsFilterable = true
        },
        // Embedding of the chunk, stored as a collection of single-precision floats.
        new SearchField("contentVector", SearchFieldDataType.Collection(SearchFieldDataType.Single))
        {
            IsSearchable = true,
            // text-embedding-ada-002 produces 1,536-dimensional vectors, so every document stores 1,536 floats.
            VectorSearchDimensions = 1536,
            VectorSearchProfileName = vectorSearchProfile
        }
    }
};

// Create the index, or update it if one with this name already exists.
indexClient.CreateOrUpdateIndex(searchIndex);

Upload Embeddings to Azure AI Search

In [None]:
// Upload every embedding document to the index and keep the per-document results.
// NOTE(review): with default options the SDK is documented to throw only when the whole
// request fails, so individual failures have to be read from the result — confirm
// against the SDK version in use.
IndexDocumentsResult uploadResult = await searchClient.IndexDocumentsAsync(IndexDocumentsBatch.Upload(inputDocuments));

// Report each document that was not indexed, then print a summary.
int failedUploads = 0;
foreach (IndexingResult indexingResult in uploadResult.Results)
{
    if (!indexingResult.Succeeded)
    {
        failedUploads++;
        Console.WriteLine($"Failed to index document '{indexingResult.Key}': {indexingResult.ErrorMessage}");
    }
}

Console.WriteLine($"Indexed {uploadResult.Results.Count - failedUploads} of {uploadResult.Results.Count} documents.");

Performing a vector similarity search

In [None]:
// Natural-language question that is embedded for the similarity search and sent to the chat model.
string query = "Can you provide the timestamp for the most recent information you have on Azure Functions? Please specify the date and time up to your last update. Give me only the date.";

In [None]:
// Generate the embedding for the query text.
EmbeddingsOptions embeddingOptions = new()
{
    DeploymentName = "text-embedding-ada-002",
    Input = { query },
};
var returnValue = openAIClient.GetEmbeddings(embeddingOptions);
float[] queryEmbeddings = returnValue.Value.Data[0].Embedding.ToArray();

// Ask for the 3 nearest neighbours on the "contentVector" field, limit the result
// set to 3 documents and return only the "content" field of each.
var searchOptions = new SearchOptions
{
    VectorSearch = new()
    {
        // queryEmbeddings is already a float[], so it is passed without another copy.
        Queries = { new VectorizedQuery(queryEmbeddings) { KNearestNeighborsCount = 3, Fields = { "contentVector" } } }
    },
    Size = 3,
    Select = { "content" },
};

// Collects the returned documents so later cells can use them as RAG context.
List<SearchDocument> searchDocuments = new List<SearchDocument>();

// Run the search.
// NOTE(review): the query text is passed as the search text in addition to the vector
// query, so this is presumably executed as a hybrid (text + vector) search. Pass null
// as the first argument if a pure vector search is intended — confirm.
SearchResults<SearchDocument> response = await searchClient.SearchAsync<SearchDocument>(query, searchOptions);

// Print each hit with its score and remember the document.
await foreach (SearchResult<SearchDocument> result in response.GetResultsAsync())
{
    searchDocuments.Add(result.Document);
    Console.WriteLine($"Score: {result.Score}\n");
    Console.WriteLine($"Content: {result.Document["content"]}\n");
}

Console.WriteLine($"Total Results: {searchDocuments.Count}");

Retrieval Augmented Generation (RAG) - Standard GPT Output

In [None]:
// Name of the chat model deployment, read from the .env settings.
var chatDeploymentName = envVars["OPENAI_CHAT_DEPLOYMENT_NAME"];

// Baseline request: the model receives only the system prompt and the question,
// without any retrieved document content.
ChatCompletionsOptions chatCompletionsOptions = new()
{
    MaxTokens = 100,
    DeploymentName = chatDeploymentName,
    Messages =
    {
        new ChatRequestSystemMessage("You are a helpful assistant and always tell the truth. You dont talk much."),
        new ChatRequestUserMessage(query)
    }
};

Response<ChatCompletions> response = openAIClient.GetChatCompletions(chatCompletionsOptions);

// Print the message text of the first returned choice.
string standardAnswer = response.Value.Choices[0].Message.Content;
Console.WriteLine(standardAnswer);

Retrieval Augmented Generation (RAG) - Augmented GPT Output

In [None]:
// The augmented prompt needs at least one retrieved document. Fail with a clear
// message instead of an index-out-of-range error when the search returned nothing.
if (searchDocuments.Count == 0)
{
    throw new InvalidOperationException("No search results are available to augment the prompt. Run the vector search first and make sure it returns at least one document.");
}

// Use the content of the first search result as grounding context.
// A null field value is treated as empty text.
string firstDocumentContent = searchDocuments[0]["content"]?.ToString() ?? string.Empty;

// Same request as the standard one, except that the system prompt also carries
// the retrieved content.
var chatCompletionsOptions = new ChatCompletionsOptions()
{
    DeploymentName = chatDeploymentName, 
    Messages =
    {
        new ChatRequestSystemMessage($"You are a helpful assistant and always tell the truth. You dont talk much. Here is what you know : {firstDocumentContent}"),
        new ChatRequestUserMessage(query)
    },
    MaxTokens = 100
};

Response<ChatCompletions> response = openAIClient.GetChatCompletions(chatCompletionsOptions);

// Print the message text of the first returned choice.
Console.WriteLine(response.Value.Choices[0].Message.Content);