In [48]:
#r "nuget: Azure.AI.OpenAI"
#r "nuget: Azure.Identity"
#r "nuget: Azure"
#r "nuget: Newtonsoft.Json"

using Azure;
using Azure.AI.OpenAI;
using Azure.Identity;
using OpenAI.Embeddings;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.Net.Http;
using System.Collections.ObjectModel;

Create a new container to store the vectors.
First, we declare a vector embedding policy for the container.
Then we create a vector index on the vector path,
and exclude that path from the regular index so the vector is not also indexed as ordinary data.

In [None]:
// Connect to the local Cosmos DB emulator (this is the emulator's well-known, public key)
// and get a reference to the existing "StackOverflow" database.
var cstring = "AccountEndpoint=https://localhost:8081/;AccountKey=C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==";
var client = new CosmosClient(cstring);
var db = client.GetDatabase("StackOverflow");

// Vector embedding policy: describes the vector stored at /bodyvector
// (1536 float32 dimensions, compared with cosine distance).
List<Embedding> embeddings = new List<Embedding>()
{
      new Embedding()
      {
          Path = "/bodyvector",
          DataType = VectorDataType.Float32,
          DistanceFunction = DistanceFunction.Cosine,
          Dimensions = 1536,
      }
};
var collection = new Collection<Embedding>(embeddings);

// Container definition: partitioned by /OwnerUserId, with a quantizedFlat vector index
// on the same path the embedding policy describes.
ContainerProperties props = new ContainerProperties("VectorPosts", "/OwnerUserId"){
    VectorEmbeddingPolicy = new (collection),
    IndexingPolicy = new IndexingPolicy(){
        VectorIndexes = new Collection<VectorIndexPath>()
        {
            new VectorIndexPath()
            {
                Path = "/bodyvector",
                Type = VectorIndexType.QuantizedFlat
            }
        }
    }
};
props.IndexingPolicy.IncludedPaths.Add(new IncludedPath()
{
    Path = "/*"
});
// Exclude the whole vector subtree from the regular index. "/bodyvector/?" would only
// match a scalar stored directly at /bodyvector, so the array elements would still be
// covered by the "/*" included path; "/bodyvector/*" covers every element.
props.IndexingPolicy.ExcludedPaths.Add(new ExcludedPath()
{
    Path = "/bodyvector/*"
});

// NOTE: if "VectorPosts" already exists, this call returns the existing container and
// does NOT apply the policies above; drop the container first to pick up policy changes.
var postContainer = await db.CreateContainerIfNotExistsAsync(props, throughput: 4000);

In [73]:
/// <summary>
/// Document model for a post. In this notebook it is deserialized from the downloaded
/// stackoverflow.json, upserted into the "VectorPosts" container, and reused as the
/// result type of the vector search query.
/// </summary>
public class Post    
{
    // NOTE(review): presumably the Cosmos DB item id; confirm the source JSON supplies it.
    public string id { get; set; }
    public int PostId { get; set; }
    /// <summary>Post body text; this is the text passed to GenerateVector to build <see cref="bodyvector"/>.</summary>
    public string PostBody { get; set; }
    public string Title { get; set; }
    public int ViewCount { get; set; }
    public int AnswerCount { get; set; }
    public int CommentCount { get; set; }
    public int FavoriteCount { get; set; }
    public int AcceptedAnswerId { get; set; }
    public DateTime? CreatedOn { get; set; }
    public DateTime? ClosedDate { get; set; }
    /// <summary>Partition key value: the container is created with partition key path "/OwnerUserId".</summary>
    public int OwnerUserId { get; set; }
    public string OwnerDisplayName { get; set; }
    public string PostType { get; set; }
    public int Score { get; set; }
    public string Tags { get; set; }
    /// <summary>Embedding of <see cref="PostBody"/>; stored at "/bodyvector", the path named in the container's vector policy and vector index.</summary>
    public float[] bodyvector {get;set;}
    /// <summary>
    /// Receives the "score" alias (the VectorDistance value) projected by the search query.
    /// NOTE(review): differs from <see cref="Score"/> only by case; confirm the serializer maps both as intended.
    /// </summary>
    public string score {get;set;}
}

Declare the client used to access Azure OpenAI.
We will use the deployment named "embedding".

In [51]:
// Read the Azure OpenAI API key from the environment instead of embedding it in the notebook.
// A key that has been committed to source control should be treated as leaked and rotated.
var openAIKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY");
if (string.IsNullOrWhiteSpace(openAIKey))
{
    throw new InvalidOperationException(
        "Set the AZURE_OPENAI_API_KEY environment variable before running this cell.");
}

// Client for the Azure OpenAI resource, plus an embedding client bound to the
// deployment named "embedding".
var openAIClient = new AzureOpenAIClient(
    new Uri("https://savranweb.openai.azure.com/"),
    new AzureKeyCredential(openAIKey));
var aiclient = openAIClient.GetEmbeddingClient("embedding");

In [None]:
/// <summary>
/// Requests an embedding for <paramref name="text"/> from the notebook-level
/// <c>aiclient</c> embedding client and returns it as a float vector.
/// </summary>
/// <param name="text">Text to embed.</param>
/// <returns>The embedding values returned by the service.</returns>
ReadOnlyMemory<float> GenerateVector(string text)
    => aiclient.GenerateEmbedding(text).Value.ToFloats();

In [53]:
// Smoke test: embed a sample sentence and print its components as a comma-separated list.
float[] sampleVector = GenerateVector("This is a test embedding").ToArray();
string sampleText = string.Join(",", sampleVector);
Console.WriteLine(sampleText);

-0.025809245,0.002151319,0.0035323112,-0.00045182637,-0.007815197,0.020700067,-0.0027488163,-0.00874354,-0.018382503,-0.027020698,0.001398275,0.029785976,-0.005744532,0.0012970462,0.0022978128,0.012384487,0.016525818,0.0052474407,0.009408523,-0.02055522,0.00014958004,-0.0012781173,-0.00061971816,0.0060868994,-0.012713687,-0.011561488,0.0108240815,-0.03868753,8.133544E-06,-0.009039819,0.006346967,-0.016091274,-0.0013735851,-0.035053167,-0.002885434,-0.0038779706,-0.0048293574,-0.03489515,0.027652763,-0.0064128074,0.010712153,0.0121013755,0.0071172942,-0.025545886,-0.013523517,0.0038615107,-0.0016394138,-0.010804329,-0.0075320858,0.014511116,0.022662098,0.008993732,-0.032077204,0.0031718377,-0.0053955805,0.01161416,-0.0035323112,0.021279458,0.011410057,-0.012068455,-0.0127268545,0.0116799995,-0.015432875,0.00714363,-0.0068539344,0.0005970857,0.0069131907,0.011666832,0.018514182,0.0052408567,0.019936325,0.017947959,-8.41002E-05,-0.005991432,0.020068005,0.0036936188,-0.009316348,-0.0045462

In [54]:
// Download the sample posts. The HttpClient is only needed for this one request,
// so dispose it as soon as the payload has been read.
string json;
using (var http = new HttpClient())
{
    json = await http.GetStringAsync("https://raw.githubusercontent.com/hsavran/Presentations/refs/heads/main/stackoverflow.json");
}
var postList = JsonConvert.DeserializeObject<List<Post>>(json);
postList.Count.Display();

// Re-create the client with bulk execution enabled and get the target container.
client = new CosmosClient(cstring, new CosmosClientOptions() {AllowBulkExecution = true});
Container postContainer = client.GetContainer("StackOverflow", "VectorPosts");

// Bulk mode groups operations that are in flight at the same time. Awaiting each upsert
// before starting the next one leaves nothing to group, so start every upsert first and
// await them together.
var upsertTasks = new List<Task>();
foreach (var post in postList.Take(1000))
{
    // Embed the post body and store the vector on the document before writing it.
    post.bodyvector = GenerateVector(post.PostBody).ToArray();
    upsertTasks.Add(postContainer.UpsertItemAsync(post, new PartitionKey(post.OwnerUserId)));
}
await Task.WhenAll(upsertTasks);

Run a vector search

In [28]:
#r "nuget: Newtonsoft.Json"
#r "nuget: Microsoft.Azure.Cosmos"
using Microsoft.Azure.Cosmos;

In [57]:
// Connect to the local Cosmos DB emulator and resolve the container that holds the vectors.
string cstring = "AccountEndpoint=https://localhost:8081/;AccountKey=C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==";
CosmosClient client = new(cstring);
Database db = client.GetDatabase("StackOverflow");
Container vectorContainer = db.GetContainer("VectorPosts");

In [77]:
  // Embed the search phrase, then ask Cosmos DB for the 10 posts ordered by
  // VectorDistance between their stored bodyvector and that embedding.
  float[] embedding = GenerateVector("problems of SQL Server").ToArray();
  var queryDef = new QueryDefinition(
      query: "SELECT TOP 10 c.PostBody, VectorDistance(c.bodyvector,@embedding) AS score FROM c ORDER BY VectorDistance(c.bodyvector,@embedding)"
      ).WithParameter("@embedding", embedding);
  FeedIterator<Post> feed = vectorContainer.GetItemQueryIterator<Post>(
      queryDefinition: queryDef
  );
  try
  {
      // Drain every page of results and print the score and body of each hit.
      while (feed.HasMoreResults)
      {
          FeedResponse<Post> response = await feed.ReadNextAsync();
          foreach (Post item in response)
          {
              Console.WriteLine($"Found item:\t{item.score}\t{item.PostBody}");
          }
      }
  }
  finally
  {
      // FeedIterator is IDisposable; release it even if a page read throws.
      feed.Dispose();
  }

Found item:	0.8281025153516627	<p>MERGE is one of the new features in SQL Server 2008, by the way.</p>

Found item:	0.8114448861387039	<p>One thing you have to watch out for is the fairly severe differences in the way SQL&nbsp;Server and MySQL implement the SQL syntax.</p>

<p>Here's a nice <a href="http://troels.arvin.dk/db/rdbms/" rel="noreferrer">Comparison of Different SQL Implementations</a>.</p>

<p>For example, take a look at the top-n section. In MySQL:</p>

<pre><code>SELECT age
FROM person
ORDER BY age ASC
LIMIT 1 OFFSET 2
</code></pre>

<p>In SQL&nbsp;Server (T-SQL):</p>

<pre><code>SELECT TOP 3 WITH TIES *
FROM person
ORDER BY age ASC
</code></pre>

Found item:	0.8096576115258016	<p>There's no known trick 'in the wild' for getting around this, other than using CTP-6 of SQL Server 2008 (in favour of the RTM).</p>

<p>the reasons for removing backward compatability (and a lot more discussion besides) are provided at the <a href="https://connect.microsoft.com/SQLServer/feedbac