Connect to the Azure OpenAI API

In [1]:
import os
import openai
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv
from openai.embeddings_utils import cosine_similarity

# Load environment variables (presumably from a local .env file) before the
# os.getenv() lookups below read them.
load_dotenv()

# Configure the module-level openai client for an Azure-hosted deployment.
openai.api_type = "azure"
openai.api_version = "2023-03-15-preview" # alternative: "2022-12-01"
# Endpoint and key come from the environment; os.getenv() yields None when a
# variable is unset, so a missing value only surfaces on the first API call.
openai.api_base = os.getenv('OPENAI_API_BASE')  
openai.api_key = os.getenv('OPENAI_API_KEY') 

# Shared embedding client used by every cell below.
# NOTE(review): chunk_size=1 presumably limits each request to a single input,
# as the Azure endpoint required at the time - confirm before raising it.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", chunk_size=1)

Generating the first embeddings

In [2]:
# Embed one short string and print the raw result (a list of floats) to show
# what an embedding vector looks like.
e = embeddings.embed_query("Nils")
print(e)


[-0.00987802965574538, -0.03519255115341251, 0.00016976826937833967, -0.005283519914629021, -0.002232659963064543, 0.02952146132816036, -0.02142558550295886, 0.010361661769903614, -0.007055735135774334, -0.011209674792631382, 0.005369646302733432, 0.004548133599654411, 0.006654916229480111, -0.0032595509583286464, -0.0005614775307222291, -0.007877247838853354, 0.035139547720147137, -0.0091360179116126, 0.03225100216822357, -0.019265801768174353, -0.018258784964908858, -0.007897123195005242, -0.0006827997067918376, 0.0006890107555893027, -0.01718551759535215, 0.0019361864437370771, 0.014084970417561234, -0.011050672874738902, 0.02059082240722481, -0.034265035774754565, 0.034450536615311854, -0.006688041978287029, 0.0019146549631263025, -0.017344520444567253, 0.0016869170116856735, -0.00903664113085316, -0.0033556150245090013, -0.008864388354644337, 0.016496508353162114, 0.012369068084631186, 0.006048719263831462, 0.005343145983084685, 0.009613025527935288, -0.02432738005055352, -0.02562

Cosine similarity measures the similarity between two vectors of an inner product space.

In [3]:
def calculateSimilarity(word1, word2):
    """Print the cosine similarity between the embeddings of two texts.

    Both arguments are embedded with the notebook-level ``embeddings`` client
    (one ``embed_query`` call each, ``word1`` first). The score is printed
    together with the two inputs; nothing is returned.
    """
    first_vector = embeddings.embed_query(word1)
    second_vector = embeddings.embed_query(word2)
    score = cosine_similarity(first_vector, second_vector)
    print (f'{word1} vs {word2}  \nSimilarity: {score}')

# Compare "King" with a related word, an unrelated word, and its German translation.
for other in ("Queen", "Beggar", "König"):
    calculateSimilarity("King", other)

King vs Queen  
Similarity: 0.911440113812746
King vs Beggar  
Similarity: 0.8034663528403203
King vs König  
Similarity: 0.8791565652783507


In [4]:
# Compare several cities against the same reference city.
for city in ("Madrid", "Zurich", "Singapore"):
    calculateSimilarity(city, "Barcelona")

Madrid vs Barcelona  
Similarity: 0.908916260466547
Zurich vs Barcelona  
Similarity: 0.8199990329493025
Singapore vs Barcelona  
Similarity: 0.8060094777051502


In [44]:
# Compare several Swiss sports clubs against the term "Schweizermeister".
for club in ("SC Bern", "FC Basel", "HC Fribourg-Gottéron", "HC Ambri Piotta"):
    calculateSimilarity(club, "Schweizermeister")

SC Bern vs Schweizermeister  
Similarity: 0.8362285187598169
FC Basel vs Schweizermeister  
Similarity: 0.8564257750770068
HC Fribourg-Gottéron vs Schweizermeister  
Similarity: 0.8103693096813419
HC Ambri Piotta vs Schweizermeister  
Similarity: 0.8057841301750931


Import a list of sessions of the Azure Bootcamp Switzerland 2023 and find similar sessions

In [5]:
import pandas as pd

# Read the session list from a CSV file in the working directory and preview
# the first rows (columns shown in the output: Key, Title, Speaker, Content).
sessions = pd.read_csv('./sessions.csv')
sessions.head()

Unnamed: 0,Key,Title,Speaker,Content
0,decentriq,How we Build Data Clean Rooms on Azure Confide...,"DAVID STURZENEGGER, PRIMO AMREIN",How we Build Data Clean Rooms on Azure Confide...
1,paxdevops,Kubernetes @ PAX - DevOps at a Swiss Insurance,"SASCHA SPREITZER, ESRA DOERKSEN",Kubernetes @ PAX - DevOps at a Swiss Insurance...
2,axpodevops,Axpo DevOps Dojo,ARINDAM MITRA,Axpo DevOps Dojo: Axpo Solutions Journey to De...
3,lakehouse,Building a Lakehouse Platform on Azure with Da...,"HANSJÖRG WINGEIER, MATHIAS HERZOG",Building a Lakehouse Platform on Azure with Da...
4,cloudcost,Azure FinOps: The Quiz,"ROLAND KRUMMENACHER, STEFAN DENK",Azure FinOps: The Quiz: Are you looking for wa...


Add a new column and calculate the embeddings.

In [7]:
# Embed every session's Content text (one embed_query call per row) and store
# the resulting vectors in a new 'embedding' column on the same frame.
sessions['embedding'] = sessions['Content'].apply(embeddings.embed_query)
sessions.head()

Unnamed: 0,Key,Title,Speaker,Content,embedding
0,decentriq,How we Build Data Clean Rooms on Azure Confide...,"DAVID STURZENEGGER, PRIMO AMREIN",How we Build Data Clean Rooms on Azure Confide...,"[-0.003308611010662261, 0.008811408087100165, ..."
1,paxdevops,Kubernetes @ PAX - DevOps at a Swiss Insurance,"SASCHA SPREITZER, ESRA DOERKSEN",Kubernetes @ PAX - DevOps at a Swiss Insurance...,"[0.01691307188824419, -0.01835013061267945, 0...."
2,axpodevops,Axpo DevOps Dojo,ARINDAM MITRA,Axpo DevOps Dojo: Axpo Solutions Journey to De...,"[0.018055994178347995, -0.01187460038905799, 0..."
3,lakehouse,Building a Lakehouse Platform on Azure with Da...,"HANSJÖRG WINGEIER, MATHIAS HERZOG",Building a Lakehouse Platform on Azure with Da...,"[-0.005630783527336097, -0.016711608816673194,..."
4,cloudcost,Azure FinOps: The Quiz,"ROLAND KRUMMENACHER, STEFAN DENK",Azure FinOps: The Quiz: Are you looking for wa...,"[0.004734826189064798, -0.026251829688503463, ..."


Find similar sessions

In [8]:
# Key of the session used as the reference point; edit by hand to explore
# other sessions.
preffered_session = "openai" # "avnm" "quantum" "powerbi"

# Fetch the stored embedding of the reference session. Because the key is typed
# by hand, fail with a clear message when it matches no row instead of the
# opaque IndexError that `.values[0]` raises on an empty selection.
reference_rows = sessions.loc[sessions['Key'] == preffered_session, 'embedding']
if reference_rows.empty:
    raise KeyError(f"No session with Key == {preffered_session!r} found in sessions")
e = reference_rows.iloc[0]

# Cosine similarity of every session's embedding against the reference one.
similar_sessions = sessions['embedding'].apply(lambda x: cosine_similarity(x, e))

# Put title, speaker and score side by side, best match first. The score Series
# keeps the name 'embedding', so that is the label of the similarity column.
# The reference session itself is included and ranks first with a score of 1.0.
recommandations = pd.concat(
    [sessions['Title'], sessions['Speaker'], similar_sessions],
    axis=1,
).sort_values(by='embedding', ascending=False)
recommandations.head()

Unnamed: 0,Title,Speaker,embedding
12,Use the power of OpenAI to leverage your busin...,David Schneider,1.0
18,Develop for inclusion using cognitive services...,ANDRÉ MELANCIA,0.83074
10,Push your Azure tenant to the next level with ...,DENIS SELIMOVIC,0.819536
8,Azure Networking vNext - How to build modern c...,ERIC BERG,0.798539
16,Fully automated & cloud-native data platform,TIM GIGER,0.796223
