In [12]:
import numpy as np
from openai import OpenAI
import pandas as pd

In [2]:
# Import OpenAI key

with open("key", "r") as f:
    key = f.read().strip()

In [3]:
# Create OpenAI client
client = OpenAI(
    api_key=key,
)

In [4]:
# Define a helper function to calculate embeddings
def get_embedding_vec(input):
    """Returns the embeddings vector for a given input"""
    return (
        client.embeddings.create(
            input=input,
            model="text-embedding-3-large",  # We use the new embeddings model here (announced end of Jan 2024)
            # dimensions=... # You could limit the number of output dimensions with the new embeddings models
        )
        .data[0]
        .embedding
    )

In [8]:
places = []
with open("places.txt", "r") as f:
    lines = f.read().split("\n")
    for line in lines:
        parts = line.split(": ")
        places.append(
            {
                "Name": parts[0],
                "Description": parts[1],
                "Embedding": get_embedding_vec(parts[1]),
            }
        )
places

[{'Name': 'Berlin, Deutschland',
  'Description': 'Eine Stadt reich an Geschichte und Kultur. Aktivitäten',
  'Embedding': [0.03170904517173767,
   0.04415890946984291,
   -0.008206748403608799,
   0.04055098816752434,
   0.009197656065225601,
   0.010899984277784824,
   -0.01441897638142109,
   0.003782599465921521,
   0.0026170124765485525,
   -0.04578501358628273,
   -0.02012304775416851,
   0.020745540037751198,
   -0.024175606667995453,
   -0.032090164721012115,
   0.039992015808820724,
   -0.03015916422009468,
   -0.042964737862348557,
   -0.03752744942903519,
   -0.02240975759923458,
   -0.014850910753011703,
   0.036028385162353516,
   0.003341137431561947,
   -0.005583383608609438,
   0.0057802945375442505,
   -0.012627720832824707,
   -0.0033728971611708403,
   0.0023248218931257725,
   0.004859258886426687,
   0.022320829331874847,
   0.008410011418163776,
   -0.00474174739792943,
   -0.024950547143816948,
   0.00992813240736723,
   -0.030768953263759613,
   -0.0450227782130

In [27]:
search = "Ich würde heuer gerne einen Sightseeing-Urlaub machen."
search_embedding = get_embedding_vec(search)

In [28]:
data = {"Name": [], "Similarity": []}
for place in places:
    similarity = np.dot(search_embedding, place["Embedding"])
    data["Name"].append(place["Name"])
    data["Similarity"].append(similarity)
pd.DataFrame(data).sort_values("Similarity", ascending=False)

Unnamed: 0,Name,Similarity
4,"Santorini, Griechenland",0.366219
0,"Berlin, Deutschland",0.360439
8,"Kyoto, Japan",0.352952
2,"Costa del Sol, Spanien",0.335308
9,"Yellowstone Nationalpark, USA",0.330626
3,"Chamonix, Frankreich",0.323846
5,"Amazonas, Brasilien",0.311304
1,"München, Deutschland",0.300923
7,"Serengeti, Tansania",0.292485
6,"Great Barrier Reef, Australien",0.271742
