In [1]:
! ollama list

NAME              	ID          	SIZE  	MODIFIED    
llama2:latest     	78e26419b446	3.8 GB	2 weeks ago	
openthaigpt:latest	e242a9902d99	4.1 GB	2 weeks ago	


In [2]:
import requests
import numpy as np

# Function to get embedding from OpenThaiGPT via Ollama
def get_embedding(text, model="openthaigpt",
                  url="http://localhost:11434/api/embeddings", timeout=120):
    """Request an embedding vector for ``text`` from an Ollama server.

    Sends a POST request to the embeddings endpoint with the model name and
    the prompt, and converts the ``'embedding'`` field of the JSON reply into
    a NumPy array.

    Args:
        text: The prompt to embed.
        model: Name of the Ollama model to query.
        url: Full URL of the embeddings endpoint.
        timeout: Seconds to wait for the server before giving up; pass
            ``None`` to wait indefinitely.

    Returns:
        ``np.ndarray`` built from the ``'embedding'`` list in the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the JSON reply has no ``'embedding'`` field.
    """
    response = requests.post(
        url,
        json={"model": model, "prompt": text},
        # Without a timeout, requests waits forever if the server stalls.
        timeout=timeout,
    )
    # Surface HTTP failures (e.g. unknown model) directly instead of as a
    # confusing KeyError on the missing 'embedding' field below.
    response.raise_for_status()
    return np.array(response.json()['embedding'])

# Function to get word embedding (simplified, may need adjustment)
def get_word_embedding(sentence, word):
    """Return the embedding used to represent ``word`` within ``sentence``.

    Simplified placeholder: the embedding of the whole sentence is returned
    and ``word`` is not consulted. Isolating the vector of one specific word
    would need a more sophisticated extraction method.

    Args:
        sentence: Text passed to ``get_embedding``.
        word: Target word; currently unused.

    Returns:
        Whatever ``get_embedding(sentence)`` returns.
    """
    return get_embedding(sentence)

# Example sentences using the English word "bank" in different contexts
TARGET_WORD = "bank"
sentences = [
    "I need to go to the bank to deposit some money.",
    "The river bank was overgrown with wildflowers.",
    "The pilot had to bank the airplane to avoid turbulence.",
    "You can bank on me to finish the project on time.",
    "The food bank is collecting donations for the homeless.",
    "The central bank announced new interest rates to stabilize the economy."
]


# Get embeddings for the target word in each context.
# The word passed here must be the one that actually occurs in the sentences.
embeddings = [get_word_embedding(sentence, TARGET_WORD) for sentence in sentences]

# Function to compute cosine similarity
def cosine_similarity(v1, v2):
    """Compute the cosine similarity between two vectors.

    Args:
        v1: First vector (array-like accepted by ``np.dot`` / ``np.linalg.norm``).
        v2: Second vector, same length as ``v1``.

    Returns:
        ``dot(v1, v2) / (||v1|| * ||v2||)``. If either vector has zero norm
        the similarity is undefined; ``0.0`` is returned instead of dividing
        by zero (which would yield NaN and a RuntimeWarning).
    """
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denominator == 0:
        # A zero vector has no direction; report "no similarity".
        return 0.0
    return np.dot(v1, v2) / denominator

# Compute the full pairwise cosine-similarity matrix (row i, column j compares
# embedding i with embedding j).
n_embeddings = len(embeddings)
similarities = np.zeros((n_embeddings, n_embeddings))
for i, emb_i in enumerate(embeddings):
    for j, emb_j in enumerate(embeddings):
        similarities[i, j] = cosine_similarity(emb_i, emb_j)

# Print each sentence with a short preview of its embedding
for i, sentence in enumerate(sentences):
    print(f"ประโยค {i+1}: {sentence}")
    print(f"Embedding สำหรับ 'bank' (first 5 dimensions): {embeddings[i][:5]}")
    print()

# Print the similarity matrix, one tab-separated row per sentence
print("Cosine Similarities:")
for row in similarities:
    for value in row:
        print(f"{value:.4f}", end="\t")
    print()

# Find the most similar pair of distinct sentences (upper triangle only, so
# each pair is visited once and the diagonal is skipped).
# Start from -inf rather than 0: cosine similarity can be zero or negative,
# and a floor of 0 would leave the placeholder pair (0, 0) as the "result".
max_similarity = -np.inf
max_pair = (0, 0)
for i in range(n_embeddings):
    for j in range(i + 1, n_embeddings):
        if similarities[i, j] > max_similarity:
            max_similarity = similarities[i, j]
            max_pair = (i, j)

print(f"\nประโยคที่มีความคล้ายคลึงกันมากที่สุด: ประโยค {max_pair[0]+1} และประโยค {max_pair[1]+1}")
print(f"ค่าความคล้ายคลึง: {max_similarity:.4f}")
print(f"ประโยค {max_pair[0]+1}: {sentences[max_pair[0]]}")
print(f"ประโยค {max_pair[1]+1}: {sentences[max_pair[1]]}")

ประโยค 1: I need to go to the bank to deposit some money.
Embedding สำหรับ 'bank' (first 5 dimensions): [-0.05061352  0.25588679  0.14729989 -0.24418509 -0.8758502 ]

ประโยค 2: The river bank was overgrown with wildflowers.
Embedding สำหรับ 'bank' (first 5 dimensions): [ 0.09196796  0.43233076  1.2263118  -1.19697857 -0.73142749]

ประโยค 3: The pilot had to bank the airplane to avoid turbulence.
Embedding สำหรับ 'bank' (first 5 dimensions): [-0.0269017   0.81395465  1.48813295 -0.65739274 -1.56267452]

ประโยค 4: You can bank on me to finish the project on time.
Embedding สำหรับ 'bank' (first 5 dimensions): [-0.57595593  1.48642766  0.97439957 -1.22708702 -2.76574588]

ประโยค 5: The food bank is collecting donations for the homeless.
Embedding สำหรับ 'bank' (first 5 dimensions): [-0.15703824  0.43237287  1.23145711 -1.95311415 -3.26178479]

ประโยค 6: The central bank announced new interest rates to stabilize the economy.
Embedding สำหรับ 'bank' (first 5 dimensions): [-1.58153486 -0.9279