In [33]:
import ollama
import numpy as np
from Levenshtein import distance as levenshtein_distance
from scipy.spatial.distance import cosine

def generate_embedding(text, model='snowflake-arctic-embed'):
    """Return the embedding vector that an Ollama model produces for ``text``.

    Args:
        text: The prompt string to embed.
        model: Name of the Ollama embedding model to query. Defaults to
            ``'snowflake-arctic-embed'``, the model this notebook was
            written against.

    Returns:
        The value stored under the ``'embedding'`` key of the response
        returned by ``ollama.embeddings``.
    """
    response = ollama.embeddings(model=model, prompt=text)
    return response['embedding']

def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    SciPy's ``cosine`` yields a *distance*; subtracting it from 1 converts
    it back to a similarity.
    """
    distance = cosine(vec1, vec2)
    return 1 - distance

def compare_strings(string1, string2, show_levenshtein=False):
    """Print how close ``string2`` is to ``string1`` in embedding space.

    Both strings are embedded with ``generate_embedding`` and the cosine
    similarity of the two embeddings is printed to four decimal places,
    after a line echoing ``string2``. A final ``print("\\n")`` separates
    consecutive reports.

    Args:
        string1: Reference text.
        string2: Text compared against the reference; echoed in the output.
        show_levenshtein: When true, also print the Levenshtein edit
            distance between the two strings. Off by default, in which
            case the edit distance is not computed at all.

    Returns:
        None. Results are written to stdout only.
    """
    # Embed first so nothing is printed if embedding either string fails.
    embedding1 = generate_embedding(string1)
    embedding2 = generate_embedding(string2)
    cos_sim = cosine_similarity(embedding1, embedding2)

    print("Second String: ", string2)
    if show_levenshtein:
        lev_distance = levenshtein_distance(string1, string2)
        print(f"Levenshtein distance: {lev_distance}")
    print(f"Cosine similarity: {cos_sim:.4f}")
    print("\n")

# Example usage: one short reference word scored against several longer
# character descriptions and one other single word.
string1 = "man"
string2 = "Ralph Wiggum: Eccentric, bewildered, innocent, Chief Wiggum’s son, Ralph, school, humorous, nonsensical, childlike, endearing."
string3 = "Kearney Zzyzwicz: Bully, bald, older appearance, Kearney, school, tough, adult responsibilities, rebellious, humorous."
string4 = "Comicbook Guy: Sarcastic, overweight, comic book store owner, pop culture expert, critical, Comic Book Guy, nerdy, knowledgeable, cynical."
string5 = "Jessica Lovejoy: Rebellious, Reverend’s daughter, Jessica, Bart, manipulative, mischievous, charming, complex."
string6 = "grapefruit"

# Every candidate is compared against the same reference string.
for candidate in (string2, string3, string4, string5, string6):
    compare_strings(string1, candidate)


Second String:  Ralph Wiggum: Eccentric, bewildered, innocent, Chief Wiggum’s son, Ralph, school, humorous, nonsensical, childlike, endearing.
Cosine similarity: 0.6543


Second String:  Kearney Zzyzwicz: Bully, bald, older appearance, Kearney, school, tough, adult responsibilities, rebellious, humorous.
Cosine similarity: 0.7204


Second String:  Comicbook Guy: Sarcastic, overweight, comic book store owner, pop culture expert, critical, Comic Book Guy, nerdy, knowledgeable, cynical.
Cosine similarity: 0.6728


Second String:  Jessica Lovejoy: Rebellious, Reverend’s daughter, Jessica, Bart, manipulative, mischievous, charming, complex.
Cosine similarity: 0.6500


Second String:  grapefruit
Cosine similarity: 0.6632




In [34]:
# Reference sentence, followed by variants that differ only in the word
# placed before "dog" (replaced, misspelled, or dropped).
string1 = "The quick brown fox jumps over the lazy dog."
string2 = "The quick brown fox jumps over the tired dog."
string3 = "The quick brown fox jumps over the industrious dog."
string4 = "The quick brown fox jumps over the Talinn dog."
string5 = "The quick brown fox jumps over the layzy dog."
string6 = "The quick brown fox jumps over the dog."

for variant in (string2, string3, string4, string5, string6):
    compare_strings(string1, variant)

Second String:  The quick brown fox jumps over the tired dog.
Cosine similarity: 0.9605


Second String:  The quick brown fox jumps over the industrious dog.
Cosine similarity: 0.9311


Second String:  The quick brown fox jumps over the Talinn dog.
Cosine similarity: 0.9191


Second String:  The quick brown fox jumps over the layzy dog.
Cosine similarity: 0.9473


Second String:  The quick brown fox jumps over the dog.
Cosine similarity: 0.9633




In [32]:
# Reference sentence, followed by variants with a single change each:
# swapped nouns, a replaced adjective, a replaced verb, or a misspelling.
string1 = "The quick brown fox jumps over the lazy dog."
string2 = "The quick brown dog jumps over the lazy fox."
string3 = "The slow brown fox jumps over the lazy dog."
string4 = "The quick purple fox jumps over the lazy dog."
string5 = "The quick brown fox Toyota over the lazy dog."
string6 = "The quick brown fox jumps over teh lazy dog."

for variant in (string2, string3, string4, string5, string6):
    compare_strings(string1, variant)

Second String:  The quick brown dog jumps over the lazy fox.
Cosine similarity: 0.9878
Second String:  The slow brown fox jumps over the lazy dog.
Cosine similarity: 0.9819
Second String:  The quick purple fox jumps over the lazy dog.
Cosine similarity: 0.9768
Second String:  The quick brown fox Toyota over the lazy dog.
Cosine similarity: 0.9096
Second String:  The quick brown fox jumps over teh lazy dog.
Cosine similarity: 0.9775


In [28]:
# Full sentence used as the reference.
string1 = "The quick brown fox jumps over the lazy dog."

# The sentence split into two pieces ...
string2 = "The quick brown fox"
string3 = "jumps over the lazy dog."

# ... and into three pieces.
string4 = "The quick brown"
string5 = "fox jumps over"
string6 = "the lazy dog."

# Score each fragment against the full sentence.
for fragment in (string2, string3, string4, string5, string6):
    compare_strings(string1, fragment)

Second String:  The quick brown fox
Cosine similarity: 0.8818
Second String:  jumps over the lazy dog.
Cosine similarity: 0.9306
Second String:  The quick brown
Cosine similarity: 0.8418
Second String:  fox jumps over
Cosine similarity: 0.8710
Second String:  the lazy dog.
Cosine similarity: 0.8685


In [31]:
# Full reference sentence.
string1 = "The quick brown fox jumps over the lazy dog."
# Two-way split of the sentence.
string2, string3 = "The quick brown fox", "jumps over the lazy dog."
# Three-way split of the sentence.
string4, string5, string6 = "The quick brown", "fox jumps over", "the lazy dog."

def sum_embeddings(string1, string2):
    """Embed both strings and return the element-wise sum of the embeddings.

    Each string is embedded separately with ``generate_embedding``; the
    result is a NumPy array.
    """
    first, second = generate_embedding(string1), generate_embedding(string2)
    return np.asarray(first) + np.asarray(second)



# Compare the summed embeddings of the two halves with the embedding of the
# whole sentence.
halves_embedding = sum_embeddings(string2, string3)
whole_embedding = generate_embedding(string1)
sum1 = cosine_similarity(halves_embedding, whole_embedding)
print(f"Sum1: {sum1:.4f}")

# Per-fragment similarities against the full sentence, for reference.
for fragment in (string2, string3, string4, string5, string6):
    compare_strings(string1, fragment)

Sum1: 0.9518
Second String:  The quick brown fox
Cosine similarity: 0.8818
Second String:  jumps over the lazy dog.
Cosine similarity: 0.9306
Second String:  The quick brown
Cosine similarity: 0.8418
Second String:  fox jumps over
Cosine similarity: 0.8710
Second String:  the lazy dog.
Cosine similarity: 0.8685


In [7]:
#!pip install scipy gensim
# Using gensim for Word2Vec
from gensim.models import KeyedVectors
word_vectors = KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin', binary=True)

# Look up one word and report the length of its vector.
target = 'Prince'
target_vec = word_vectors[target]
print(len(target_vec))

# The words are fixed rather than prompted for, so the cell runs to
# completion without user interaction and leaves the builtin `input` intact.
king_vec = word_vectors['king']
man_vec = word_vectors['man']

# Difference vector: "king" with the "man" component subtracted.
vector3 = king_vec - man_vec

# Find the words most similar to the difference vector.
similar_words = word_vectors.most_similar(vector3)
print(similar_words)

[ 0.07373047  0.00405884 -0.13574219  0.02209473  0.18066406 -0.04663086
  0.22460938 -0.22949219 -0.04003906  0.22558594 -0.12402344 -0.24316406
 -0.03662109 -0.28710938  0.07714844  0.22460938  0.26171875  0.19628906
 -0.15527344  0.08544922 -0.09570312  0.2890625   0.04467773 -0.13378906
  0.11767578 -0.16503906  0.04101562  0.078125    0.13183594 -0.29296875
 -0.04443359  0.12988281  0.27539062  0.23144531  0.16992188 -0.11425781
 -0.02587891 -0.14355469  0.07568359  0.32226562  0.28125     0.00085831
  0.10791016  0.11816406 -0.04589844 -0.1640625   0.10058594 -0.11767578
  0.03979492 -0.13183594  0.07373047  0.05786133 -0.05664062  0.21484375
 -0.06591797  0.12890625  0.00439453 -0.15234375  0.21386719 -0.00291443
 -0.03076172  0.15820312 -0.09423828 -0.2734375   0.29296875 -0.07421875
 -0.05200195 -0.06201172 -0.07958984  0.09375    -0.13085938 -0.05639648
  0.08105469  0.11914062 -0.09130859 -0.06396484  0.03149414  0.15527344
  0.02758789 -0.03759766 -0.18164062 -0.00891113 -0

In [20]:
# Analogy query: Celtics - Boston + Detroit.
# NOTE: the variable names are carried over from a king/man/woman example
# and do not describe the words they hold here.
king_vec, man_vec, woman_vec = (
    word_vectors[word] for word in ('Celtics', 'Boston', 'Detroit')
)

vector3 = king_vec - man_vec + woman_vec

# Words most similar to the combined vector.
similar_words = word_vectors.most_similar(vector3)
print(similar_words)

[('Pistons', 0.8451752066612244), ('Celtics', 0.7604742646217346), ('Detroit_Pistons', 0.7438536882400513), ('Lakers', 0.6999762058258057), ('Pacers', 0.6926992535591125), ('Sixers', 0.6918555498123169), ('McDyess', 0.6811235547065735), ('Cavs', 0.6745649576187134), ('Rasheed_Wallace', 0.656218945980072), ('Nuggets', 0.6523663997650146)]
