In [11]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

class CosineSimilarityChecker:
    """Maintain a list of strings and refuse candidates too similar to it.

    Two strings are compared by turning them into word-count vectors with
    ``CountVectorizer`` and taking the cosine of the angle between those
    vectors. A candidate is rejected when its similarity to any stored
    string reaches ``allowed_similarity_threshold``.
    """

    def __init__(self, allowed_similarity_threshold: float = 0.7) -> None:
        """Create a checker with an empty string list.

        Args:
            allowed_similarity_threshold: A candidate whose similarity to any
                stored string is greater than or equal to this value is
                rejected. Defaults to 0.7.
        """
        # Refitted on each pair inside cosine_similarity, so the vocabulary
        # never carries over from one comparison to the next.
        self.vectorizer = CountVectorizer()
        # Accepted strings, in insertion order.
        self.string_list = []
        self.allowed_similarity_threshold: float = allowed_similarity_threshold

    def add_string(self, string: str) -> None:
        """Append ``string`` unless it is too similar to a stored string.

        Prints whether the string was accepted, followed by one line per
        previously stored string giving its cosine similarity to ``string``.

        Args:
            string: The candidate to compare against every stored string.
        """
        # Pair each stored string with its score *before* any append, so the
        # report below never depends on the list having grown in between.
        scored = [
            (existing_string, self.cosine_similarity(string, existing_string))
            for existing_string in self.string_list
        ]

        too_similar = any(
            similarity >= self.allowed_similarity_threshold
            for _, similarity in scored
        )
        if too_similar:
            print("String is not allowed due to high similarity.")
        else:
            self.string_list.append(string)
            print("String added successfully.")

        # Output cosine similarity for each previously stored string
        for existing_string, similarity in scored:
            print(f"Cosine similarity between '{string}' and '{existing_string}': {similarity:.4f}")

    def cosine_similarity(self, string1: str, string2: str) -> float:
        """Return the cosine similarity of the two strings' count vectors.

        Args:
            string1: First string.
            string2: Second string.

        Returns:
            The dot product of the two count vectors divided by the product
            of their Euclidean norms, as a built-in ``float``. Returns 0.0
            when either vector is all zeros (for example, a string made up
            only of tokens the vectorizer ignores), since the cosine is
            undefined there and the strings share no counted token.
        """
        vectors = self.vectorizer.fit_transform([string1, string2]).toarray()
        vector1, vector2 = vectors[0], vectors[1]

        norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if norm_product == 0:
            # Guard against 0/0 -> nan, which would slip past the
            # ">= threshold" comparison in add_string unnoticed.
            return 0.0

        return float(np.dot(vector1, vector2) / norm_product)

# Demo: seed a checker with a few strings, then offer it new candidates.
checker = CosineSimilarityChecker()

# Seed data. The checker is handed this very list object (no copy), so any
# candidate accepted below is appended to existing_strings as well.
existing_strings = [
    "I love pizza",
    "I enjoy playing soccer",
    "The sun is shining today",
    "Programming is fun"
]
checker.string_list = existing_strings

# Candidates, declared up front and submitted in order by the loop below.
# new_string5/6 and new_string7/8 are identical pairs, presumably so the
# second submission of each exercises the rejection path.
new_string1: str = "I like eating pizza"
new_string2: str = "I enjoy playing basketball"
new_string3: str = "The weather is nice today"
new_string4: str = "I love coding"
new_string5: str = "I love coal"
new_string6: str = "I love coal"
new_string7: str = "I love goals"
new_string8: str = "I love goals"

for candidate in (
    new_string1,
    new_string2,
    new_string3,
    new_string4,
    new_string5,
    new_string6,
    new_string7,
    new_string8,
):
    checker.add_string(candidate)

String added successfully.
Cosine similarity between 'I like eating pizza' and 'I love pizza': 0.4082
Cosine similarity between 'I like eating pizza' and 'I enjoy playing soccer': 0.0000
Cosine similarity between 'I like eating pizza' and 'The sun is shining today': 0.0000
Cosine similarity between 'I like eating pizza' and 'Programming is fun': 0.0000
String added successfully.
Cosine similarity between 'I enjoy playing basketball' and 'I love pizza': 0.0000
Cosine similarity between 'I enjoy playing basketball' and 'I enjoy playing soccer': 0.6667
Cosine similarity between 'I enjoy playing basketball' and 'The sun is shining today': 0.0000
Cosine similarity between 'I enjoy playing basketball' and 'Programming is fun': 0.0000
Cosine similarity between 'I enjoy playing basketball' and 'I like eating pizza': 0.0000
String added successfully.
Cosine similarity between 'The weather is nice today' and 'I love pizza': 0.0000
Cosine similarity between 'The weather is nice today' and 'I enjo

In [3]:
# Second demo: re-seed the checker created in the earlier cell with a larger
# list. As before, the checker stores this list object itself, so accepted
# candidates are appended to existing_strings too.
existing_strings = [
    "I love pizza",
    "I enjoy playing soccer",
    "The sun is shining today",
    "Programming is fun",
    "I like to travel",
    "Music is my passion",
    "I prefer reading books",
    "I am a movie enthusiast"
]
checker.string_list = existing_strings

# Candidates that share words with seeded strings. In the recorded output
# both were still added, i.e. they did not reach the similarity threshold.
new_string1: str = "I really enjoy eating pizza"
new_string2: str = "I love playing soccer"

# Candidates intended to have little overlap with the seeded strings.
new_string3: str = "The rain is pouring heavily today"
new_string4: str = "I dislike sports"

for candidate in (new_string1, new_string2, new_string3, new_string4):
    checker.add_string(candidate)


String added successfully.
Cosine similarity between 'I really enjoy eating pizza' and 'I love pizza': 0.3536
Cosine similarity between 'I really enjoy eating pizza' and 'I enjoy playing soccer': 0.2887
Cosine similarity between 'I really enjoy eating pizza' and 'The sun is shining today': 0.0000
Cosine similarity between 'I really enjoy eating pizza' and 'Programming is fun': 0.0000
Cosine similarity between 'I really enjoy eating pizza' and 'I like to travel': 0.0000
Cosine similarity between 'I really enjoy eating pizza' and 'Music is my passion': 0.0000
Cosine similarity between 'I really enjoy eating pizza' and 'I prefer reading books': 0.0000
Cosine similarity between 'I really enjoy eating pizza' and 'I am a movie enthusiast': 0.0000
String added successfully.
Cosine similarity between 'I love playing soccer' and 'I love pizza': 0.4082
Cosine similarity between 'I love playing soccer' and 'I enjoy playing soccer': 0.6667
Cosine similarity between 'I love playing soccer' and 'The