In [1]:
import string

import numpy as np

In [2]:
# Four hand-written sample profiles used to exercise the matching functions.
# Every profile carries the same fields: id, name, gender, age, location,
# profession, a list of interests, and a free-text personal statement.
users = [
    dict(
        id=1,
        name="Female User 1",
        gender="female",
        age=30,
        location="New York",
        profession="Entrepreneur",
        interests=["starting a business", "traveling", "reading"],
        personal_statement="I wanna create a business with my husband",
    ),
    dict(
        id=2,
        name="Male User",
        gender="male",
        age=32,
        location="New York",
        profession="Software Engineer",
        interests=["technology", "starting a business", "hiking"],
        personal_statement="Looking for a girl who might be interested in starting a business with me",
    ),
    dict(
        id=3,
        name="Female User 2",
        gender="female",
        age=28,
        location="Los Angeles",
        profession="Designer",
        interests=["art", "music", "cooking"],
        personal_statement="I love creativity and art, looking for someone who shares the same passion.",
    ),
    dict(
        id=4,
        name="Female User 3",
        gender="female",
        age=26,
        location="Chicago",
        profession="Marketing Manager",
        interests=["marketing", "business strategy", "reading"],
        personal_statement="Interested in business and marketing, looking for a like-minded partner.",
    ),
]

In [3]:
def _jaccard(left, right):
    """Return the Jaccard index of two sets (shared / combined), or 0 if both are empty."""
    combined = left | right
    return len(left & right) / len(combined) if combined else 0


def _statement_keywords(statement):
    """Split a statement into a set of lower-cased words with edge punctuation trimmed."""
    trimmed = (word.strip(string.punctuation) for word in statement.lower().split())
    # A token made only of punctuation (e.g. "-") trims down to "" and is dropped.
    return {word for word in trimmed if word}


def calculate_similarity(user1, user2, interest_weight=0.3, statement_weight=0.7):
    """Score how alike two user profiles are.

    The score is a weighted sum of two Jaccard indices:

    * one over the users' ``interests`` (compared as exact strings), and
    * one over the words of their ``personal_statement`` (case-insensitive,
      with leading/trailing punctuation removed so that "art," and "art"
      count as the same word; inner characters such as the hyphen in
      "like-minded" are kept).

    Args:
        user1: Profile dict with ``"interests"`` (iterable of str) and
            ``"personal_statement"`` (str). A missing or ``None`` field is
            treated as empty.
        user2: Second profile dict, same layout as ``user1``.
        interest_weight: Multiplier applied to the interests similarity.
        statement_weight: Multiplier applied to the statement similarity.
            The defaults weight the personal statement more heavily.

    Returns:
        ``interest_weight * interests_similarity
        + statement_weight * statement_similarity``. With the default weights
        the result lies between 0 and 1.
    """
    interests_similarity = _jaccard(
        set(user1.get("interests") or ()),
        set(user2.get("interests") or ()),
    )

    # Simple keyword overlap stands in for real NLP on the free-text statements.
    statement_similarity = _jaccard(
        _statement_keywords(user1.get("personal_statement") or ""),
        _statement_keywords(user2.get("personal_statement") or ""),
    )

    return interest_weight * interests_similarity + statement_weight * statement_similarity

In [4]:
def find_best_matches(user, users, min_similarity_threshold=0.1):
    """Rank the profiles in ``users`` that are compatible with ``user``.

    A profile is considered only if its ``id`` differs from ``user``'s and its
    ``gender`` differs from ``user``'s. Each considered pair is scored with
    ``calculate_similarity`` and the score is printed.

    Args:
        user: Profile dict to find matches for.
        users: Iterable of candidate profile dicts (may include ``user``).
        min_similarity_threshold: Lowest score (inclusive) that still counts
            as a match.

    Returns:
        A list of ``(candidate, similarity)`` tuples, highest score first.
    """
    matches = []

    for candidate in users:
        # Never match a user with themselves.
        if candidate["id"] == user["id"]:
            continue
        # Only profiles of a different gender are considered.
        if candidate["gender"] == user["gender"]:
            continue

        similarity = calculate_similarity(user, candidate)
        print(f"Calculated similarity between {user['name']} and {candidate['name']} is {similarity:.2f}")

        if similarity < min_similarity_threshold:
            continue
        matches.append((candidate, similarity))

    # Highest score first; equal scores keep their encounter order.
    return sorted(matches, key=lambda pair: pair[1], reverse=True)

In [5]:
# Example usage: rank matches for the second profile in `users` ("Male User").
user_to_match = users[1]  # Male User
# find_best_matches prints one "Calculated similarity ..." line per scored
# candidate and returns (candidate, similarity) tuples sorted best-first.
best_matches = find_best_matches(user_to_match, users, min_similarity_threshold=0.1)

# Summarise only the candidates that met the threshold.
print(f"Best matches for {user_to_match['name']}:")
for match, similarity in best_matches:
    print(f"{match['name']} with similarity score {similarity:.2f}")

Calculated similarity between Male User and Female User 1 is 0.18
Calculated similarity between Male User and Female User 2 is 0.09
Calculated similarity between Male User and Female User 3 is 0.25
Best matches for Male User:
Female User 3 with similarity score 0.25
Female User 1 with similarity score 0.18
