In [5]:
import pandas as pd
import numpy as np
import random
from faker import Faker

In [8]:
fake = Faker()
num_users = 600

# define attachment style logic
# scoring range is 1.0 (low) to 7.0 (high) -- midpoint is roughly 4.0
def get_attachment_style(anxiety, avoidance):
    if anxiety < 4.0 and avoidance < 4.0:
        return "Secure"
    elif anxiety >= 4.0 and avoidance < 4.0:
        return "Anxious-Preoccupied"
    elif anxiety < 4.0 and avoidance >= 4.0:
        return "Dismissive-Avoidant"
    else:
        return "Fearful-Avoidant"

# generate dataset
users = []
genders = ['Male', 'Female']
interest_list = ['Hiking', 'Gaming', 'Travelling', 'Reading', 'Cooking', 'Music', 'Art', 'Movies', 'Swimming', 'Meditation', 'Coding', 'Drink', 'Shopping', 'Badminton', 'Fitness']

print(f"Generating {num_users} users...")

for i in range(num_users):
    uid = i + 1
    gender = random.choice(genders)
    age = random.randint(18,35)

    # use normal distribution to make ecr-rs realistic, centered around different means
    if random.random() > 0.5:
        # create a secure cluster
        anx = np.random.normal(2.5, 1.0)
        avo = np.random.normal(2.5, 1.0)
    else:
        # create Insecure clusters (higher variance)
        anx = np.random.normal(4.5, 1.5)
        avo = np.random.normal(4.5, 1.5)

    # clip scores to 1 - 7 range
    anx = np.clip(anx, 1.0, 7.0)
    avo = np.clip(avo, 1.0, 7.0)

    style = get_attachment_style(anx, avo)

    #preferences
    target_gender = 'Female' if gender == 'Male' else 'Male'
    user_interests = random.sample(interest_list, k=3) #picking 3 random interests

    users.append({
        'user_id': uid,
        'name': fake.first_name(),
        'gender': gender,
        'age': age,
        'target_gender': target_gender,
        'anxiety_score': round(anx, 2),
        'avoidance_score': round(avo, 2),
        'attachment_style': style,
        'interests': user_interests
    })


Generating 600 users...


In [9]:
df = pd.DataFrame(users)
df.head()

Unnamed: 0,user_id,name,gender,age,target_gender,anxiety_score,avoidance_score,attachment_style,interests
0,1,Matthew,Male,27,Female,4.41,3.54,Anxious-Preoccupied,"[Shopping, Drink, Movies]"
1,2,Hannah,Female,20,Male,1.0,1.0,Secure,"[Hiking, Swimming, Cooking]"
2,3,Shawn,Female,29,Male,2.74,1.98,Secure,"[Shopping, Fitness, Hiking]"
3,4,Kenneth,Male,23,Female,5.48,2.25,Anxious-Preoccupied,"[Reading, Gaming, Hiking]"
4,5,Tyler,Male,23,Female,2.44,3.74,Secure,"[Gaming, Drink, Cooking]"


In [10]:
df.tail()

Unnamed: 0,user_id,name,gender,age,target_gender,anxiety_score,avoidance_score,attachment_style,interests
595,596,Rebecca,Male,27,Female,4.46,6.55,Fearful-Avoidant,"[Movies, Coding, Travelling]"
596,597,Karina,Male,26,Female,2.28,3.26,Secure,"[Coding, Cooking, Swimming]"
597,598,Jessica,Female,33,Male,2.34,1.09,Secure,"[Gaming, Coding, Movies]"
598,599,Crystal,Female,22,Male,2.85,2.22,Secure,"[Drink, Music, Fitness]"
599,600,Anna,Male,21,Female,4.18,1.56,Anxious-Preoccupied,"[Art, Cooking, Hiking]"


In [12]:
df.describe()

Unnamed: 0,user_id,age,anxiety_score,avoidance_score
count,600.0,600.0,600.0,600.0
mean,300.5,26.366667,3.576967,3.5158
std,173.349358,5.114276,1.526139,1.560808
min,1.0,18.0,1.0,1.0
25%,150.75,22.0,2.44,2.285
50%,300.5,26.0,3.38,3.385
75%,450.25,31.0,4.7125,4.6025
max,600.0,35.0,7.0,7.0


In [16]:
# matching algorithm
def calculate_compatibility(user_a, user_b):
    # hard filters
    if user_a['target_gender'] != user_b['gender']:
        return 0
    if abs(user_a['age'] - user_b['age']) > 5:
        return 0

    # psychological compatibility (core logic)
    # we need tp minimize distance (lower distance = higher score)
    distance = np.sqrt((user_a['anxiety_score'] - user_b['anxiety_score'])**2 + (user_a['avoidance_score'] - user_b['avoidance_score'])**2)

    # attachment theory is tricky
    # secure (low, low) matches best with secure.
    # anxiety (high, low) actually attracts avoidant (low, high) -> high chemistry, but unstable
    # for a healthy match, we need to maintain high distance from secure(1,1)

    # Stability Score: how close is the pair to being secure?
    # average anxiety of the couple + average avoidance of the couple
    pair_anxiety = (user_a['anxiety_score'] + user_b['anxiety_score']) / 2
    pair_avoidance = (user_a['avoidance_score'] + user_b['avoidance_score']) / 2

    # get the lower value
    stability_penalty = (pair_avoidance + pair_anxiety) / 14.0 # max sum is 14
    psych_score = 1.0 - stability_penalty

    # Interest Overlap
    shared_interests = set(user_a['interests']) & set(user_b['interests'])
    interest_score = len(shared_interests) * 0.1

    # final score
    final_score = (psych_score * 0.8) + (interest_score * 0.2)
    return round(final_score * 100, 2)

In [19]:
current_user = df.iloc[348]
print(f"\nFinding matches for: {current_user['name']} ({current_user['attachment_style']})")
print(f"Stats: Anx: {current_user['anxiety_score']}, Avo: {current_user['avoidance_score']}")
print(f"Interests: {current_user['interests']}\n")

matches = []

for index, potential_match in df.iterrows():
    if current_user['user_id'] == potential_match['user_id']:
        continue

    score = calculate_compatibility(current_user, potential_match)

    if score > 0: # Only keep valid matches
        matches.append((potential_match, score))

# Sort by highest score
matches.sort(key=lambda x: x[1], reverse=True)

# Display Top 5 Matches
print(f"{'Name':<10} | {'Style':<20} | {'Score':<5} | {'Interests'}")
print("-" * 60)
for match in matches[:5]:
    m_profile = match[0]
    m_score = match[1]
    print(f"{m_profile['name']:<10} | {m_profile['attachment_style']:<20} | {m_score}%  | {m_profile['interests']}")


Finding matches for: Jacqueline (Dismissive-Avoidant)
Stats: Anx: 2.81, Avo: 4.32
Interests: ['Music', 'Art', 'Badminton']

Name       | Style                | Score | Interests
------------------------------------------------------------
Melissa    | Secure               | 55.91%  | ['Music', 'Drink', 'Swimming']
Megan      | Secure               | 55.09%  | ['Shopping', 'Badminton', 'Gaming']
Pamela     | Secure               | 54.23%  | ['Swimming', 'Music', 'Drink']
Victoria   | Secure               | 53.17%  | ['Art', 'Fitness', 'Coding']
John       | Secure               | 53.11%  | ['Art', 'Music', 'Coding']
