<a href="https://colab.research.google.com/github/Walker31/Connect/blob/main/datingApp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab-only step: prompt the user to upload users.csv and interactions.csv
# into the working directory. Outside Colab the `google.colab` package does
# not exist, so fall back to assuming the CSV files are already on disk
# instead of failing on the import.
try:
    from google.colab import files
except ImportError:
    files = None

# On Colab this is whatever files.upload() returns; elsewhere an empty dict
# stands in so the name `uploaded` is still defined.
uploaded = files.upload() if files is not None else {}


Saving users.csv to users.csv
Saving interactions.csv to interactions.csv


In [3]:
import pandas as pd

# Load the two input tables from the working directory.
users_df = pd.read_csv('users.csv')
interactions_df = pd.read_csv('interactions.csv')

# Preview each table: heading on one line, first rows underneath.
print('Users Table:', users_df.head(), sep='\n')
print('\nInteractions Table:', interactions_df.head(), sep='\n')

Users Table:
   user_id  age  gender  location                         interests  \
0        1   56    Male   Chicago      Photography, Cooking, Gaming   
1        2   46    Male   Phoenix      Reading, Movies, Sports, Art   
2        3   32  Female  New York  Fitness, Gaming, Cooking, Travel   
3        4   60    Male   Phoenix     Fitness, Art, Reading, Hiking   
4        5   25  Female   Phoenix      Photography, Sports, Reading   

  preferred_gender  preferred_age_min  preferred_age_max  
0           Female                 19                 24  
1             Male                 22                 29  
2              Any                 21                 36  
3             Male                 29                 39  
4              Any                 24                 31  

Interactions Table:
   user_id  other_user_id interaction   timestamp
0      328             58     dislike  2025-05-18
1       13            380        like  2025-01-11
2      141            126        li

In [4]:
from scipy.sparse import csr_matrix

# Binary feedback signal: 1 for a 'like', 0 for every other interaction type.
interactions_df['score'] = (interactions_df['interaction'] == 'like').astype(int)

# Rows are the acting user, columns the user acted upon. pivot_table uses its
# default aggregation for repeated (user_id, other_user_id) pairs and fills
# pairs with no interaction with 0.
pivot = interactions_df.pivot_table(
    index='user_id',
    columns='other_user_id',
    values='score',
    fill_value=0,
)

# Align the rows with users_df: row i of the matrix must describe the user in
# row i of users_df, because the similarity matrix derived from it is later
# combined element-wise with one built from users_df and indexed through a
# mapping built from users_df['user_id']. Users with no outgoing interactions
# get an all-zero row; ids absent from users_df are dropped.
pivot = pivot.reindex(index=users_df['user_id'], fill_value=0)

user_ids = pivot.index.to_list()
item_ids = pivot.columns.to_list()
user_item_sparse = csr_matrix(pivot.values)

print("User-Item matrix shape: ", user_item_sparse.shape)



User-Item matrix shape:  (500, 500)


In [6]:
# Sanity check that the pivot has the expected 500 x 500 shape.
# NOTE(review): 500 is hard-coded — presumably the number of users in
# users.csv; confirm and update if the dataset changes.
pivot.shape == (500,500)

True

In [9]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
import scipy.sparse as sp

# Work on a copy so users_df itself is left untouched.
df = users_df.copy()

# --- Interests: bag-of-words counts, one column per distinct word ---------
vectorizer = CountVectorizer(lowercase=True, token_pattern=r"(?u)\b\w+\b")
interest_matrix = vectorizer.fit_transform(df['interests'])

print("Interest-term vocabulary: ", vectorizer.get_feature_names_out())
print("Interest-term matrix: \n", interest_matrix.shape)

# --- Categorical columns: one-hot encoded, no category dropped ------------
categorical_columns = ['gender', 'location', 'preferred_gender']
ohe = OneHotEncoder(drop=None, sparse_output=True)
cat_features = ohe.fit_transform(df[categorical_columns])

print('One-hot cat matrix shape:' , cat_features.shape)

# --- Age columns: min-max scaled, then converted to sparse for stacking ---
numeric_columns = ['age', 'preferred_age_min', 'preferred_age_max']
scaler = MinMaxScaler()
age_cols = scaler.fit_transform(df[numeric_columns])
age_sparse = sp.csr_matrix(age_cols)

# Concatenate the three feature groups column-wise into one CSR matrix.
feature_blocks = [interest_matrix, cat_features, age_sparse]
user_profile_features = sp.hstack(feature_blocks, format='csr')
print("Final profile shape(users x features): ", user_profile_features.shape)

Interest-term vocabulary:  ['art' 'cooking' 'dancing' 'fitness' 'gaming' 'hiking' 'movies' 'music'
 'photography' 'reading' 'sports' 'travel']
Interest-term matrix: 
 (500, 12)
One-hot cat matrix shape: (500, 12)
Final profile shape(users x features):  (500, 27)


In [13]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Collaborative-filtering similarity: cosine similarity between the rows of
# the user-item matrix.
sim_cf = cosine_similarity(user_item_sparse)

# Zero the diagonal so a user is never scored as similar to themselves.
# This does not change the shape, so reporting it afterwards is equivalent.
np.fill_diagonal(sim_cf, 0.0)
print("CF similarity matrix shape:", sim_cf.shape)

CF similarity matrix shape: (500, 500)


In [14]:

# Content-based similarity: cosine similarity between the rows of the
# profile feature matrix.
sim_cb = cosine_similarity(user_profile_features)
print("Content-based similarity matrix shape:", sim_cb.shape)

# Remove self-similarity by zeroing every diagonal entry.
sim_cb[np.diag_indices_from(sim_cb)] = 0.0


Content-based similarity matrix shape: (500, 500)


In [16]:
# Blend weight: alpha goes to the collaborative-filtering similarity and
# (1 - alpha) to the content-based similarity.
alpha = 0.5
sim_hybrid = np.add(alpha * sim_cf, (1 - alpha) * sim_cb)
print("Hybrid similarity matrix shape:", sim_hybrid.shape)

Hybrid similarity matrix shape: (500, 500)


In [20]:
from collections import defaultdict

# Map each user id to its row position in users_df, plus the inverse map.
user_ids = users_df['user_id'].to_list()
id_to_idx = dict(zip(user_ids, range(len(user_ids))))
idx_to_id = {pos: uid for uid, pos in id_to_idx.items()}

# For every acting user, collect the ids they already have an interaction
# with (of any type).
already_seen = defaultdict(set)
for actor, target in zip(interactions_df['user_id'], interactions_df['other_user_id']):
    already_seen[actor].add(target)

# Same user table, keyed by user_id for label-based row lookup.
demo_df = users_df.set_index('user_id')

In [21]:
def is_compatible(u_id, v_id, demo_df):
    """Return True when users `u_id` and `v_id` satisfy each other's preferences.

    Both rows are looked up in `demo_df` by index label. The pair is
    compatible only if, in both directions, the other user's gender matches
    `preferred_gender` (or the preference is 'Any') and the other user's age
    lies within [`preferred_age_min`, `preferred_age_max`] inclusive.
    """
    seeker = demo_df.loc[u_id]
    target = demo_df.loc[v_id]

    def gender_ok(chooser, other):
        # 'Any' accepts everyone; otherwise the genders must match exactly.
        wanted = chooser['preferred_gender']
        return wanted == 'Any' or other['gender'] == wanted

    def age_ok(chooser, other):
        # Inclusive range check on the other user's age.
        return chooser['preferred_age_min'] <= other['age'] <= chooser['preferred_age_max']

    # Short-circuits in the order: both gender checks, then both age checks.
    return bool(
        gender_ok(seeker, target)
        and gender_ok(target, seeker)
        and age_ok(seeker, target)
        and age_ok(target, seeker)
    )


In [29]:
import numpy as np

def recommend_for_user(u_id, sim_matrix, users_df, interactions_df, N=10, alpha=0.5):
    """Return up to `N` recommended user ids for `u_id`, best match first.

    Candidates are all ids in ``users_df['user_id']`` except `u_id` itself,
    ids recorded in the module-level ``already_seen`` for `u_id`, and ids
    rejected by ``is_compatible`` (evaluated against the module-level
    ``demo_df``). Remaining candidates are ranked by
    ``sim_matrix[u_idx, v_idx]`` in descending order, with positions taken
    from the module-level ``id_to_idx``; ties keep ``users_df`` order.

    Parameters
    ----------
    u_id : user id to recommend for; must be a key of ``id_to_idx``.
    sim_matrix : 2-D array indexed as ``sim_matrix[row, col]``.
    users_df : DataFrame with a ``user_id`` column listing the candidate pool.
    interactions_df : unused; kept so existing callers keep working.
    N : maximum number of ids to return; values <= 0 yield an empty list.
    alpha : unused; kept so existing callers keep working.

    Raises
    ------
    KeyError
        If `u_id` (or a candidate id) is missing from ``id_to_idx``.
    """
    u_idx = id_to_idx[u_id]

    # A negative N would otherwise slice off the tail ("all but the last
    # |N|"), which is never what a top-N request means.
    if N <= 0:
        return []

    # Use .get rather than indexing: if already_seen is a defaultdict,
    # indexing would insert an empty entry for every user we query.
    seen = already_seen.get(u_id, ())

    scored = [
        (v_id, sim_matrix[u_idx, id_to_idx[v_id]])
        for v_id in users_df['user_id'].tolist()
        if v_id != u_id
        and v_id not in seen
        and is_compatible(u_id, v_id, demo_df)
    ]

    # Stable sort: equal scores stay in users_df order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [v_id for v_id, _ in scored[:N]]

# Demo: top-10 recommendations for one user. The id is held in a variable so
# the printed label always matches the user that was actually queried (the
# label previously said "user 1" while the query was for user 5).
target_user_id = 5
top_10_for_user_5 = recommend_for_user(
    u_id=target_user_id,
    sim_matrix=sim_hybrid,
    users_df=users_df,
    interactions_df=interactions_df,
    N=10
)
print(f"Top 10 recommendations for user {target_user_id}:", top_10_for_user_5)



Top 10 recommendations for user 1: [10, 191, 46, 148, 400, 26, 106, 395, 288, 320]
