# Friend Recommendation System
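Main notebook for training, saving, and testing the friend-recommendation models (cosine similarity, KNN, KMeans, SVD) and combining their scores into a hybrid recommendation.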

In [1]:

import os
import pandas as pd
from utils.data_loader import load_features, load_edges
from utils.preprocess import normalize_features
from models.cosine_similarity import recommend_friends
from models.knn_model import train_knn, recommend_knn
from models.kmeans_model import train_kmeans, predict_cluster
from models.matrix_factorization_model import build_interaction_dataframe, train_svd
from models.hybrid_model import combine_scores
import joblib


## Load Data

In [2]:

# Load the per-user feature table and the friendship graph from the local
# Facebook dataset files (paths are relative to the notebook's working directory).
features = load_features('data/facebook/features.txt')
edges_graph = load_edges('data/facebook/edges.txt')
# Report the sizes of both inputs as a quick sanity check:
# rows/columns of the feature table, then node/edge counts of the graph.
# NOTE(review): the recorded run shows far fewer feature rows (10) than graph
# nodes (3959) — confirm features.txt is meant to cover only a subset of users.
print(f"Loaded {features.shape[0]} users with {features.shape[1]} features.")
print(f"Loaded {edges_graph.number_of_nodes()} nodes and {edges_graph.number_of_edges()} edges in friendship graph.")


Loaded 10 users with 576 features.
Loaded 3959 nodes and 84243 edges in friendship graph.


## Preprocess Data

In [3]:

# Replace missing feature values with 0 so the normalization step sees no NaNs.
features = features.fillna(0)
# normalize_features is a project helper (utils.preprocess); the exact scaling
# it applies is not visible from this notebook.
features_normalized = normalize_features(features)
# Preview the first rows of the normalized table.
print(features_normalized.head())


         1    2    3    4    5    6    7    8    9    10   ...  567  568  569  \
user_id                                                    ...                  
0        0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  ...  0.0  0.0  0.0   
107      0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  ...  0.0  0.0  0.0   
1684     0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
1912     0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  ...  0.0  0.0  0.0   
3437     0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  1.0  ...  0.0  0.0  0.0   

         570  571  572  573  574  575  576  
user_id                                     
0        0.0  0.0  0.0  0.0  0.0  0.0  0.0  
107      0.0  0.0  0.0  0.0  0.0  0.0  0.0  
1684     0.0  0.0  0.0  0.0  0.0  0.0  0.0  
1912     0.0  0.0  0.0  0.0  0.0  0.0  0.0  
3437     0.0  0.0  0.0  0.0  0.0  0.0  0.0  

[5 rows x 576 columns]


## Train Models and Save

### Cosine Similarity (No Training Needed)

### KNN Model

In [4]:

# Train the KNN model on the normalized features, using 5 neighbours.
knn_model = train_knn(features_normalized, n_neighbors=5)
# The model is persisted here in the notebook: make sure the target directory
# exists (no error if it already does), then serialize the model with joblib.
os.makedirs('models/saved_models', exist_ok=True)
joblib.dump(knn_model, 'models/saved_models/knn_model.pkl')
print("KNN model saved.")


KNN model saved.


### KMeans Clustering Model

In [5]:

# Train the KMeans model on the normalized features with 10 clusters.
# The recorded output suggests train_kmeans also saves the model itself
# (models/saved_models/kmeans_model.pkl) — TODO confirm in models/kmeans_model.
# NOTE(review): the recorded run loaded only 10 users, so n_clusters=10 would
# leave roughly one user per cluster — confirm the intended cluster count.
kmeans_model = train_kmeans(features_normalized, n_clusters=10)


KMeans model saved to models/saved_models/kmeans_model.pkl


### Matrix Factorization (SVD Model)

In [6]:

# Convert the friendship graph into an interaction table, then train the SVD
# (matrix factorization) model on it.
# The recorded output suggests train_svd also saves the model itself
# (models/saved_models/svd_model.pkl) — TODO confirm in models/matrix_factorization_model.
interactions_df = build_interaction_dataframe(edges_graph)
svd_model = train_svd(interactions_df)


SVD model saved to models/saved_models/svd_model.pkl


## Recommend Friends (Testing Models)

In [7]:

# Smoke-test each trained model on a single user.
# Candidate users are the row labels of the normalized feature table.
valid_user_ids = features_normalized.index.tolist()
if not valid_user_ids:
    # Fail with an explicit message instead of a bare IndexError on [0] below.
    raise ValueError("features_normalized is empty; no user to generate recommendations for.")
# Use the first user in the table as the test subject.
selected_user_id = valid_user_ids[0]

# Cosine Recommendations
# In the recorded run this returns a list of (user_id, score) pairs.
cosine_recommendations = recommend_friends(selected_user_id, features_normalized, top_k=5)
print("Cosine Recommendations:", cosine_recommendations)

# KNN Recommendations
# Queries the fitted KNN model; the recorded run returns the same
# (user_id, score) pair format as the cosine recommender.
knn_recommendations = recommend_knn(selected_user_id, features_normalized, knn_model, top_k=5)
print("KNN Recommendations:", knn_recommendations)

# KMeans Cluster Prediction
cluster = predict_cluster(selected_user_id, features_normalized, kmeans_model)
print(f"User {selected_user_id} belongs to cluster {cluster}")

# (For KMeans, we could later recommend users from the same cluster)

# SVD model will be used in hybrid


Cosine Recommendations: [(3437, 0.1522773975253762), (686, 0.1217161238900369), (107, 0.05504818825631803), (348, 0.03984095364447979), (1912, 0.03390317518104052)]
KNN Recommendations: [(3437, 0.15227739752537617), (686, 0.12171612389003694), (107, 0.055048188256318076), (348, 0.039840953644479815), (1912, 0.03390317518104047)]
User 0 belongs to cluster 2


## Hybrid Model Combination

In [8]:

# Prepare per-model score dictionaries ({user_id: score}) for the hybrid combiner.
cosine_scores = dict(cosine_recommendations)
knn_scores = dict(knn_recommendations)

# Candidate users = everyone proposed by either recommender, de-duplicated in
# first-seen order (cosine candidates first, then any KNN-only candidates).
# Keying the placeholder scores on this union, rather than on the cosine list
# alone, gives every candidate the same placeholder entries.
candidate_ids = list(dict.fromkeys([*cosine_scores, *knn_scores]))

# Placeholder scores: the KMeans and SVD models are not queried here yet, so
# every candidate receives the same fixed value and these two inputs do not
# influence the relative ranking among candidates.
kmeans_scores = {uid: 0.8 for uid in candidate_ids}  # Dummy fixed score
svd_scores = {uid: 0.75 for uid in candidate_ids}    # Dummy fixed score

# Combine
# combine_scores is a project helper (models.hybrid_model); its weighting of
# the four score dictionaries is not visible from this notebook.
final_recommendations = combine_scores(cosine_scores, knn_scores, kmeans_scores, svd_scores, top_k=5)
print("Final Hybrid Recommendations:", final_recommendations)


Final Hybrid Recommendations: [(3437, 0.4611386987626881), (686, 0.4458580619450185), (107, 0.41252409412815905), (348, 0.4049204768222399), (1912, 0.40195158759052024)]
