In [1]:
# 📌 Hybrid Recommendation System

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Load cleaned dataset (make sure df is preprocessed already)
df = pd.read_csv("../data/cleaned_github_users.csv")

# ----------------------
# Step 1: Content-Based Similarity (Languages + Repositories)
# ----------------------
df['combined_features'] = df['Languages'].fillna('') + ' ' + df['Repositories'].fillna('')
tfidf = TfidfVectorizer(stop_words='english')
content_matrix = tfidf.fit_transform(df['combined_features'])
content_similarity = cosine_similarity(content_matrix)

# ----------------------
# Step 2: Collaborative Filtering Similarity (Interactions)
# ----------------------
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
interaction_matrix = scaler.fit_transform(df[['Followers', 'Following', 'Public_Repos']])
collaborative_similarity = cosine_similarity(interaction_matrix)

# ----------------------
# Step 3: NLP Bio Similarity (Optional)
# ----------------------
bio_df = df[df['Bio'].notna() & (df['Bio'] != 'No Bio Provided')].copy()
bio_vectorizer = TfidfVectorizer(stop_words='english')
bio_matrix = bio_vectorizer.fit_transform(bio_df['Bio'])
bio_similarity = cosine_similarity(bio_matrix)

# Map usernames to indexes for all similarity matrices
username_to_idx = {user: idx for idx, user in enumerate(df['Username'])}
bio_user_to_idx = {user: idx for idx, user in enumerate(bio_df['Username'])}

# ----------------------
# Step 4: Hybrid Recommendation Function
# ----------------------
def recommend_hybrid_partners(username, top_k=5, weights=(0.4, 0.4, 0.2)):
    if username not in df['Username'].values:
        print("❌ Username not found.")
        return pd.DataFrame()

    idx = username_to_idx[username]
    content_scores = content_similarity[idx]
    collab_scores = collaborative_similarity[idx]

    # Default bio scores = 0
    bio_scores = np.zeros_like(content_scores)
    if username in bio_user_to_idx:
        bio_idx = bio_user_to_idx[username]
        for i, user in enumerate(df['Username']):
            if user in bio_user_to_idx:
                bio_scores[i] = bio_similarity[bio_idx][bio_user_to_idx[user]]

    # Weighted hybrid score
    final_score = (weights[0] * content_scores +
                   weights[1] * collab_scores +
                   weights[2] * bio_scores)

    # Rank and exclude self
    top_indices = final_score.argsort()[::-1]
    top_indices = [i for i in top_indices if i != idx][:top_k]

    recommendations = df.iloc[top_indices].copy()✅ Recommended Fix: Load Score Files (if saved previously)
    recommendations['hybrid_score'] = final_score[top_indices]
    return recommendations[['Username', 'Languages', 'Repositories', 'Location', 'Profile_URL', 'hybrid_score']]

# ----------------------
# Step 5: Example Usage
# ----------------------
example_user = "abhay"
hybrid_recs = recommend_hybrid_partners(example_user)
print("\n🔍 Hybrid Recommendations:")
print(hybrid_recs)



🔍 Hybrid Recommendations:
      Username                                          Languages  \
44       abhay  JavaScript, C, Jupyter Notebook, Ruby, Shell, ...   
1181    snusnu                            JavaScript, Shell, Ruby   
3864  nickolai                                       PHP, C, Ruby   
1563  bharendt      Erlang, ActionScript, JavaScript, Objective-C   
3704  mfoemmel                                 Java, Erlang, Ruby   

                                           Repositories  \
44    abhay.github.io, addressable, app-helium, app-...   
1181  adamantium, alfred, anima, axiom-do-adapter, a...   
3864  clanstat, cv, fts_zh, nanorc, nickolai.github....   
1563  AFNetworking, cowboy, edbi, elixir-tmbundle, e...   
3704        erlang-otp, fig, hubbard, lljvm, webmachine   

                                 Location                  Profile_URL  \
44                      San Francisco, CA     https://github.com/abhay   
1181                         Linz/Austria    https://gi