In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import faiss  # Import FAISS

# Load the course catalogue
df = pd.read_csv('coursera_courses_detailed.csv')

# Preprocess skills into a list of lower-cased skill phrases per course.
# Nulls are filled BEFORE the string conversion: astype(str) on a missing
# value yields the literal text "nan", which would otherwise end up as a
# fake skill and as a term in the TF-IDF vocabulary.
df['Skills'] = (
    df['Skills']
    .fillna('')
    .astype(str)
    .str.lower()
    .str.split(', ')
    .apply(lambda skills: [s.strip() for s in skills if s.strip()])
)
# One whitespace-joined document per course; courses without skills get ''
df['Skills_String'] = df['Skills'].apply(' '.join)

# TF-IDF Vectorizer (default settings; rows come out L2-normalised, so the
# inner product of two rows equals their cosine similarity)
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['Skills_String'])

# Convert sparse matrix to dense float32 array (FAISS requires dense float32 vectors)
dense_vectors = tfidf_matrix.toarray().astype('float32')

# Build FAISS index
dimension = dense_vectors.shape[1]  # Vocabulary size = vector dimension
index = faiss.IndexFlatIP(dimension)  # Exact inner-product search
index.add(dense_vectors)  # Row i of the index corresponds to df.iloc[i]

def create_user_profile(user_skills):
    """Create a user profile vector based on their skills.

    Parameters
    ----------
    user_skills : iterable of str, or str
        Skill phrases describing the user. A single string is treated as one
        skill phrase rather than being iterated character by character.

    Returns
    -------
    The result of transforming the combined skills text with the fitted
    module-level ``tfidf`` vectorizer, densified and cast to float32
    (one row for the single combined document).
    """
    # A bare string would otherwise be split into single characters, which
    # produces a document like "p y t h o n" instead of the intended phrase.
    if isinstance(user_skills, str):
        user_skills = [user_skills]

    cleaned = [skill.lower().strip() for skill in user_skills]
    # All skills are merged into one document so the user is a single vector
    user_profile = tfidf.transform([' '.join(cleaned)]).toarray().astype('float32')
    return user_profile

def recommend_courses(user_skills, n_recommendations=10, rating_weight=0.3):
    """Recommend courses using FAISS for similarity search.

    The ``n_recommendations`` nearest courses (by inner product with the
    user's TF-IDF profile) are fetched from the module-level ``index`` and
    then re-ranked by a blend of similarity and normalised rating.

    Parameters
    ----------
    user_skills : iterable of str
        Skills passed to ``create_user_profile``.
    n_recommendations : int, default 10
        Number of candidates to fetch and return. Values <= 0 yield an
        empty result.
    rating_weight : float, default 0.3
        Weight of the normalised rating in the final score; the similarity
        gets ``1 - rating_weight``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` with columns Title, Provider, Skills, Rating,
        Course Link plus ``Match_Score``, sorted by ``Match_Score``
        descending. May contain fewer than ``n_recommendations`` rows when
        the index holds fewer vectors. Courses with a missing rating get no
        rating contribution (their score is similarity only).
    """
    columns = ['Title', 'Provider', 'Skills', 'Rating', 'Course Link']

    if n_recommendations <= 0:
        empty = df.iloc[0:0][columns].copy()
        empty['Match_Score'] = np.array([], dtype='float64')
        return empty

    # Create user profile
    user_profile = create_user_profile(user_skills)

    # FAISS search (returns inner-product scores and row positions)
    distances, indices = index.search(user_profile, n_recommendations)
    candidate_idx = indices.flatten()
    cosine_sim = distances.flatten()

    # FAISS pads the result with index -1 when it finds fewer than k
    # neighbours; df.iloc[-1] would silently pick the LAST course instead.
    found = candidate_idx >= 0
    candidate_idx = candidate_idx[found]
    cosine_sim = cosine_sim[found]

    # Normalize ratings to 0-1 scale; missing ratings contribute 0 so they
    # cannot turn the combined score into NaN (NaN would sort to the top).
    ratings = df['Rating'].iloc[candidate_idx].values.astype('float64')
    max_rating = df['Rating'].max()
    if np.isfinite(max_rating) and max_rating > 0:
        normalized_ratings = ratings / max_rating
        normalized_ratings = np.where(np.isnan(normalized_ratings), 0.0, normalized_ratings)
    else:
        normalized_ratings = np.zeros(len(ratings), dtype='float64')

    # Combine scores: (1 - rating_weight) skill match, rating_weight rating
    combined_scores = (1 - rating_weight) * cosine_sim + rating_weight * normalized_ratings

    # Rank once, best first; stable so ties keep FAISS's similarity order
    order = np.argsort(-combined_scores, kind='stable')[:n_recommendations]

    # .copy() so adding a column does not write into a slice of df
    recommendations = df.iloc[candidate_idx[order]][columns].copy()
    recommendations['Match_Score'] = combined_scores[order]

    return recommendations

ValueError: All ufuncs must have type `numpy.ufunc`. Received (<ufunc 'sph_legendre_p'>, <ufunc 'sph_legendre_p'>, <ufunc 'sph_legendre_p'>)

In [5]:
# Skills describing the learner we want suggestions for
user_skills = [
    'python programming',
    'data analysis',
    'machine learning',
]

# Rank the catalogue against those skills and show the result
recommendations = recommend_courses(user_skills)
print(recommendations)

                                                 Title            Provider  \
392                            Get Started with Python              Google   
6                                     Machine Learning  Multiple educators   
566                          Data Analysis with Python                 IBM   
604                             Tools for Data Science                 IBM   
10                                       Deep Learning     DeepLearning.AI   
633                       Machine Learning with Python                 IBM   
251                               IBM Machine Learning                 IBM   
63                                  IBM AI Engineering                 IBM   
531  Microsoft Azure Data Scientist Associate (DP-1...           Microsoft   
276          Python for Data Science, AI & Development                 IBM   

                                                Skills  Rating  \
392  [object oriented programming (oop), data analy...     4.8   
6    [uns