<a href="https://colab.research.google.com/github/Vasantha-Meghana/Predictive_Analytics_Projects/blob/Recommending_Courses_to_Users/predictive10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the course dataset.
df = pd.read_csv("online_course_recommendation.csv")  # Replace with your CSV file

# Ensure essential columns exist
required_columns = ['user_id', 'course_id', 'description', 'category', 'tags']
for col in required_columns:
    if col not in df.columns:
        raise ValueError(f"Missing required column: {col}")

# Drop rows that cannot be identified or vectorized, then renumber the index
# 0..n-1. The TF-IDF and similarity matrices built below are addressed by row
# position, so index labels must equal row positions; without the reset, any
# dropped row would shift every later label away from its matrix row.
df = df.dropna(subset=['course_id', 'description', 'tags']).reset_index(drop=True)

# Text the similarity is computed from: description followed by tags.
df['content'] = df['description'].astype(str) + ' ' + df['tags'].astype(str)

# TF-IDF vectorization (row i of the matrix corresponds to row i of df)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['content'])

# Cosine similarity between every pair of rows: cosine_sim[i][j] compares
# df row i with df row j.
# NOTE(review): this is a full n x n matrix, so memory grows quadratically
# with the number of rows -- confirm the dataset is small enough for that.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def get_index_from_course_id(course_id, data=None):
    """Return the row position of the first row whose course_id matches.

    The result is a 0-based *position* (suitable for ``.iloc`` and for
    indexing a row-aligned matrix), not an index label, so it stays correct
    even when the frame's index has gaps (for example after rows were
    dropped).

    Args:
        course_id: Value compared for equality against the ``course_id``
            column.
        data: Optional DataFrame to search. Defaults to the module-level
            ``df`` when omitted.

    Returns:
        int position of the first matching row, or None when no row matches.
    """
    frame = df if data is None else data
    # Boolean mask -> positions of the True entries, in row order.
    positions = (frame['course_id'] == course_id).to_numpy().nonzero()[0]
    if positions.size == 0:
        return None
    return int(positions[0])

def recommend_courses(course_id, top_n=5, focus_area=None):
    """Recommend the courses whose content is most similar to ``course_id``.

    Reads the module-level ``df`` and ``cosine_sim``; the value returned by
    ``get_index_from_course_id`` is used as a row position into both.

    Args:
        course_id: Course to find neighbours for.
        top_n: Maximum number of recommendations. ``None`` means no limit;
            zero or a negative number yields an empty result.
        focus_area: Optional case-insensitive substring that a course's tags
            must contain for it to be recommended.

    Returns:
        A DataFrame with columns ``course_id`` and ``tags``, ordered from
        most to least similar, with a fresh 0..k-1 index. If ``course_id``
        is not present, a human-readable error *string* is returned instead,
        so callers must check the type before using DataFrame methods.
    """
    idx = get_index_from_course_id(course_id)
    if idx is None:
        return f"❌ Course ID '{course_id}' not found in the dataset."

    # Rank every row by similarity to the query row, best first. sorted() is
    # stable, so rows with equal scores keep their dataset order.
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Pull the columns out once instead of doing a df.iloc lookup per row.
    course_ids = df['course_id'].tolist()
    if focus_area:
        needle = focus_area.lower()
        # astype(str) keeps the substring test from failing on non-string tags.
        tags_lower = df['tags'].astype(str).str.lower().tolist()
    else:
        needle = tags_lower = None

    # Seeding with the query course means other rows carrying the same
    # course_id are never recommended back; adding each accepted course
    # prevents the same course from appearing more than once.
    seen_courses = {course_id}
    top_indices = []
    for i, _score in ranked:
        if top_n is not None and len(top_indices) >= top_n:
            break
        if i == idx or course_ids[i] in seen_courses:
            continue
        if needle is not None and needle not in tags_lower[i]:
            continue
        seen_courses.add(course_ids[i])
        top_indices.append(i)

    recommended_courses = df.iloc[top_indices][['course_id', 'tags']]
    return recommended_courses.reset_index(drop=True)

# Example usage
sample_course_id = 'course_8'
focus_area = 'machine learning'

print(f"\n📘 Recommendations similar to '{sample_course_id}' in focus area '{focus_area}':\n")
recommendations = recommend_courses(course_id=sample_course_id, top_n=5, focus_area=focus_area)

# recommend_courses returns an error string (not a DataFrame) when the course
# id is unknown, so check the type before calling DataFrame methods on it.
if isinstance(recommendations, str):
    print(recommendations)
elif recommendations.empty:
    print(f"No courses matching focus area '{focus_area}' were found.")
else:
    print(recommendations.to_string(index=False))


📘 Recommendations similar to 'course_8' in focus area 'machine learning':

course_id                                        tags
 course_5 business, analytics, machine learning, java
course_38           machine learning, java, analytics
course_45           java, analytics, machine learning
course_46 java, strategy, machine learning, analytics
course_14           machine learning, data, analytics
