In [15]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [21]:
# Step 1: Load and Preprocess the Dataset

courses_df = pd.read_csv("C:\\Users\\chvas\\OneDrive\\Desktop\\udemy_course_data.csv")


In [18]:
# Filling missing values (if any)
courses_df.fillna('', inplace=True)

In [19]:
# Convert price and subscribers to numeric (for recommendation purposes)
courses_df['price'] = pd.to_numeric(courses_df['price'], errors='coerce')
courses_df['num_subscribers'] = pd.to_numeric(courses_df['num_subscribers'], errors='coerce')


In [20]:
# Step 2: Collaborative Filtering using Pandas
# We will create a simple collaborative filtering system based on 'num_reviews' (as a proxy for user ratings)

# Create a pivot table where rows are 'course_title' and columns are 'user_id' (simulated)
# We'll fill missing values with 0 (no review or rating)
ratings_df = courses_df.pivot_table(index='course_title', values='num_reviews', aggfunc='mean').fillna(0)


In [22]:
#Calculate similarity between courses based on reviews
item_similarity = cosine_similarity(ratings_df)
item_similarity_df = pd.DataFrame(item_similarity, index=ratings_df.index, columns=ratings_df.index)


In [23]:
# Function to recommend courses based on item similarity
def recommend_courses_collaborative(course_title, item_similarity_df=item_similarity_df):
    # Get the similarity scores for the course
    sim_scores = item_similarity_df[course_title].sort_values(ascending=False)
    
    # Return the top 5 most similar courses (excluding the course itself)
    return sim_scores[1:6]


In [24]:

# Example: Recommend similar courses to "Ultimate Investment Banking Course"
collaborative_recommendations = recommend_courses_collaborative("Ultimate Investment Banking Course")
print("Collaborative filtering recommendations:")
print(collaborative_recommendations)


Collaborative filtering recommendations:
course_title
#1 Piano Hand Coordination: Play 10th Ballad in Eb Key songs    1.0
#10 Hand Coordination - Transfer Chord Ballad 9 - C & Bb Key    1.0
#12 Hand Coordination: Let your Hands dance with Ballad 9       1.0
株式投資に向く性格に変えるための心理学を学ぶ                                          1.0
株式投資で本当のファンダメンタル分析ができるようになる                                     1.0
Name: Ultimate Investment Banking Course, dtype: float64


In [25]:
# Step 3: Content-Based Filtering using TF-IDF and Cosine Similarity
# Combine relevant features for content-based recommendation
courses_df['features'] = courses_df['course_title'] + ' ' + courses_df['subject'] + ' ' + courses_df['level']



In [26]:
#Use TF-IDF to analyze text data (course details)
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(courses_df['features'])

# Compute cosine similarity between courses
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [None]:

# Function to recommend similar courses based on a given course
def recommend_courses_content_based(course_title, cosine_sim=cosine_sim, courses_df=courses_df):
    # Get the index of the course that matches the title
    idx = courses_df.index[courses_df['course_title'] == course_title][0]
    

In [None]:
# Get similarity scores for all courses
sim_scores = list(enumerate(cosine_sim[idx]))

In [None]:
# Sort the courses based on similarity scores (descending)
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    

In [None]:
 # Get the indices of the top 5 most similar courses
sim_indices = [i[0] for i in sim_scores[1:6]]
    

In [30]:
# Get the index of the course that matches the title
idx = courses_df.index[courses_df['course_title'] == course_title][0]

# Get similarity scores for all courses
sim_scores = list(enumerate(cosine_sim[idx]))

# Sort the courses based on similarity scores (descending)
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

# Get the indices of the top 5 most similar courses (excluding the first one, which is the course itself)
sim_indices = [i[0] for i in sim_scores[1:6]]


In [32]:
# Display the top 5 most similar courses
print(f"Courses similar to '{course_title}':")
for idx in sim_indices:
    print(courses_df['course_title'].iloc[idx])

Courses similar to 'Ultimate Investment Banking Course':
The Complete Investment Banking Course 2017
Advanced Accounting for Investment Banking
The Investment Banking Recruitment Series
Business Banking 101
Intro to Investment Banking, M&A, IPO, Modeling + Free Book
