#### Course Recommendation System using Udemy Dataset

#### Algorithms
+ Cosine Similarity
+ Linear Kernel Similarity


#### Workflow
+ Load the dataset
+ Vectorize the course titles
+ Build the cosine similarity matrix
+ Look up the course ID and its similarity scores
+ Recommend the most similar courses


In [25]:
pip install neattext

In [26]:
# Load EDA Pkgs
import pandas as pd
import neattext.functions as nfx

In [27]:
# Load ML/Rc Pkgs
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity,linear_kernel

In [28]:
# Load the Udemy courses CSV into a DataFrame. The relative path presumably
# matches a Kaggle-style input directory -- adjust it when running elsewhere.
df = pd.read_csv("../input/udemy-courses/udemy_courses.csv")

In [29]:
df.head()

In [30]:
df['course_title']

In [31]:
dir(nfx)

In [32]:
# First cleaning pass: run neattext's remove_stopwords over every course title
# and store the result in a new column, leaving 'course_title' untouched.
df['clean_course_title'] = df['course_title'].apply(nfx.remove_stopwords)

In [33]:
# Second cleaning pass: run neattext's remove_special_characters over the
# already-cleaned titles, overwriting 'clean_course_title' in place.
df['clean_course_title'] = df['clean_course_title'].apply(nfx.remove_special_characters)

In [34]:
df[['course_title','clean_course_title']]

In [62]:
# Vectorize the cleaned titles: fit a CountVectorizer with default settings and
# turn every title into a row of token counts (one column per vocabulary word).
count_vect = CountVectorizer()
cv_mat = count_vect.fit_transform(df['clean_course_title'])

# fit_transform returns a sparse matrix; echo it to inspect its shape/density.
cv_mat

In [37]:
# Dense view of the same count matrix, displayed for inspection only
# (the result is not stored).
cv_mat.todense()

In [38]:
# Document-term table: one row per course, one column per vocabulary word.
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2, so prefer get_feature_names_out() and fall back to the old
# method only on releases that predate it. toarray() yields a plain ndarray
# rather than the legacy np.matrix returned by todense().
df_cv_words = pd.DataFrame(
    cv_mat.toarray(),
    columns=(
        count_vect.get_feature_names_out()
        if hasattr(count_vect, "get_feature_names_out")
        else count_vect.get_feature_names()
    ),
)

In [39]:
df_cv_words.head()

In [40]:
# Pairwise cosine similarity between all title vectors: entry [i, j] is the
# similarity of course i to course j, so row i holds course i's score against
# every course (including itself).
cosine_sim_mat = cosine_similarity(cv_mat)

In [41]:
cosine_sim_mat

In [63]:
# Plot the first 10 rows of the similarity matrix as an annotated heatmap.
# NOTE(review): the slice keeps every column, so the plot is 10 x n_courses;
# slice the columns too if a 10 x 10 view was intended.
import seaborn as sns
sns.heatmap(cosine_sim_mat[0:10],annot=True)

In [43]:
df.head()

In [44]:
# Get Course ID/Index
# Use the course title as the index, so that course_indices[title] gives the
# row label of that course in df.
# Series.drop_duplicates() compares *values*; the values here are df's row
# labels, which are already unique, so it removed nothing and a title that
# appears more than once made the lookup return a Series instead of a single
# integer. De-duplicate on the title index instead, keeping the first
# occurrence of each title.
course_indices = pd.Series(df.index, index=df['course_title'])
course_indices = course_indices[~course_indices.index.duplicated(keep='first')]

In [45]:
course_indices

In [46]:
course_indices['How To Maximize Your Profits Trading Options']

In [47]:
# Row index of the example course used for the step-by-step walkthrough.
idx = course_indices['How To Maximize Your Profits Trading Options']

In [48]:
idx

In [49]:
# Pair every course position with its similarity to the example course:
# [(position, score), ...] taken from row `idx` of the similarity matrix.
scores = list(enumerate(cosine_sim_mat[idx]))

In [50]:
scores

In [51]:
# Rank the (position, score) pairs from most to least similar.
sorted_scores = sorted(scores,key=lambda x:x[1],reverse=True)

In [52]:
# Skip the first entry, which is assumed to be the example course itself.
# NOTE(review): that only holds when no lower-indexed course ties with it for
# the top score -- confirm for titles with identical cleaned text.
sorted_scores[1:]

In [53]:
# Positions of the ranked courses (first tuple element), minus the top entry.
selected_course_indices = [i[0] for i in sorted_scores[1:]]

In [54]:
selected_course_indices

In [55]:
# Similarity scores of the same ranked courses (second tuple element), in the
# same order as selected_course_indices.
selected_course_scores = [i[1] for i in sorted_scores[1:]]

In [56]:
# Look up the titles of the ranked courses by position, preserving rank order.
recommended_result = df['course_title'].iloc[selected_course_indices]

In [57]:
# Wrap the title Series in a DataFrame so a score column can be added to it.
rec_df = pd.DataFrame(recommended_result)

In [58]:
rec_df.head()

In [59]:
# Attach the scores; the list is assigned positionally, which lines up because
# both it and the titles were built from sorted_scores[1:].
rec_df['similarity_scores'] = selected_course_scores

In [60]:
rec_df

In [66]:
def recommend_course(title,num_of_rec=10):
    """Return the courses whose titles are most similar to ``title``.

    Relies on the notebook-level ``course_indices`` (title -> row index),
    ``cosine_sim_mat`` (pairwise similarity matrix) and ``df``.

    Args:
        title: Exact course title, as stored in ``course_indices``.
        num_of_rec: Number of rows to keep (passed to ``DataFrame.head``).

    Returns:
        A DataFrame with the columns ``course_title`` and
        ``similarity_scores``, ordered from most to least similar and
        never containing the queried course itself.

    Raises:
        KeyError: If ``title`` is not present in ``course_indices``.
    """
    # ID for title
    idx = course_indices[title]
    # A title that occurs more than once yields a Series of row indices;
    # use the first occurrence so the matrix lookup below stays one row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    # Pair every course position with its similarity to the queried course.
    scores = list(enumerate(cosine_sim_mat[idx]))
    # Rank from most to least similar (stable, so ties keep dataset order).
    sorted_scores = sorted(scores, key=lambda pair: pair[1], reverse=True)
    # Drop the queried course by index rather than by position: when another
    # course ties with it for the top score, the query is not guaranteed to
    # be the first entry, and slicing [1:] would recommend the course itself.
    ranked = [(pos, score) for pos, score in sorted_scores if pos != idx]
    selected_course_indices = [pos for pos, _ in ranked]
    selected_course_scores = [score for _, score in ranked]
    result = df['course_title'].iloc[selected_course_indices]
    rec_df = pd.DataFrame(result)
    rec_df['similarity_scores'] = selected_course_scores
    return rec_df.head(num_of_rec)
    

In [67]:
# Example: show the 20 highest-ranked recommendations for one course title.
recommend_course('Trading Options Basics',20)

In [69]:
# Persist the DataFrame, including the added 'clean_course_title' column.
# NOTE(review): to_csv also writes the row index as an unnamed first column by
# default -- confirm whether downstream readers expect it.
df.to_csv("udemy_courses_clean.csv")