## Course Similarity Inspector

This document is used to inspect which courses have been flagged as similar to each other in the Orange and Udacity
course catalogues.

In [None]:
import pandas as pd
import numpy as np
from ipywidgets import interact
import numpy as np
from ipywidgets import widgets
from IPython.display import *

In [None]:
# Pull the four source tables in full via the `%sql` line magic.
# NOTE(review): presumably the ipython-sql extension; it must already be
# loaded and connected to the database before this cell runs -- that setup
# is not shown in this notebook section.
# Keep comments on their own lines: anything after `%sql` is passed to the magic.

# Orange course catalogue.
course_query = %sql SELECT * FROM course_description_catalog;
# Udacity course catalogue.
udacity_course_query = %sql SELECT * FROM udacity_course_list;
# Orange-to-Orange pairwise similarity scores.
course_similarity_query = %sql SELECT * FROM course_similarity_table;
# Orange-to-Udacity similarity scores.
udacity_similarity_query = %sql SELECT * FROM udacity_course_similarity_table;

In [None]:
# Convert each query result into a pandas DataFrame; the helper functions and
# widgets below read these module-level frames.
# Columns used later: 'course#', 'series', 'course title'.
df_courses = course_query.DataFrame()
# Columns used later: 'key', 'title', 'short_summary'.
df_udacity_courses = udacity_course_query.DataFrame()
# Columns used later: 'course_number_1', 'course_number_2', 'similiarity'.
df_course_similarity = course_similarity_query.DataFrame()
# Columns used later: 'orange_course_id', 'udacity_course_id', 'similiarity'.
df_udacity_similarity = udacity_similarity_query.DataFrame()

In [None]:
def GetSimilarCourses(row, df, return_limit = None):
    """Return the courses paired with ``row``'s course in a pairwise table.

    Parameters
    ----------
    row : mapping-like
        Must provide the course identifier under the key ``'course#'``.
    df : pandas.DataFrame
        Pairwise similarity table with the columns ``'course_number_1'``,
        ``'course_number_2'`` and ``'similiarity'`` (spelled as in the table).
    return_limit : int, optional
        When given, only the first ``return_limit`` rows of the sorted
        result are returned.

    Returns
    -------
    pandas.DataFrame
        Columns ``'course_id'`` and ``'similarity'``, ordered by descending
        similarity. The index is not reset after sorting.
    """
    target = row['course#']

    # The target may sit on either side of a pair, so match both id columns
    # and take the partner id from the opposite column.
    on_first_side = (df['course_number_1'] == target).to_numpy()
    on_second_side = (df['course_number_2'] == target).to_numpy()

    first_ids = df['course_number_1'].values
    second_ids = df['course_number_2'].values
    scores = df['similiarity'].values

    partner_ids = np.concatenate([second_ids[on_first_side],
                                  first_ids[on_second_side]])
    partner_scores = np.concatenate([scores[on_first_side],
                                     scores[on_second_side]])

    ranked = pd.DataFrame({'course_id': partner_ids,
                           'similarity': partner_scores})
    ranked = ranked.sort_values('similarity', ascending=False)

    if return_limit is None:
        return ranked
    return ranked.iloc[:return_limit].copy()

In [None]:
def GetSimilarUdacityCourses(row, df, return_limit = None):
    """Return the Udacity courses matched to ``row``'s course.

    Parameters
    ----------
    row : mapping-like
        Must provide the course identifier under the key ``'course#'``.
    df : pandas.DataFrame
        Similarity table with the columns ``'orange_course_id'``,
        ``'udacity_course_id'`` and ``'similiarity'`` (spelled as in the
        table).
    return_limit : int, optional
        When given, only the first ``return_limit`` rows of the sorted
        result are returned.

    Returns
    -------
    pandas.DataFrame
        Columns ``'course_id'`` (the Udacity id) and ``'similarity'``,
        ordered by descending similarity. The index is not reset after
        sorting.
    """
    target = row['course#']

    # Only rows whose 'orange_course_id' equals the target are relevant.
    is_match = (df['orange_course_id'] == target).to_numpy()

    ranked = pd.DataFrame({
        'course_id': df['udacity_course_id'].values[is_match],
        'similarity': df['similiarity'].values[is_match],
    })
    ranked = ranked.sort_values('similarity', ascending=False)

    if return_limit is None:
        return ranked
    return ranked.iloc[:return_limit].copy()

In [None]:
def GetAllSimilarCourses(row, df_orange, df_udacity, return_limit = None):
    """Look up both the Orange and the Udacity courses similar to ``row``.

    Parameters
    ----------
    row : mapping-like
        Must provide the course identifier under the key ``'course#'``.
    df_orange : pandas.DataFrame
        Orange pairwise similarity table, passed to ``GetSimilarCourses``.
    df_udacity : pandas.DataFrame
        Orange-to-Udacity similarity table, passed to
        ``GetSimilarUdacityCourses``.
    return_limit : int, optional
        Maximum number of matches kept from each similarity table.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        First the Orange matches with the columns ``'course#'``,
        ``'series'``, ``'course title'`` and ``'similarity'``; then the
        Udacity matches with the columns ``'course_id'``, ``'title'`` and
        ``'similarity'``.

    Notes
    -----
    The catalogue details are joined in from the module-level frames
    ``df_courses`` and ``df_udacity_courses``, which must already exist.
    """
    df_result = GetSimilarCourses(row, df_orange, return_limit=return_limit)
    # Use the table supplied by the caller; previously the ``df_udacity``
    # argument was ignored in favour of the global ``df_udacity_similarity``.
    df_udacity_result = GetSimilarUdacityCourses(row, df_udacity, return_limit=return_limit)

    # Left joins keep every match even when the catalogue has no entry for it.
    df_result = pd.merge(
        df_result, df_courses,
        left_on='course_id', right_on='course#', how='left',
    )[['course#', 'series', 'course title', 'similarity']]
    df_udacity_result = pd.merge(
        df_udacity_result, df_udacity_courses,
        left_on='course_id', right_on='key', how='left',
    )[['course_id', 'title', 'similarity']]
    return (df_result, df_udacity_result)

In [None]:


# Group the Orange catalogue by series once, so the widget callbacks can fetch
# a series' courses with get_group() instead of re-filtering on every change.
df_courses_grp = df_courses.groupby("series")

def print_course(course):
    """Render the five Orange courses most similar to the selected course.

    ``course`` is a course title; it is looked up within the series currently
    selected in ``select_series_widget``. Returns an ``HTML`` display object
    holding a table of course number, series, title and similarity, ordered
    by descending similarity.
    """
    series_courses = df_courses_grp.get_group(select_series_widget.value)
    # Titles are matched within the selected series; take the first hit.
    selected = series_courses.loc[series_courses['course title'] == course].iloc[0]

    top_matches = GetSimilarCourses(selected, df_course_similarity, return_limit=5)

    # The inner join attaches catalogue details and drops ids missing from
    # the catalogue.
    report = df_courses.merge(top_matches, left_on='course#', right_on='course_id', how='inner')
    report = report[['course#', 'series', 'course title', 'similarity']]
    report = report.sort_values("similarity", ascending=False)
    return HTML(report.to_html())

def select_series(series):
    """Repopulate the course picker with the sorted titles of ``series``."""
    titles = df_courses_grp.get_group(series)['course title']
    select_course_widget.options = titles.sort_values().tolist()

# Series picker: one entry per distinct series, alphabetically ordered.
select_series_widget = widgets.Select(options=df_courses.series.drop_duplicates().sort_values().tolist())
init = select_series_widget.value
# Course picker, seeded with the courses of the initially selected series.
# The titles are sorted here as well, so the initial list uses the same order
# that select_series() applies on every later refresh (it used to start unsorted).
select_course_widget = widgets.Select(
    options=df_courses_grp.get_group(init)['course title'].sort_values().tolist()
)
# Bind each picker to its callback: j renders the similarity table,
# i refreshes the course list when the series changes.
j = widgets.interactive(print_course, course=select_course_widget)
i = widgets.interactive(select_series, series=select_series_widget)
display(i)
display(j)



In [None]:
def print_udacity_course(course):
    """Render the five Udacity courses most similar to the selected course.

    ``course`` is an Orange course title; it is looked up within the series
    currently selected in ``select_udacity_series_widget``. Returns an
    ``HTML`` display object holding a table of Udacity key, title, short
    summary and similarity, ordered by descending similarity.
    """
    series_courses = df_courses_grp.get_group(select_udacity_series_widget.value)
    # Titles are matched within the selected series; take the first hit.
    selected = series_courses.loc[series_courses['course title'] == course].iloc[0]

    top_matches = GetSimilarUdacityCourses(selected, df_udacity_similarity, return_limit=5)

    # The inner join attaches Udacity catalogue details and drops ids missing
    # from that catalogue.
    report = df_udacity_courses.merge(top_matches, left_on='key', right_on='course_id', how='inner')
    report = report[['key', 'title', 'short_summary', 'similarity']]
    report = report.sort_values("similarity", ascending=False)
    return HTML(report.to_html())

def select_udacity_series(series):
    """Repopulate the Udacity-view course picker with the sorted titles of ``series``."""
    titles = df_courses_grp.get_group(series)['course title']
    select_udacity_course_widget.options = titles.sort_values().tolist()

# Series picker for the Udacity comparison: the choices are still the Orange
# series, because the lookup starts from an Orange course.
select_udacity_series_widget = widgets.Select(options=df_courses.series.drop_duplicates().sort_values().tolist())
init = select_udacity_series_widget.value
# Course picker, seeded with the courses of the initially selected series.
# The titles are sorted here as well, so the initial list uses the same order
# that select_udacity_series() applies on every later refresh (it used to
# start unsorted).
select_udacity_course_widget = widgets.Select(
    options=df_courses_grp.get_group(init)['course title'].sort_values().tolist()
)
# Bind each picker to its callback: j_udacity renders the similarity table,
# i_udacity refreshes the course list when the series changes.
j_udacity = widgets.interactive(print_udacity_course, course=select_udacity_course_widget)
i_udacity = widgets.interactive(select_udacity_series, series=select_udacity_series_widget)

display(i_udacity)
display(j_udacity)