In [2]:
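# Dependencies: uncomment and run once in a fresh environment.
# %pip installs into the environment of the running kernel.
# %pip install faiss-gpu
# %pip install gradio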

## init

In [4]:
import numpy as np
import pandas as pd
import random

import gradio as gr
import faiss

from sklearn.feature_extraction.text import TfidfVectorizer

import warnings
warnings.filterwarnings('ignore')

In [5]:
# Load the course catalogue; the path is resolved relative to the current working directory.
data = pd.read_csv('coursera_courses.csv')

In [6]:
# Size of the loaded catalogue as (rows, columns).
data.shape

(1000, 13)

In [7]:
# Preview the first rows to see which columns are available.
data.head()

Unnamed: 0.1,Unnamed: 0,course_title,course_organization,course_certificate_type,course_time,course_rating,course_reviews_num,course_difficulty,course_url,course_students_enrolled,course_skills,course_summary,course_description
0,196,(ISC)² Systems Security Certified Practitioner...,ISC2,Specialization,3 - 6 Months,4.7,484.0,Beginner,https://www.coursera.org/specializations/sscp-...,6706.0,"['Security Software', 'Risk Management', 'Acce...",[],Pursue better IT security job opportunities an...
1,648,.NET FullStack Developer,Board Infinity,Specialization,1 - 3 Months,4.3,49.0,Intermediate,https://www.coursera.org/specializations/dot-n...,2140.0,"['Web API', 'Web Development', 'Cascading Styl...",['Master .NET full stack web dev: from .NET co...,Develop the proficiency required to design and...
2,928,21st Century Energy Transition: how do we make...,University of Alberta,Course,1 - 3 Months,4.8,59.0,Beginner,https://www.coursera.org/learn/21st-century-en...,4282.0,[],['Understand the complexity of systems supplyi...,"Affordable, abundant and reliable energy is fu..."
3,580,A Crash Course in Causality: Inferring Causal...,University of Pennsylvania,Course,1 - 3 Months,4.7,515.0,Intermediate,https://www.coursera.org/learn/crash-course-in...,38833.0,[],[],We have all heard the phrase “correlation does...
4,918,A life with ADHD,University of Geneva,Course,1 - 3 Months,,,Beginner,https://www.coursera.org/learn/life-with-adhd,,"['differential diagnosis and comorbidities', '...",[' Understand what ADHD is and the challenges ...,What is ADHD and what are the challenges that ...


## create corpus

In [8]:
# The text corpus is the course titles only: one document per catalogue row.
course_corpus = data.loc[:, 'course_title']

## create vector database index

In [9]:
# Character trigrams taken inside word boundaries ('char_wb'); trigrams that
# occur in fewer than 5 titles are dropped from the vocabulary (min_df=5).
vectorizer = TfidfVectorizer(
    analyzer='char_wb',
    ngram_range=(3, 3),
    min_df=5,
)

# Learn the vocabulary/IDF weights and encode every title in one pass.
X = vectorizer.fit_transform(course_corpus)

In [12]:
# Densify the sparse TF-IDF matrix and cast it to float32 before handing it to FAISS.
X_array = X.toarray().astype(np.float32)

# Flat L2 index whose dimensionality equals the number of TF-IDF features.
index = faiss.IndexFlatL2(X_array.shape[-1])

# Store one vector per course, in the same row order as `data`.
index.add(X_array)

## testing search

In [19]:
# Smoke-test the index with a sample query before wiring it into the UI.
search_text = ["data science"]
search_text_vector = vectorizer.transform(search_text)
search_text_vector_array = np.float32(search_text_vector.toarray())

# Ask for the 5 nearest titles; the result arrays hold one row per query.
distances, indices = index.search(search_text_vector_array, 5)

# The ids returned by FAISS are row positions of the vectors that were added,
# so rows are looked up positionally (.iloc) rather than by index label.
for course_index in indices[0]:
    course = data.iloc[course_index]
    print(f"Course Title: {course['course_title']} from {course['course_organization']}")

Course Ttile: Data Science from Johns Hopkins University
Course Ttile: IBM Data Science from IBM
Course Ttile: Fractal Data Science from Fractal Analytics
Course Ttile: Introduction to Data Science from IBM
Course Ttile: Foundations of Data Science from Google


## deploy via gradio

In [27]:
def recommend_course(title):
    """Return the three catalogue courses whose titles best match ``title``.

    The query is encoded with the fitted TF-IDF ``vectorizer`` and looked up
    in the FAISS ``index``; the hits are rendered as display strings.

    Parameters
    ----------
    title : str
        Free-text keywords or a (partial) course title.

    Returns
    -------
    tuple of str
        Always exactly three strings, one per output box of the Gradio
        interface. When the query is blank, or shares no character trigram
        with the indexed titles, the first string is an explanatory message
        and the remaining ones are empty.
    """
    n_results = 3  # the Gradio interface exposes exactly three output boxes

    def _message_only(message):
        # Pad with empty strings so the caller still receives n_results values.
        return (message,) + ("",) * (n_results - 1)

    if title is None or not str(title).strip():
        return _message_only("Please enter a course title or keywords.")

    query_vector = vectorizer.transform([str(title)])
    if query_vector.nnz == 0:
        # No trigram of the query is in the vocabulary: the query is an
        # all-zero vector and its nearest neighbours would be meaningless.
        return _message_only("No matching courses found.")

    query_array = query_vector.toarray().astype(np.float32)
    _distances, indices = index.search(query_array, n_results)

    recommendations = []
    for course_index in indices[0]:
        if course_index < 0:
            # FAISS fills missing results with id -1 when the index holds
            # fewer than n_results vectors.
            recommendations.append("")
            continue
        # FAISS ids are row positions, hence the positional lookup.
        course = data.iloc[course_index]
        recommendations.append(
            f"Course Title: {course['course_title']} from {course['course_organization']}"
        )

    return tuple(recommendations)

In [28]:
# Web UI: one free-text input and three text outputs, one per value
# returned by recommend_course.
demo = gr.Interface(
    title="Course Recommender",
    description="Enter the course title to get the recommendations",
    fn=recommend_course,
    inputs=gr.Text(label="Course Keywords"),
    outputs=[
        gr.Text(label="Course 1"),
        gr.Text(label="Course 2"),
        gr.Text(label="Course 3"),  # third box was previously also labelled "Course 2"
    ],
)

# share=True additionally requests a temporary public URL from Gradio.
demo.launch(share=True)