## Import Libraries

In [10]:
!pip install openai --quiet
!pip install tenacity --quiet

In [25]:
import json

import pandas as pd
import numpy as np


import openai
from tenacity import retry, wait_random_exponential, stop_after_attempt


### Load Data

In [2]:
## Read the Coursera course catalogue into a DataFrame
path = 'data/Coursera.csv'
data = pd.read_csv(filepath_or_buffer=path)

In [3]:
# Preview the first five rows of the loaded course table
data.head(n=5)

Unnamed: 0,course_name,university,difficulty_level,course_rating,course_url,description,skills
0,Write A Feature Length Screenplay For Film Or ...,Michigan State University,Beginner,4.8,https://www.coursera.org/learn/write-a-feature...,Write a Full Length Feature Film Script In th...,Drama Comedy peering screenwriting film D...
1,Business Strategy: Business Model Canvas Analy...,Coursera Project Network,Beginner,4.8,https://www.coursera.org/learn/canvas-analysis...,"By the end of this guided project, you will be...",Finance business plan persona (user experien...
2,Silicon Thin Film Solar Cells,�cole Polytechnique,Advanced,4.1,https://www.coursera.org/learn/silicon-thin-fi...,This course consists of a general presentation...,chemistry physics Solar Energy film lambda...
3,Finance for Managers,IESE Business School,Intermediate,4.8,https://www.coursera.org/learn/operational-fin...,"When it comes to numbers, there is always more...",accounts receivable dupont analysis analysis...
4,Retrieve Data using Single-Table SQL Queries,Coursera Project Network,Beginner,4.6,https://www.coursera.org/learn/single-table-sq...,In this course you�ll learn how to effectively...,Data Analysis select (sql) database manageme...


In [4]:
# List the column labels of the loaded DataFrame
data.columns

Index(['course_name', 'university', 'difficulty_level', 'course_rating',
       'course_url', 'description', 'skills'],
      dtype='object')

In [7]:
## Count the missing (NaN/None) entries in every column
print(data.isna().sum())

course_name         0
university          0
difficulty_level    0
course_rating       0
course_url          0
description         0
skills              0
dtype: int64


### Create embeddings for the course names

In [6]:
def get_embeddings(text, model="text-embedding-ada-002", max_attempts=6):
    """Return the OpenAI embedding vector for ``text``.

    The request is retried with randomized exponential backoff. The notebook
    embeds a whole column one request per row, so without retries a single
    transient API failure (e.g. a rate-limit response) would abort the run.

    Parameters
    ----------
    text : str
        Text to embed (a course name or a user prompt in this notebook).
    model : str, optional
        Name of the OpenAI embedding model to query.
    max_attempts : int, optional
        Total number of attempts made before giving up.

    Returns
    -------
    The ``embedding`` field of the first item in the API response
    (displayed in this notebook as a list of floats).

    Raises
    ------
    Exception
        The exception raised by the final attempt is re-raised unchanged
        once ``max_attempts`` is exhausted. Note that every exception type
        is retried, including non-transient ones such as a bad API key.
    """
    # The stop condition depends on ``max_attempts``, so the retrying wrapper
    # is built per call instead of decorating ``get_embeddings`` itself.
    @retry(
        wait=wait_random_exponential(min=1, max=20),
        stop=stop_after_attempt(max_attempts),
        reraise=True,  # surface the original API error, not tenacity's RetryError
    )
    def _request_embedding():
        response = openai.Embedding.create(
            input=text,
            model=model,
        )
        return response["data"][0]["embedding"]

    return _request_embedding()

In [27]:
## read api key
# Use a context manager so the file handle is closed as soon as the JSON is parsed
# (json.load(open(...)) leaves the file open until garbage collection).
with open('api.json') as api_key_file:
    key = json.load(api_key_file)

In [36]:

# Authenticate the OpenAI client with the key read from api.json
openai.api_key = key['open_api']

## Embed every course title; this issues one API request per row
data['embedding'] = data['course_name'].map(get_embeddings)

In [37]:
# Preview the first five rows again, now including the 'embedding' column
data.head(n=5)

Unnamed: 0,course_name,university,difficulty_level,course_rating,course_url,description,skills,embedding
0,Write A Feature Length Screenplay For Film Or ...,Michigan State University,Beginner,4.8,https://www.coursera.org/learn/write-a-feature...,Write a Full Length Feature Film Script In th...,Drama Comedy peering screenwriting film D...,"[0.004957424942404032, -0.013018687255680561, ..."
1,Business Strategy: Business Model Canvas Analy...,Coursera Project Network,Beginner,4.8,https://www.coursera.org/learn/canvas-analysis...,"By the end of this guided project, you will be...",Finance business plan persona (user experien...,"[-0.011336499825119972, -0.022729190066456795,..."
2,Silicon Thin Film Solar Cells,�cole Polytechnique,Advanced,4.1,https://www.coursera.org/learn/silicon-thin-fi...,This course consists of a general presentation...,chemistry physics Solar Energy film lambda...,"[0.002505358075723052, -0.006338656414300203, ..."
3,Finance for Managers,IESE Business School,Intermediate,4.8,https://www.coursera.org/learn/operational-fin...,"When it comes to numbers, there is always more...",accounts receivable dupont analysis analysis...,"[-0.0029222306329756975, -0.03425585851073265,..."
4,Retrieve Data using Single-Table SQL Queries,Coursera Project Network,Beginner,4.6,https://www.coursera.org/learn/single-table-sq...,In this course you�ll learn how to effectively...,Data Analysis select (sql) database manageme...,"[-0.016929104924201965, 0.013173501938581467, ..."


In [38]:
## Persist the table, including the embedding column, without the row index
# NOTE(review): CSV stores each embedding as text; it presumably needs parsing
# back into a list of floats if this file is reloaded later - confirm.
data.to_csv(path_or_buf='data/Coursera_embeddings.csv', index=False)

In [31]:
## Similarity between two embeddings, measured as their inner product
def get_similarity_score(embedding1, embedding2):
    """Return the inner product of ``embedding1`` and ``embedding2``.

    Both arguments may be any array-like (e.g. lists of floats). For 1-D
    inputs the result is a scalar: the sum of element-wise products.
    """
    first_vector = np.asanyarray(embedding1)
    second_vector = np.asanyarray(embedding2)
    return np.inner(first_vector, second_vector)

In [39]:
## get the course recommendations for a free-text user prompt
def get_recommendation(user_prompt, data, similarity_score_threshold=0.5, top_n=5):
    """Return the courses whose embeddings are most similar to ``user_prompt``.

    Parameters
    ----------
    user_prompt : str
        Free-text query; it is embedded with ``get_embeddings``.
    data : pandas.DataFrame
        Course table with an ``'embedding'`` column holding one vector per row.
    similarity_score_threshold : float, optional
        Rows scoring below this value are dropped.
    top_n : int, optional
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows of ``data`` plus a ``'similarity_score'`` column,
        sorted by that score in descending order. ``data`` itself is left
        unmodified.
    """
    user_embedding = get_embeddings(user_prompt)

    # assign() returns a new frame, so the caller's DataFrame does not silently
    # gain (or have overwritten) a 'similarity_score' column on every call.
    scored = data.assign(
        similarity_score=data['embedding'].apply(
            lambda embedding: get_similarity_score(user_embedding, embedding)
        )
    )

    above_threshold = scored[scored['similarity_score'] >= similarity_score_threshold]
    ranked = above_threshold.sort_values(by='similarity_score', ascending=False)
    return ranked.head(top_n)

In [41]:
## Example query: recommend courses for a free-text topic
course = 'World History and Religion'
get_recommendation(user_prompt=course, data=data)

Unnamed: 0,course_name,university,difficulty_level,course_rating,course_url,description,skills,embedding,similarity_score
3405,"Philosophy, Science and Religion: Religion and...",The University of Edinburgh,Beginner,4.4,https://www.coursera.org/learn/philosophy-scie...,"Philosophy, Science and Religion mark three of...",reason evolution belief mythology religion...,"[0.016331691294908524, -0.0006601900095120072,...",0.877774
2681,Science & Religion 101,University of Alberta,Not Calibrated,4.5,https://www.coursera.org/learn/science-and-rel...,This course examines the nature of both scienc...,astronomy dichotomy hermeneutics evolutiona...,"[0.024437962099909782, -0.002743568504229188, ...",0.877461
28,"Philosophy, Science and Religion: Philosophy a...",The University of Edinburgh,Intermediate,4.6,https://www.coursera.org/learn/philosophy-scie...,"Philosophy, Science and Religion mark three of...",ordered pair arts and humanities virtue spi...,"[0.011801879853010178, -0.0004650808987207711,...",0.872059
1313,"Philosophy, Science and Religion: Science and ...",The University of Edinburgh,Beginner,4.3,https://www.coursera.org/learn/philosophy-scie...,"Philosophy, Science and Religion mark three of...",evolution sociology history evolutionary bi...,"[0.015422379598021507, -0.001257329131476581, ...",0.865002
2870,Roman Art and Archaeology,University of Arizona,Beginner,4.7,https://www.coursera.org/learn/roman-art-archa...,The objective of this course is to provide an ...,greek mythology Archeology history scientif...,"[0.006569115910679102, -0.0016763578169047832,...",0.860288
