In [5]:
import pandas as pd
import numpy as np
import neattext.functions as nfx
import seaborn as sn

from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity,linear_kernel

In [6]:
df = pd.read_csv("Final.csv", encoding='ISO-8859-1')

In [7]:
df.head()

Unnamed: 0,Course_Id,Course_Name,Difficulty_Level,Course_URL,Course_Description,Course_Rating,Course_Subscribers
0,192062,Write A Feature Length Screenplay For Film Or ...,Beginner,https://www.coursera.org/learn/write-a-feature...,Write a Full Length Feature Film Script In th...,4.8,49216.0
1,523438,Business Strategy: Business Model Canvas Analy...,Beginner,https://www.coursera.org/learn/canvas-analysis...,"By the end of this guided project, you will be...",4.8,24869.0
2,354331,Silicon Thin Film Solar Cells,Advanced,https://www.coursera.org/learn/silicon-thin-fi...,This course consists of a general presentation...,4.1,78781.0
3,320429,Finance for Managers,Intermediate,https://www.coursera.org/learn/operational-fin...,"When it comes to numbers, there is always more...",4.8,58013.0
4,140932,Retrieve Data using Single-Table SQL Queries,Beginner,https://www.coursera.org/learn/single-table-sq...,In this course you?ll learn how to effectively...,4.6,55601.0


In [8]:
# list all the methods present in the neattext function

dir(nfx)

['BTC_ADDRESS_REGEX',
 'CURRENCY_REGEX',
 'CURRENCY_SYMB_REGEX',
 'Counter',
 'DATE_REGEX',
 'EMAIL_REGEX',
 'EMOJI_REGEX',
 'HASTAG_REGEX',
 'MASTERCard_REGEX',
 'MD5_SHA_REGEX',
 'MOST_COMMON_PUNCT_REGEX',
 'NUMBERS_REGEX',
 'PHONE_REGEX',
 'PoBOX_REGEX',
 'SPECIAL_CHARACTERS_REGEX',
 'STOPWORDS',
 'STOPWORDS_de',
 'STOPWORDS_en',
 'STOPWORDS_es',
 'STOPWORDS_fr',
 'STOPWORDS_ru',
 'STOPWORDS_yo',
 'STREET_ADDRESS_REGEX',
 'TextFrame',
 'URL_PATTERN',
 'USER_HANDLES_REGEX',
 'VISACard_REGEX',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__generate_text',
 '__loader__',
 '__name__',
 '__numbers_dict',
 '__package__',
 '__spec__',
 '_lex_richness_herdan',
 '_lex_richness_maas_ttr',
 'clean_text',
 'defaultdict',
 'digit2words',
 'extract_btc_address',
 'extract_currencies',
 'extract_currency_symbols',
 'extract_dates',
 'extract_emails',
 'extract_emojis',
 'extract_hashtags',
 'extract_html_tags',
 'extract_mastercard_addr',
 'extract_md5sha',
 'extract_numbers',
 'extr

In [9]:
df['Course_Name'].iloc[1:5]

1    Business Strategy: Business Model Canvas Analy...
2                        Silicon Thin Film Solar Cells
3                                 Finance for Managers
4         Retrieve Data using Single-Table SQL Queries
Name: Course_Name, dtype: object

In [10]:
# generating clean text by removing the stopwords and special characters

# Convert non-string values to an empty string
df['Clean_title'] = df['Course_Name'].apply(lambda x: nfx.remove_stopwords(str(x)) if isinstance(x, str) else '')

# Continue with your cleaning process
df['Clean_title'] = df['Clean_title'].apply(nfx.remove_special_characters)

# Display the cleaned titles for a subset
print(df['Clean_title'].iloc[1:5])

1    Business Strategy Business Model Canvas Analys...
2                        Silicon Thin Film Solar Cells
3                                     Finance Managers
4                Retrieve Data SingleTable SQL Queries
Name: Clean_title, dtype: object


In [11]:
# vectorizing the course_title

countvect = CountVectorizer()

cv_mat = countvect.fit_transform(df['Clean_title'])

cv_mat

<4242x4020 sparse matrix of type '<class 'numpy.int64'>'
	with 17672 stored elements in Compressed Sparse Row format>

In [12]:
cv_mat.todense()

matrix([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [13]:
df_cv_words = pd.DataFrame(cv_mat.todense(), columns=countvect.get_feature_names_out())

df_cv_words

Unnamed: 0,10,10000,101,102,103,13,1650,17002000,1760,1861,...,your,yourream,yourself,youth,yuan,zap,zcurve,zero,zoning,zos
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4237,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4238,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4239,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4240,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# cosine similarity matrix

cosine_sim_mat = cosine_similarity(cv_mat)

cosine_sim_mat

array([[1.        , 0.        , 0.18257419, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.18257419, 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [15]:
# drop duplicates 

course_index = pd.Series(df.index,index = df['Course_Name']).drop_duplicates()

In [16]:
course_index

Course_Name
Write A Feature Length Screenplay For Film Or Television                   0
Business Strategy: Business Model Canvas Analysis with Miro                1
Silicon Thin Film Solar Cells                                              2
Finance for Managers                                                       3
Retrieve Data using Single-Table SQL Queries                               4
                                                                        ... 
Global China: From the Mongols to the Ming                              4237
Leaders in Citizen Security and Justice Management for the Caribbean    4238
Computational Neuroscience: Neuronal Dynamics of Cognition              4239
Cities and the Challenge of Sustainable Development                     4240
MathTrackX: Special Functions                                           4241
Length: 4242, dtype: int64

In [17]:
temp = df[df['Course_Name'].str.contains('Python', na=False)]
temp.head()

Unnamed: 0,Course_Id,Course_Name,Difficulty_Level,Course_URL,Course_Description,Course_Rating,Course_Subscribers,Clean_title
16,532544,Python Programming Essentials,Beginner,https://www.coursera.org/learn/python-programming,This course will introduce you to the wonderfu...,4.8,27201.0,Python Programming Essentials
62,613165,Python Tricks and Hacks for Productivity,Advanced,https://www.coursera.org/learn/python-hacks,"By the end of this project, you are going to b...",4.1,74466.0,Python Tricks Hacks Productivity
115,326001,Exception Handling in Python,Beginner,https://www.coursera.org/learn/exception-handl...,"In this 1-hour long project-based course, you ...",4.7,84658.0,Exception Handling Python
178,716970,Using Python to Interact with the Operating Sy...,Advanced,https://www.coursera.org/learn/python-operatin...,"By the end of this course, you?ll be able to m...",4.6,44018.0,Python Interact Operating System
196,168040,AWS Elastic Beanstalk:Deploy a Python(Flask) W...,Beginner,https://www.coursera.org/learn/python-aws-elas...,"In this 1-hour long project-based course, you ...",5.0,25633.0,AWS Elastic BeanstalkDeploy PythonFlask Web Ap...


In [18]:
top6 = temp.sort_values(by = 'Course_Subscribers',ascending=False).head(6)

top6

Unnamed: 0,Course_Id,Course_Name,Difficulty_Level,Course_URL,Course_Description,Course_Rating,Course_Subscribers,Clean_title
779,554931,Introduction to Natural Language Processing in...,Beginner,https://www.coursera.org/learn/intro-natural-l...,"In this 1-hour long project-based course, you ...",3.5,89240.0,Introduction Natural Language Processing Python
1365,411166,"Merge, Sort and Filter Data in Python Pandas",Beginner,https://www.coursera.org/learn/python-pandas-m...,Visualizing data patterns often involves re-ar...,4.3,89137.0,Merge Sort Filter Data Python Pandas
525,628457,Create a Memory Puzzle Game in Python Using Py...,Beginner,https://www.coursera.org/learn/create-memory-p...,"By the end of this project, you will create a ...",4.3,88844.0,Create Memory Puzzle Game Python Pygame
1791,532619,Data Visualization with Python,Beginner,https://www.coursera.org/learn/python-for-data...,"""A picture is worth a thousand words"". We are ...",4.4,87899.0,Data Visualization Python
2808,755868,Machine Learning for Accounting with Python,Advanced,https://www.coursera.org/learn/machine-learnin...,"This course, Machine Learning for Accounting w...",4.5,86895.0,Machine Learning Accounting Python
3142,492927,Create Python Linux Script to Generate a Disk ...,Beginner,https://www.coursera.org/learn/python-linux-sc...,There are many choices when it comes to writin...,4.4,86830.0,Create Python Linux Script Generate Disk Usage...


In [19]:
top5 = temp.sort_values(by = 'Course_Rating',ascending=False).head(6)

top5

Unnamed: 0,Course_Id,Course_Name,Difficulty_Level,Course_URL,Course_Description,Course_Rating,Course_Subscribers,Clean_title
3800,653144,Data Science: Computational Thinking with Python,Beginner,https://www.edx.org/course/foundations-of-data...,We live in an era of unprecedented access to d...,Not Calibrated,74931.0,Data Science Computational Thinking Python
3771,305797,Computing in Python II: Control Structures,Beginner,https://www.edx.org/course/computing-in-python...,Building on your prior knowledge of variables ...,Not Calibrated,40067.0,Computing Python II Control Structures
1471,450125,Design Computing: 3D Modeling in Rhinoceros wi...,Advanced,https://www.coursera.org/learn/3d-modeling-rhi...,Why should a designer learn to code? As our w...,5,28628.0,Design Computing 3D Modeling Rhinoceros Python...
196,168040,AWS Elastic Beanstalk:Deploy a Python(Flask) W...,Beginner,https://www.coursera.org/learn/python-aws-elas...,"In this 1-hour long project-based course, you ...",5,25633.0,AWS Elastic BeanstalkDeploy PythonFlask Web Ap...
3597,565741,Analyzing Data with Python,Beginner,https://www.edx.org/course/analyzing-data-with...,LEARN TO ANALYZE DATA WITH PYTHONLearn how to ...,5,19384.0,Analyzing Data Python
2722,715524,"Cleaning, Reshaping, and Expanding Datasets in...",Conversant,https://www.coursera.org/learn/cleaning-reshap...,It has been said that obtaining and cleaning d...,5,47514.0,Cleaning Reshaping Expanding Datasets Python


In [20]:
index = course_index['Retrieve Data using Single-Table SQL Queries']
print(index)

4


In [21]:
scores = list(enumerate(cosine_sim_mat[index]))
scores

[(0, 0.0),
 (1, 0.0),
 (2, 0.0),
 (3, 0.0),
 (4, 0.9999999999999999),
 (5, 0.0),
 (6, 0.0),
 (7, 0.0),
 (8, 0.0),
 (9, 0.0),
 (10, 0.0),
 (11, 0.0),
 (12, 0.0),
 (13, 0.0),
 (14, 0.18257418583505539),
 (15, 0.0),
 (16, 0.0),
 (17, 0.0),
 (18, 0.0),
 (19, 0.0),
 (20, 0.0),
 (21, 0.0),
 (22, 0.0),
 (23, 0.1690308509457033),
 (24, 0.0),
 (25, 0.0),
 (26, 0.0),
 (27, 0.0),
 (28, 0.0),
 (29, 0.0),
 (30, 0.0),
 (31, 0.0),
 (32, 0.0),
 (33, 0.0),
 (34, 0.0),
 (35, 0.25819888974716115),
 (36, 0.0),
 (37, 0.0),
 (38, 0.0),
 (39, 0.0),
 (40, 0.0),
 (41, 0.0),
 (42, 0.0),
 (43, 0.0),
 (44, 0.0),
 (45, 0.0),
 (46, 0.0),
 (47, 0.0),
 (48, 0.0),
 (49, 0.0),
 (50, 0.0),
 (51, 0.0),
 (52, 0.0),
 (53, 0.0),
 (54, 0.0),
 (55, 0.0),
 (56, 0.0),
 (57, 0.0),
 (58, 0.0),
 (59, 0.0),
 (60, 0.0),
 (61, 0.0),
 (62, 0.0),
 (63, 0.0),
 (64, 0.0),
 (65, 0.0),
 (66, 0.0),
 (67, 0.0),
 (68, 0.0),
 (69, 0.0),
 (70, 0.0),
 (71, 0.0),
 (72, 0.0),
 (73, 0.0),
 (74, 0.0),
 (75, 0.0),
 (76, 0.0),
 (77, 0.0),
 (78, 0.0),


In [22]:
sorted_score = sorted(scores, key=lambda x: np.any(x[1]), reverse=True)
sorted_score

[(4, 0.9999999999999999),
 (14, 0.18257418583505539),
 (23, 0.1690308509457033),
 (35, 0.25819888974716115),
 (84, 0.25819888974716115),
 (109, 0.25819888974716115),
 (119, 0.22360679774997896),
 (129, 0.22360679774997896),
 (153, 0.1690308509457033),
 (185, 0.22360679774997896),
 (186, 0.19999999999999998),
 (198, 0.18257418583505539),
 (219, 0.18257418583505539),
 (220, 0.22360679774997896),
 (226, 0.25819888974716115),
 (239, 0.18257418583505539),
 (263, 0.19999999999999998),
 (275, 0.22360679774997896),
 (285, 0.22360679774997896),
 (305, 0.25819888974716115),
 (307, 0.25819888974716115),
 (329, 0.22360679774997896),
 (334, 0.22360679774997896),
 (342, 0.19999999999999998),
 (349, 0.25819888974716115),
 (363, 0.22360679774997896),
 (376, 0.25819888974716115),
 (390, 0.18257418583505539),
 (391, 0.22360679774997896),
 (405, 0.22360679774997896),
 (410, 0.25819888974716115),
 (424, 0.22360679774997896),
 (430, 0.25819888974716115),
 (468, 0.22360679774997896),
 (480, 0.22360679774997

In [23]:
# so the sorted score list is a collection of tuples 
# which have the index and the value,so i will select the indices first

sorted_indices = [i[0] for i in sorted_score[1:]]

sorted_values = [i[1] for i in sorted_score[1:]]

sorted_values

[0.18257418583505539,
 0.1690308509457033,
 0.25819888974716115,
 0.25819888974716115,
 0.25819888974716115,
 0.22360679774997896,
 0.22360679774997896,
 0.1690308509457033,
 0.22360679774997896,
 0.19999999999999998,
 0.18257418583505539,
 0.18257418583505539,
 0.22360679774997896,
 0.25819888974716115,
 0.18257418583505539,
 0.19999999999999998,
 0.22360679774997896,
 0.22360679774997896,
 0.25819888974716115,
 0.25819888974716115,
 0.22360679774997896,
 0.22360679774997896,
 0.19999999999999998,
 0.25819888974716115,
 0.22360679774997896,
 0.25819888974716115,
 0.18257418583505539,
 0.22360679774997896,
 0.22360679774997896,
 0.25819888974716115,
 0.22360679774997896,
 0.25819888974716115,
 0.22360679774997896,
 0.22360679774997896,
 0.19999999999999998,
 0.39999999999999997,
 0.25819888974716115,
 0.25819888974716115,
 0.25819888974716115,
 0.25819888974716115,
 0.25819888974716115,
 0.22360679774997896,
 0.25819888974716115,
 0.25819888974716115,
 0.22360679774997896,
 0.258198889

In [24]:
recommended_result_df = df.iloc[sorted_indices]

recommended_result_df



Unnamed: 0,Course_Id,Course_Name,Difficulty_Level,Course_URL,Course_Description,Course_Rating,Course_Subscribers,Clean_title
14,639144,Grab Data Fast with Vertical and Horizontal LO...,Beginner,https://www.coursera.org/learn/grab-data-fast-...,Data can come our way in multiple forms and fr...,4.5,61339.0,Grab Data Fast Vertical Horizontal LOOKUP
23,486794,Preparing for the Google Cloud Professional Da...,Beginner,https://www.coursera.org/learn/preparing-cloud...,"From the course: ""The best way to prepare for ...",4.5,36912.0,Preparing Google Cloud Professional Data Engin...
35,233139,Dealing With Missing Data,Advanced,https://www.coursera.org/learn/missing-data,This course will cover the steps used in weigh...,3.5,29060.0,Dealing Missing Data
84,475973,Mastering SQL Joins,Advanced,https://www.coursera.org/learn/mastering-sql-j...,"In this 2-hour long project-based course, you ...",4,15960.0,Mastering SQL Joins
109,727562,Big data and Language 2,Advanced,https://www.coursera.org/learn/big-data-langua...,"In this course, students will understand chara...",5,13450.0,Big data Language 2
...,...,...,...,...,...,...,...,...
4237,722213,Global China: From the Mongols to the Ming,Beginner,https://www.edx.org/course/global-china-from-t...,"In the 13th century, by force of arms, the Mon...",4.1,77607.0,Global China Mongols Ming
4238,405077,Leaders in Citizen Security and Justice Manage...,Intermediate,https://www.edx.org/course/leaders-in-citizen-...,The high rates of crime and violence are two o...,4.3,61696.0,Leaders Citizen Security Justice Management Ca...
4239,602310,Computational Neuroscience: Neuronal Dynamics ...,Advanced,https://www.edx.org/course/computational-neuro...,What happens in your brain when you make a dec...,4.4,70899.0,Computational Neuroscience Neuronal Dynamics C...
4240,260771,Cities and the Challenge of Sustainable Develo...,Beginner,https://www.edx.org/course/cities-and-the-chal...,"According to the United Nations, urbanization ...",4.8,76977.0,Cities Challenge Sustainable Development


In [25]:
recommended_result_df['Similarity_Score'] = np.array(sorted_values)

recommended_result_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_result_df['Similarity_Score'] = np.array(sorted_values)


Unnamed: 0,Course_Id,Course_Name,Difficulty_Level,Course_URL,Course_Description,Course_Rating,Course_Subscribers,Clean_title,Similarity_Score
14,639144,Grab Data Fast with Vertical and Horizontal LO...,Beginner,https://www.coursera.org/learn/grab-data-fast-...,Data can come our way in multiple forms and fr...,4.5,61339.0,Grab Data Fast Vertical Horizontal LOOKUP,0.182574
23,486794,Preparing for the Google Cloud Professional Da...,Beginner,https://www.coursera.org/learn/preparing-cloud...,"From the course: ""The best way to prepare for ...",4.5,36912.0,Preparing Google Cloud Professional Data Engin...,0.169031
35,233139,Dealing With Missing Data,Advanced,https://www.coursera.org/learn/missing-data,This course will cover the steps used in weigh...,3.5,29060.0,Dealing Missing Data,0.258199
84,475973,Mastering SQL Joins,Advanced,https://www.coursera.org/learn/mastering-sql-j...,"In this 2-hour long project-based course, you ...",4,15960.0,Mastering SQL Joins,0.258199
109,727562,Big data and Language 2,Advanced,https://www.coursera.org/learn/big-data-langua...,"In this course, students will understand chara...",5,13450.0,Big data Language 2,0.258199
...,...,...,...,...,...,...,...,...,...
4237,722213,Global China: From the Mongols to the Ming,Beginner,https://www.edx.org/course/global-china-from-t...,"In the 13th century, by force of arms, the Mon...",4.1,77607.0,Global China Mongols Ming,0.000000
4238,405077,Leaders in Citizen Security and Justice Manage...,Intermediate,https://www.edx.org/course/leaders-in-citizen-...,The high rates of crime and violence are two o...,4.3,61696.0,Leaders Citizen Security Justice Management Ca...,0.000000
4239,602310,Computational Neuroscience: Neuronal Dynamics ...,Advanced,https://www.edx.org/course/computational-neuro...,What happens in your brain when you make a dec...,4.4,70899.0,Computational Neuroscience Neuronal Dynamics C...,0.000000
4240,260771,Cities and the Challenge of Sustainable Develo...,Beginner,https://www.edx.org/course/cities-and-the-chal...,"According to the United Nations, urbanization ...",4.8,76977.0,Cities Challenge Sustainable Development,0.000000


In [26]:
use_df = recommended_result_df[['Clean_title','Similarity_Score']]
use_df
       

Unnamed: 0,Clean_title,Similarity_Score
14,Grab Data Fast Vertical Horizontal LOOKUP,0.182574
23,Preparing Google Cloud Professional Data Engin...,0.169031
35,Dealing Missing Data,0.258199
84,Mastering SQL Joins,0.258199
109,Big data Language 2,0.258199
...,...,...
4237,Global China Mongols Ming,0.000000
4238,Leaders Citizen Security Justice Management Ca...,0.000000
4239,Computational Neuroscience Neuronal Dynamics C...,0.000000
4240,Cities Challenge Sustainable Development,0.000000


In [27]:
def recommend_courses(title, num_rec=5):
    # Calculate the course index
    course_index = pd.Series(df.index, index=df['Course_Name']).drop_duplicates()

    # Filter courses with matching strings in the title
    matching_courses = df[df['Course_Name'].str.contains(title, case=False, na=False)]

    if not matching_courses.empty:
        # Get the index of matching courses
        index = course_index[matching_courses['Course_Name']]

        # Sort matching courses by subscribers and ratings
        sorted_courses = df.loc[index].sort_values(by=['Course_Subscribers', 'Course_Rating'], ascending=[False, False])

        # Return the top recommended courses
        recommended_courses = sorted_courses.head(num_rec)
        return recommended_courses[['Course_Name', 'Course_Description', 'Course_URL']]
    else:
        # Return an empty dataframe with a message
        empty_df = pd.DataFrame(columns=['Course_Name','Course_Description', 'Course_URL' ])
        return empty_df

# Example:
ans = recommend_courses("nlp",5)
ans

Unnamed: 0,Course_Name,Course_Description,Course_URL
2496,Transfer Learning for NLP with TensorFlow Hub,This is a hands-on project on transfer learnin...,https://www.coursera.org/learn/transfer-learni...
1361,Deep Learning NLP: Training GPT-2 from scratch,"In this 1-hour long project-based course, we w...",https://www.coursera.org/learn/deep-learning-n...
2497,Build NLP pipelines using scikit-learn,"In this 2-hour long project-based course, you ...",https://www.coursera.org/learn/build-nlp-pipel...
1676,NLP: Twitter Sentiment Analysis,"In this hands-on project, we will train a Naiv...",https://www.coursera.org/learn/twitter-sentime...
962,"AI Workflow: Machine Learning, Visual Recognit...",This is the fourth course in the IBM AI Enterp...,https://www.coursera.org/learn/ibm-ai-workflow...


In [28]:
ans.shape

(5, 3)

In [29]:
ans.columns

Index(['Course_Name', 'Course_Description', 'Course_URL'], dtype='object')

In [30]:
df.head()

Unnamed: 0,Course_Id,Course_Name,Difficulty_Level,Course_URL,Course_Description,Course_Rating,Course_Subscribers,Clean_title
0,192062,Write A Feature Length Screenplay For Film Or ...,Beginner,https://www.coursera.org/learn/write-a-feature...,Write a Full Length Feature Film Script In th...,4.8,49216.0,Write Feature Length Screenplay Film Television
1,523438,Business Strategy: Business Model Canvas Analy...,Beginner,https://www.coursera.org/learn/canvas-analysis...,"By the end of this guided project, you will be...",4.8,24869.0,Business Strategy Business Model Canvas Analys...
2,354331,Silicon Thin Film Solar Cells,Advanced,https://www.coursera.org/learn/silicon-thin-fi...,This course consists of a general presentation...,4.1,78781.0,Silicon Thin Film Solar Cells
3,320429,Finance for Managers,Intermediate,https://www.coursera.org/learn/operational-fin...,"When it comes to numbers, there is always more...",4.8,58013.0,Finance Managers
4,140932,Retrieve Data using Single-Table SQL Queries,Beginner,https://www.coursera.org/learn/single-table-sq...,In this course you?ll learn how to effectively...,4.6,55601.0,Retrieve Data SingleTable SQL Queries


In [31]:
df.to_csv('CleanedTitle.csv',index = None)

In [32]:
import pickle
pickle.dump(recommended_result_df,open('result.pkl','wb'))