# Online Course Recommender


---


## Practice Module: Intelligent Reasoning Systems (IRS)

## Data Preparation: Udemy Courses




## File Path & Library Setup & final df setup

In [None]:
# Load All Necessary Packages

import os
from google.colab import drive

import re
import pandas as pd
import numpy as np

seed = 18

print('Versions of key libraries')
print('-------------------------')
print('pandas:  ', pd.__version__)
print('numpy:   ', np.__version__)

Versions of key libraries
-------------------------
pandas:   1.1.5
numpy:    1.19.5


In [None]:
# Load the raw web-scraped Udemy course data
# NOTE(review): '/content' is presumably the Colab working directory - confirm the CSV has been uploaded there
os.chdir('/content')
rawdata = pd.read_csv('v_courses_info_business.csv')

In [None]:
rawdata.shape

(9986, 14)

In [None]:
rawdata.head()
df = rawdata.copy()

In [None]:
final = pd.DataFrame(columns = ['Course Name','Course URL','Categories','Short Description', 'Long Description', 'Difficulty', 'Duration', 'Free Option', 'Rating', 'Original rating', 'Numberofrated', 'Numberofenroll', 'Paid Option', 'Language', 'Subtitle Language', 'Platform', 'Provider', 'Image URL'])
final

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL


## Drop unwanted data

In [None]:
# Keep only rows whose `level` is one of the recognised difficulty labels;
# rows with any other (or missing) value are dropped.
difficulty_list = ['All Levels', 'Beginner', 'Intermediate', 'Expert']
df = df[df.level.isin(difficulty_list)]
df.shape

(9984, 14)

In [None]:
# Keep only rows whose `length` text mentions 'total' (e.g. '9 total hours') or
# 'question'; anything else is not a duration this notebook knows how to parse.
# na=False treats a missing length as "no match" so the row is dropped, instead
# of leaving NaN in the mask (pandas refuses to index with a mask containing NaN).
df = df[df.length.str.contains('total|question', na=False)]
df.shape

(9983, 14)

## Drop non-English courses

In [None]:
# Keep only courses whose language is exactly 'English'; rows with another
# language or with no language value are dropped
# (original author's note: Udemy no longer provides this field for some courses).
df = df[df.language == 'English']
df.shape

(9892, 14)

In [None]:
df.head()

Unnamed: 0,name,description,link,instructors,rating,length,level,bestseller,price,numbersofrating,category,enroll,language,subtitle
0,The Complete SQL Bootcamp 2021: Go from Zero t...,Become an expert at SQL!,https://www.udemy.com/course/the-complete-sql-...,Jose Portilla,4.7,9 total hours,All Levels,Bestseller,$25.99,"(109,210 ratings)","['Business', 'Business Analytics & Intelligenc...","410,023 students",English,"English [Auto], French [Auto]"
1,PMP Exam Prep Seminar - 2021 Exam Content with...,PMP Exam Prep Seminar - Earn 35 PDUs by comple...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,Joseph Phillips,4.6,30 total hours,All Levels,Bestseller,$22.99,"(72,148 ratings)","['Project Management', 'PMBOK']","216,840 students",English,"English [Auto], French [Auto]"
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,Learn Tableau 2020 for data science step by st...,https://www.udemy.com/course/tableau10/,"Kirill Eremenko, Ligency Team",4.6,9 total hours,All Levels,Bestseller,$23.99,"(70,606 ratings)","['Business', 'Business Analytics & Intelligenc...","262,889 students",English,"English [Auto], French [Auto]"
3,Microsoft Power BI - A Complete Introduction [...,"Learn how to use Microsoft's Power BI Tools, i...",https://www.udemy.com/course/powerbi-complete-...,"Manuel Lorenz, Academind by Maximilian Schwarz...",4.6,20 total hours,Beginner,,$19.99,"(50,251 ratings)","['Business Analytics & Intelligence', 'Microso...","180,836 students",English,"English, French [Auto]"
4,Agile Crash Course: Agile Project Management; ...,Get Agile Certified & Learn about the key and ...,https://www.udemy.com/course/agile-crash-course/,Mauricio Rubio - Agile Guru & Founder of Agile...,4.4,2.5 total hours,Beginner,Bestseller,$29.99,"(50,258 ratings)","['Business', 'Project Management', 'Agile']","149,096 students",English,"English [Auto], French [Auto]"


## Fill missing ratings with the mean rating
## Fill missing subtitles with 'English(default)'


In [None]:
# Fill missing values with explicit per-column defaults.
# DataFrame.fillna with a mapping returns a new frame, which avoids calling
# fillna(inplace=True) on a column pulled out of a filtered frame: that is
# chained assignment, which raises SettingWithCopyWarning and silently does
# nothing under pandas copy-on-write.
meanval = df['rating'].mean()  # mean of the non-missing ratings
df = df.fillna({
  'rating': meanval,               # unrated courses get the average rating
  'subtitle': 'English(default)',  # no subtitle information listed
  'price': 'free',                 # no price listed -> treated as free
  'numbersofrating': '0',          # kept as text; parsed to an int later on
  'category': '',                  # empty string -> no categories
})

## Preprocess Data

### Get Course Name, Short Description, Course URL, Provider Column

In [None]:
# Copy the columns that need no transformation straight from the cleaned frame.
final['Course Name'] = df.name
final['Short Description'] = df.description
final['Course URL'] = df.link
final['Provider'] = df.instructors
final.head()

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Complete SQL Bootcamp 2021: Go from Zero t...,https://www.udemy.com/course/the-complete-sql-...,,Become an expert at SQL!,,,,,,,,,,,,,Jose Portilla,
1,PMP Exam Prep Seminar - 2021 Exam Content with...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,,PMP Exam Prep Seminar - Earn 35 PDUs by comple...,,,,,,,,,,,,,Joseph Phillips,
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,https://www.udemy.com/course/tableau10/,,Learn Tableau 2020 for data science step by st...,,,,,,,,,,,,,"Kirill Eremenko, Ligency Team",
3,Microsoft Power BI - A Complete Introduction [...,https://www.udemy.com/course/powerbi-complete-...,,"Learn how to use Microsoft's Power BI Tools, i...",,,,,,,,,,,,,"Manuel Lorenz, Academind by Maximilian Schwarz...",
4,Agile Crash Course: Agile Project Management; ...,https://www.udemy.com/course/agile-crash-course/,,Get Agile Certified & Learn about the key and ...,,,,,,,,,,,,,Mauricio Rubio - Agile Guru & Founder of Agile...,


### Add Categories

In [None]:
string = "['asdfa', 'asdf']"
string[1:-1].split(', ')[0][1:-1]

'asdfa'

In [None]:
def get_cat(strs):
  """Turn a stringified list of categories into a comma-separated string.

  Args:
    strs: text such as "['Business', 'SQL']" (the repr of a list of strings),
      or '' when the course has no category.

  Returns:
    The category names joined with ',' (e.g. 'Business,SQL'); '' for an empty
    or non-string input.
  """
  import ast  # local import: only this helper needs it

  if not isinstance(strs, str) or not strs.strip():
    # This path used to fall through and return None.
    return ''
  try:
    # Parse the list literal properly so that names containing ', ' or an
    # apostrophe survive intact instead of being split and clipped.
    parsed = ast.literal_eval(strs)
  except (ValueError, SyntaxError):
    parsed = None
  if isinstance(parsed, (list, tuple)):
    return ','.join(str(item) for item in parsed)
  # Fallback for text that is not a valid list literal: strip the brackets and
  # the quote character around each piece, as the previous parser did.
  return ','.join(item[1:-1] for item in strs[1:-1].split(', '))

In [None]:
final['Categories'] = df['category'].apply(lambda x: get_cat(x))

### Get Course Difficulty

In [None]:
# Map the course difficulty label to an ordinal category (0, 1 or 2)
def conv_diff(strs):
  """Map a difficulty label to its ordinal code.

  'All Levels' and 'Beginner' give 0, 'Intermediate' gives 1, and any other
  value gives 2.
  """
  if strs in ('All Levels', 'Beginner'):
    return 0
  return 1 if strs == 'Intermediate' else 2

final['Difficulty'] = df['level'].apply(lambda x: conv_diff(x)) # difficulty in numerical category

final.head()

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Complete SQL Bootcamp 2021: Go from Zero t...,https://www.udemy.com/course/the-complete-sql-...,"Business,Business Analytics & Intelligence,SQL",Become an expert at SQL!,,0,,,,,,,,,,,Jose Portilla,
1,PMP Exam Prep Seminar - 2021 Exam Content with...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,"Project Management,PMBOK",PMP Exam Prep Seminar - Earn 35 PDUs by comple...,,0,,,,,,,,,,,Joseph Phillips,
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,https://www.udemy.com/course/tableau10/,"Business,Business Analytics & Intelligence,Tab...",Learn Tableau 2020 for data science step by st...,,0,,,,,,,,,,,"Kirill Eremenko, Ligency Team",
3,Microsoft Power BI - A Complete Introduction [...,https://www.udemy.com/course/powerbi-complete-...,"Business Analytics & Intelligence,Microsoft Po...","Learn how to use Microsoft's Power BI Tools, i...",,0,,,,,,,,,,,"Manuel Lorenz, Academind by Maximilian Schwarz...",
4,Agile Crash Course: Agile Project Management; ...,https://www.udemy.com/course/agile-crash-course/,"Business,Project Management,Agile",Get Agile Certified & Learn about the key and ...,,0,,,,,,,,,,,Mauricio Rubio - Agile Guru & Founder of Agile...,


### Get Course Duration

In [None]:
string = '0.2 dsfaasdfa 2431 sdfgsdgfsdgsdfg'
re.findall(r'\d+[.]*\d*',string)

['0.2', '2431']

In [None]:
# Bucket the course duration (measured in hours) into ordinal categories 0, 1 or 2

def duration_conv(strs):
  """Bucket a course length string into an ordinal duration code.

  The first number in the text is the amount; when the text mentions
  'question' the amount is divided by 25. Text mentioning 'min' is always
  bucket 0; otherwise amounts below 10 give 0, below 50 give 1, and the
  rest give 2.
  """
  amount = float(re.findall(r'\d+[.]*\d*', strs)[0])
  if 'question' in strs:
    amount /= 25
  if 'min' in strs:
    return 0
  # Each threshold reached moves the course up one bucket.
  return int(amount >= 10) + int(amount >= 50)

final['Duration'] = df['length'].apply(lambda x: duration_conv(x))

final.shape

(9892, 18)

In [None]:
final

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Complete SQL Bootcamp 2021: Go from Zero t...,https://www.udemy.com/course/the-complete-sql-...,"Business,Business Analytics & Intelligence,SQL",Become an expert at SQL!,,0,0,,,,,,,,,,Jose Portilla,
1,PMP Exam Prep Seminar - 2021 Exam Content with...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,"Project Management,PMBOK",PMP Exam Prep Seminar - Earn 35 PDUs by comple...,,0,1,,,,,,,,,,Joseph Phillips,
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,https://www.udemy.com/course/tableau10/,"Business,Business Analytics & Intelligence,Tab...",Learn Tableau 2020 for data science step by st...,,0,0,,,,,,,,,,"Kirill Eremenko, Ligency Team",
3,Microsoft Power BI - A Complete Introduction [...,https://www.udemy.com/course/powerbi-complete-...,"Business Analytics & Intelligence,Microsoft Po...","Learn how to use Microsoft's Power BI Tools, i...",,0,1,,,,,,,,,,"Manuel Lorenz, Academind by Maximilian Schwarz...",
4,Agile Crash Course: Agile Project Management; ...,https://www.udemy.com/course/agile-crash-course/,"Business,Project Management,Agile",Get Agile Certified & Learn about the key and ...,,0,0,,,,,,,,,,Mauricio Rubio - Agile Guru & Founder of Agile...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9981,PMI ACP Agile Certified Practitioner Practice ...,https://www.udemy.com/course/pmi-acp-agile-cer...,"Project Management,PMI-ACP",A practice tests designed to cover all the top...,,0,1,,,,,,,,,,Ashutosh Deshmukh,
9982,Failure Mode Effect Analysis for Process Risk ...,https://www.udemy.com/course/fmea-for-process-...,"Business,Industry,Failure Mode and Effects Ana...",Include supplier quality audit in FMEA and FME...,,0,0,,,,,,,,,,Evelyn 7E academy,
9983,Bignners to Pro Course on Financial Modeling a...,https://www.udemy.com/course/bignners-to-pro-c...,"Business,Business Strategy,Financial Modeling",Financial Modeling and Valuation,,0,0,,,,,,,,,,Harsh Barar,
9984,The Real Estate Invoice Contract Download & Guide,https://www.udemy.com/course/the-real-estate-i...,"Business,Real Estate,Real Estate Investing",Download A Copy Of My PROVEN Real Estate Invoi...,,0,0,,,,,,,,,,Ben Clardy,


### Get free option or not

In [None]:
def isfree(strs):
  """Return 1 when the price text contains 'free', otherwise 0."""
  return 1 if 'free' in strs else 0

final['Free Option'] = df.price.apply(lambda x:isfree(x))
final.head()

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Complete SQL Bootcamp 2021: Go from Zero t...,https://www.udemy.com/course/the-complete-sql-...,"Business,Business Analytics & Intelligence,SQL",Become an expert at SQL!,,0,0,0,,,,,,,,,Jose Portilla,
1,PMP Exam Prep Seminar - 2021 Exam Content with...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,"Project Management,PMBOK",PMP Exam Prep Seminar - Earn 35 PDUs by comple...,,0,1,0,,,,,,,,,Joseph Phillips,
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,https://www.udemy.com/course/tableau10/,"Business,Business Analytics & Intelligence,Tab...",Learn Tableau 2020 for data science step by st...,,0,0,0,,,,,,,,,"Kirill Eremenko, Ligency Team",
3,Microsoft Power BI - A Complete Introduction [...,https://www.udemy.com/course/powerbi-complete-...,"Business Analytics & Intelligence,Microsoft Po...","Learn how to use Microsoft's Power BI Tools, i...",,0,1,0,,,,,,,,,"Manuel Lorenz, Academind by Maximilian Schwarz...",
4,Agile Crash Course: Agile Project Management; ...,https://www.udemy.com/course/agile-crash-course/,"Business,Project Management,Agile",Get Agile Certified & Learn about the key and ...,,0,0,0,,,,,,,,,Mauricio Rubio - Agile Guru & Founder of Agile...,


In [None]:
final.Duration.unique()

array([0, 1, 2])

### Get Course Rating

In [None]:

string = '(565,433 ratings)'
re.findall(r'\d+[,]*\d*[,]*\d*',string)[0].replace(',','')

'565433'

In [None]:
# Get Course Rating: the star rating (as a fraction of 5) weighted by the number of ratings and normalised by the maximum enrolment

def get_number(strs):
  """Extract the first integer from text, ignoring thousands separators.

  Args:
    strs: text such as '(565,433 ratings)' or '410,023 students'.

  Returns:
    The first run of digits (with any embedded commas removed) as an int.

  Raises:
    IndexError: if the text contains no digit.
  """
  # A digit followed by any mix of digits and commas. The previous pattern
  # allowed at most three comma-separated groups, so a value such as
  # '1,234,567,890' was cut short.
  num = re.findall(r'\d[\d,]*', strs)[0].replace(',', '')
  return int(num)

# Parse the numeric fields out of their scraped text form.
num_enrolls = df.enroll.apply(lambda x:get_number(x))
num_ratings = df.numbersofrating.apply(lambda x:get_number(x))
ratings = df.rating.apply(lambda x:float(x))
# Largest enrolment in this file; used below as the normalising constant.
maxenroll = max(num_enrolls)
print('the maximum enroll number is:',maxenroll)
final['Original rating'] = ratings
final['Numberofrated'] = num_ratings
final['Numberofenroll'] = num_enrolls
# Score = (star rating as a fraction of 5) * (number of ratings) / (max enrolment).
# NOTE(review): the numerator uses the rating count while the denominator uses
# the enrolment count - confirm this mix is intended.
final['Rating'] = ratings/5.0*num_ratings/maxenroll


the maximum enroll number is: 414191


In [None]:
final.head()

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Complete SQL Bootcamp 2021: Go from Zero t...,https://www.udemy.com/course/the-complete-sql-...,"Business,Business Analytics & Intelligence,SQL",Become an expert at SQL!,,0,0,0,0.24785,4.7,109210,410023,,,,,Jose Portilla,
1,PMP Exam Prep Seminar - 2021 Exam Content with...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,"Project Management,PMBOK",PMP Exam Prep Seminar - Earn 35 PDUs by comple...,,0,1,0,0.160255,4.6,72148,216840,,,,,Joseph Phillips,
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,https://www.udemy.com/course/tableau10/,"Business,Business Analytics & Intelligence,Tab...",Learn Tableau 2020 for data science step by st...,,0,0,0,0.15683,4.6,70606,262889,,,,,"Kirill Eremenko, Ligency Team",
3,Microsoft Power BI - A Complete Introduction [...,https://www.udemy.com/course/powerbi-complete-...,"Business Analytics & Intelligence,Microsoft Po...","Learn how to use Microsoft's Power BI Tools, i...",,0,1,0,0.111617,4.6,50251,180836,,,,,"Manuel Lorenz, Academind by Maximilian Schwarz...",
4,Agile Crash Course: Agile Project Management; ...,https://www.udemy.com/course/agile-crash-course/,"Business,Project Management,Agile",Get Agile Certified & Learn about the key and ...,,0,0,0,0.106779,4.4,50258,149096,,,,,Mauricio Rubio - Agile Guru & Founder of Agile...,


### Get Subtitle Language

In [None]:
string = 'English(defaut)'
string.replace('\xa0',' ').replace(',',' ').split(' ')


['English(defaut)']

In [None]:
def get_subs(strs):
  """Collapse a subtitle listing into a comma-separated string of tokens.

  Non-breaking spaces and commas are treated as spaces; empty tokens and the
  '[Auto]' marker are dropped.
  """
  # NOTE(review): splitting on spaces means a multi-word language name would
  # come out as several separate tokens - confirm whether that matters.
  tokens = strs.replace('\xa0', ' ').replace(',', ' ').split(' ')
  kept = [tok for tok in tokens if tok not in ('', '[Auto]')]
  return ','.join(kept)

get_subs(string)

'English(defaut)'

In [None]:
# Get the list of subtitle languages available for each course

final['Subtitle Language'] = df.subtitle.apply(lambda x:get_subs(x))
final.head()

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Complete SQL Bootcamp 2021: Go from Zero t...,https://www.udemy.com/course/the-complete-sql-...,"Business,Business Analytics & Intelligence,SQL",Become an expert at SQL!,,0,0,0,0.24785,4.7,109210,410023,,,"English,French",,Jose Portilla,
1,PMP Exam Prep Seminar - 2021 Exam Content with...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,"Project Management,PMBOK",PMP Exam Prep Seminar - Earn 35 PDUs by comple...,,0,1,0,0.160255,4.6,72148,216840,,,"English,French",,Joseph Phillips,
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,https://www.udemy.com/course/tableau10/,"Business,Business Analytics & Intelligence,Tab...",Learn Tableau 2020 for data science step by st...,,0,0,0,0.15683,4.6,70606,262889,,,"English,French",,"Kirill Eremenko, Ligency Team",
3,Microsoft Power BI - A Complete Introduction [...,https://www.udemy.com/course/powerbi-complete-...,"Business Analytics & Intelligence,Microsoft Po...","Learn how to use Microsoft's Power BI Tools, i...",,0,1,0,0.111617,4.6,50251,180836,,,"English,French",,"Manuel Lorenz, Academind by Maximilian Schwarz...",
4,Agile Crash Course: Agile Project Management; ...,https://www.udemy.com/course/agile-crash-course/,"Business,Project Management,Agile",Get Agile Certified & Learn about the key and ...,,0,0,0,0.106779,4.4,50258,149096,,,"English,French",,Mauricio Rubio - Agile Guru & Founder of Agile...,


### Paid Option

In [None]:
# Record the listed price as the paid option; a price of exactly 'free' becomes 0

final['Paid Option'] = df.price.apply(lambda x: 0 if x == 'free' else x)
final.head()

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Complete SQL Bootcamp 2021: Go from Zero t...,https://www.udemy.com/course/the-complete-sql-...,"Business,Business Analytics & Intelligence,SQL",Become an expert at SQL!,,0,0,0,0.24785,4.7,109210,410023,$25.99,,"English,French",,Jose Portilla,
1,PMP Exam Prep Seminar - 2021 Exam Content with...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,"Project Management,PMBOK",PMP Exam Prep Seminar - Earn 35 PDUs by comple...,,0,1,0,0.160255,4.6,72148,216840,$22.99,,"English,French",,Joseph Phillips,
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,https://www.udemy.com/course/tableau10/,"Business,Business Analytics & Intelligence,Tab...",Learn Tableau 2020 for data science step by st...,,0,0,0,0.15683,4.6,70606,262889,$23.99,,"English,French",,"Kirill Eremenko, Ligency Team",
3,Microsoft Power BI - A Complete Introduction [...,https://www.udemy.com/course/powerbi-complete-...,"Business Analytics & Intelligence,Microsoft Po...","Learn how to use Microsoft's Power BI Tools, i...",,0,1,0,0.111617,4.6,50251,180836,$19.99,,"English,French",,"Manuel Lorenz, Academind by Maximilian Schwarz...",
4,Agile Crash Course: Agile Project Management; ...,https://www.udemy.com/course/agile-crash-course/,"Business,Project Management,Agile",Get Agile Certified & Learn about the key and ...,,0,0,0,0.106779,4.4,50258,149096,$29.99,,"English,French",,Mauricio Rubio - Agile Guru & Founder of Agile...,


### Adding a Column for platform identifier

In [None]:
# Platform identifier
# 0 - Edx
# 1 - Udemy

# A scalar is broadcast to every row, so no explicit list of ones is needed.
final['Platform'] = 1

### Adding a column for language (all English)

In [None]:
# Every remaining course is in English; the scalar is broadcast to all rows.
final['Language'] = 'English'

## Combine data 

In [None]:
final

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Complete SQL Bootcamp 2021: Go from Zero t...,https://www.udemy.com/course/the-complete-sql-...,"Business,Business Analytics & Intelligence,SQL",Become an expert at SQL!,,0,0,0,0.247850,4.700000,109210,410023,$25.99,English,"English,French",1,Jose Portilla,
1,PMP Exam Prep Seminar - 2021 Exam Content with...,https://www.udemy.com/course/pmp-pmbok6-35-pdus/,"Project Management,PMBOK",PMP Exam Prep Seminar - Earn 35 PDUs by comple...,,0,1,0,0.160255,4.600000,72148,216840,$22.99,English,"English,French",1,Joseph Phillips,
2,Tableau 2020 A-Z: Hands-On Tableau Training fo...,https://www.udemy.com/course/tableau10/,"Business,Business Analytics & Intelligence,Tab...",Learn Tableau 2020 for data science step by st...,,0,0,0,0.156830,4.600000,70606,262889,$23.99,English,"English,French",1,"Kirill Eremenko, Ligency Team",
3,Microsoft Power BI - A Complete Introduction [...,https://www.udemy.com/course/powerbi-complete-...,"Business Analytics & Intelligence,Microsoft Po...","Learn how to use Microsoft's Power BI Tools, i...",,0,1,0,0.111617,4.600000,50251,180836,$19.99,English,"English,French",1,"Manuel Lorenz, Academind by Maximilian Schwarz...",
4,Agile Crash Course: Agile Project Management; ...,https://www.udemy.com/course/agile-crash-course/,"Business,Project Management,Agile",Get Agile Certified & Learn about the key and ...,,0,0,0,0.106779,4.400000,50258,149096,$29.99,English,"English,French",1,Mauricio Rubio - Agile Guru & Founder of Agile...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9981,PMI ACP Agile Certified Practitioner Practice ...,https://www.udemy.com/course/pmi-acp-agile-cer...,"Project Management,PMI-ACP",A practice tests designed to cover all the top...,,0,1,0,0.000000,4.241297,0,0,$14.99,English,English(default),1,Ashutosh Deshmukh,
9982,Failure Mode Effect Analysis for Process Risk ...,https://www.udemy.com/course/fmea-for-process-...,"Business,Industry,Failure Mode and Effects Ana...",Include supplier quality audit in FMEA and FME...,,0,0,0,0.000000,4.241297,0,0,$14.99,English,English,1,Evelyn 7E academy,
9983,Bignners to Pro Course on Financial Modeling a...,https://www.udemy.com/course/bignners-to-pro-c...,"Business,Business Strategy,Financial Modeling",Financial Modeling and Valuation,,0,0,0,0.000000,4.241297,0,1,$14.99,English,English,1,Harsh Barar,
9984,The Real Estate Invoice Contract Download & Guide,https://www.udemy.com/course/the-real-estate-i...,"Business,Real Estate,Real Estate Investing",Download A Copy Of My PROVEN Real Estate Invoi...,,0,0,0,0.000000,4.241297,0,49,$14.99,English,English,1,Ben Clardy,


## Save Output to file

In [None]:
filename = 'p_Udemy_business.csv'
# Create the output folder on first use: to_csv does not create missing directories.
os.makedirs('p_datasets', exist_ok=True)
# utf_8_sig writes a byte-order mark at the start of the file.
final.to_csv('p_datasets/'+filename, index=False, encoding='utf_8_sig')

## Combine all to one

In [None]:
import os

os.chdir('/content')
dirname = 'p_datasets'
# Pick up only the processed per-category files: names that start with 'p_' and
# end with '.csv'. A plain substring test would also match names that merely
# contain 'p_' somewhere in the middle.
# Sorted so that the concatenation order - and therefore which row survives the
# later drop_duplicates - does not depend on the arbitrary order of os.listdir.
filelist = sorted(
  file for file in os.listdir(dirname)
  if file.startswith('p_') and file.endswith('.csv')
)
filelist

['p_Udemy_design.csv',
 'p_Udemy_business.csv',
 'p_Udemy_personalDev.csv',
 'p_Udemy_music.csv',
 'p_Udemy_itNsoftware.csv',
 'p_Udemy_officeProductivity.csv',
 'p_Udemy_lifestyle.csv',
 'p_Udemy_financeNaccounting.csv',
 'p_Udemy_teachNacademic.csv',
 'p_Udemy_marketing.csv',
 'p_Udemy_development.csv',
 'p_Udemy_photoNvideo.csv',
 'p_Udemy_healthNfit.csv']

In [None]:
# NOTE(review): this chdir is relative, so re-running this cell on its own
# (without re-running the cell that changes to '/content') will fail.
os.chdir('p_datasets')
# Read every per-category file, then concatenate once: calling pd.concat inside
# the loop re-copies the accumulated frame on every iteration.
frames = [pd.read_csv(file) for file in filelist]
# pd.concat raises on an empty list, so keep the empty-frame result for that case.
fulldataset = pd.concat(frames) if frames else pd.DataFrame()


In [None]:
fulldataset

Unnamed: 0,Course Name,Course URL,Categories,Short Description,Long Description,Difficulty,Duration,Free Option,Rating,Original rating,Numberofrated,Numberofenroll,Paid Option,Language,Subtitle Language,Platform,Provider,Image URL
0,The Ultimate Drawing Course - Beginner to Adva...,https://www.udemy.com/course/the-ultimate-draw...,"Graphic Design & Illustration,Drawing",Learn the #1 most important building block of ...,,0,1,0,0.200174,4.600000,109305,453470,NT$470,English,"English,French",1,"Jaysen Batchelor, Quinton Ross",
1,Character Art School: Complete Character Drawi...,https://www.udemy.com/course/character-art-sch...,"Design,Other Design,Character Design",Learn How to Draw People and Character Designs...,,0,1,0,0.106542,4.600000,58177,260798,NT$470,English,"English,French",1,Scott Harris,
2,Complete Blender Creator: Learn 3D Modelling f...,https://www.udemy.com/course/blendertutorial/,"Design,3D & Animation,Blender",Use Blender to Create Beautiful 3D models for ...,,0,1,0,0.086477,4.600000,47221,238288,NT$470,English,"English,French",1,"GameDev.tv Team, Rick Davidson, Grant Abbitt",
3,Design Thinking in 3 Steps,https://www.udemy.com/course/designit-design-t...,"Design,User Experience Design,Design Thinking","Understand your audience, envision a creative ...",,0,0,0,0.065123,4.400000,37177,83499,"NT$6,590",English,"English,French",1,"Designit Strategic Design, Alan Cooper",
4,User Experience Design Essentials - Adobe XD U...,https://www.udemy.com/course/ui-ux-web-design-...,"Design,User Experience Design,User Interface","Use XD to get a job in UI Design, User Interfa...",,0,1,0,0.063100,4.600000,34456,136757,NT$470,English,"English,French",1,Daniel Walter Scott,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5609,Limit Sugar Sweetened Beverages to Not More Th...,https://www.udemy.com/course/limit-sugar-sweet...,"Health & Fitness,Nutrition",Recommendation #21 of 30 for Optimizing Health...,,0,0,1,0.000000,4.446485,0,214,0,English,English,1,"Nicholas Cohen, MD",
5610,Limit Processed Foods to Not More Than One Ser...,https://www.udemy.com/course/limit-processed-f...,"Health & Fitness,Nutrition",Recommendation #22 of 31 for Optimizing Health...,,0,0,1,0.000006,4.446485,1,202,0,English,English,1,"Nicholas Cohen, MD",
5611,how to be an expert in the word of bodybuilding,https://www.udemy.com/course/how-to-be-an-expe...,"Health & Fitness,Fitness,Health",The comprehensive guide: prepares you to be an...,,0,0,1,0.000000,4.446485,0,347,0,English,English,1,Anas Idrissi,
5612,Meditation - The Art of Inner Peace and Happin...,https://www.udemy.com/course/meditation-the-ar...,"Health & Fitness,Meditation",This is Part 5 of Meditation - The Art of Inne...,,0,0,1,0.000000,4.446485,0,222,0,English,English,1,Nima King,


In [None]:
len(fulldataset['Course Name'].unique())

79286

In [None]:
# Drop rows that repeat a course name, keeping the first occurrence of each
# (drop_duplicates defaults to keep='first').
fulldataset = fulldataset.drop_duplicates(subset=['Course Name'])

In [None]:
len(fulldataset)

79286

In [None]:
fulldataset.to_csv('fulldataset.csv', index=False, encoding='utf_8_sig')