In [1]:
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Show up to 100 characters per cell so long course summaries stay readable.
pd.set_option("display.max_colwidth", 100)

In [3]:
# Saved copy of the Udacity free-courses page that the rest of the notebook parses.
# NOTE(review): absolute, machine-specific path — consider a path relative to the
# notebook (or a configurable data directory) so the notebook runs elsewhere.
file_name = r"C:\Users\eduar\mydevrepo\tools\udacity_free_courses.html"

# Read the whole document inside a context manager so the file handle is
# closed as soon as the text is in memory (the bare open() never closed it).
with open(file_name, "r", encoding="utf-8") as f:
    html_doc = f.read()

In [4]:
# Parse the saved page text; "html.parser" selects Python's built-in HTML parser.
soup = BeautifulSoup(html_doc, "html.parser")

In [5]:
# Distinct difficulty labels found on the page. A span whose `.string` is
# None contributes None to the set.
level_spans = soup.find_all('span', class_='card_level__2HNxe')
levels = {span.string for span in level_spans}
levels

{None, 'advanced', 'beginner', 'intermediate'}

In [6]:
# Distinct duration units: the last whitespace-separated word of each duration
# label. str() turns a missing label (None) into the literal text 'None'.
duration_spans = soup.find_all('span', class_='card_duration__1hWII')
periods = {str(span.string).split()[-1] for span in duration_spans}
periods

{'Day', 'Days', 'Hour', 'Hours', 'Month', 'Months', 'None', 'Week', 'Weeks'}

In [7]:
# Raw affiliate labels, one per card paragraph (a label may list several
# partners separated by commas).
affiliates = {
    tag.string
    for tag in soup.find_all('p', class_='card_affiliatesContent__26Up9')
}

# Split every label on commas and collect the individual, trimmed names.
all_affiliates = {
    name.strip()
    for affiliate_text in affiliates
    for name in affiliate_text.split(',')
}

# all_affiliates
        

In [8]:
# All <a> elements carrying the card-container class; each one is parsed into
# one course record further down.
cards = soup.find_all('a', class_='card_container__25DrK')

In [9]:
def get_num_weeks(duration):
    """Convert a duration label such as ``'2 Months'`` into a number of weeks.

    Parameters
    ----------
    duration : str or None
        Text of the form ``'<count> <unit>'`` where the unit starts with
        ``Hour``, ``Day``, ``Week`` or ``Month``.

    Returns
    -------
    int or None
        ``0`` for hour/day durations, ``count * 4`` for months, ``count`` for
        any other unit (weeks), and ``None`` when there is no usable duration
        (``None``, an empty string, or a label whose unit is ``'None'``).

    Raises
    ------
    ValueError
        If the label is not exactly two words or the count is not an integer.
    """
    if duration is None:
        return None

    parts = str(duration).split()

    # Missing durations show up as the text 'None'. The previous version fell
    # through to int(None) for these and raised TypeError.
    if not parts or parts[-1].startswith('None'):
        return None

    num_period, period = parts
    count = int(num_period)

    # Anything shorter than a week is rounded down to zero weeks.
    if period.startswith(('Hour', 'Day')):
        return 0

    # A month is approximated as four weeks.
    if period.startswith('Month'):
        return count * 4

    return count

In [10]:
def _tag_string(card, tag_name, css_class):
    """Return ``.string`` of the first matching descendant of ``card``, or None if there is no such tag."""
    tag = card.find(tag_name, class_=css_class)
    return tag.string if tag is not None else None


def parse_card(card):
    """Build one course record (a dict) from a single course-card element.

    Every text field is read through ``_tag_string`` so a card that lacks one
    of the elements yields ``None`` for that field instead of raising
    ``AttributeError``. ``affiliates`` falls back to ``''`` and ``num_weeks``
    to ``None`` when the corresponding text is missing.
    """
    affiliates_text = _tag_string(card, 'p', 'card_affiliatesContent__26Up9')
    duration = _tag_string(card, 'span', 'card_duration__1hWII')

    return {
        # `.string` can be None even when the tag exists, so guard before rstrip().
        'affiliates': affiliates_text.rstrip() if affiliates_text else '',
        'course_name': _tag_string(card, 'h2', 'card_title__35G97'),
        'level': _tag_string(card, 'span', 'card_level__2HNxe'),
        'num_weeks': get_num_weeks(duration) if duration else None,
        'duration': duration,
        # The card's href is joined onto the site root.
        'course_url': 'https://www.udacity.com' + card['href'],
        'course_summary': _tag_string(card, 'p', 'card_summary__1HlQ7'),
        'course_detailContent': _tag_string(card, 'p', 'card_detailContent__2eJIl'),
    }


# One record per card. Building the list through a function keeps the
# per-card temporaries out of the notebook namespace, so the `affiliates`
# set computed earlier is no longer overwritten.
data = [parse_card(card) for card in cards]

In [11]:
# Alternative kept for reference: the same frame sorted by course length.
# online_df = pd.DataFrame(data=data).sort_values(by='num_weeks')

# One row per scraped course record; columns follow the key order of the dicts in `data`.
online_df = pd.DataFrame(data=data)

# Number of course records scraped from the saved page.
len(online_df)

191

In [12]:
# CSV holding the locally maintained course list; read by read_local_curses().
# The relative path is resolved against the notebook's working directory.
courses_file = 'udacity_free_courses.csv'

In [13]:
def read_local_curses(path=None):
    """Load the locally saved course list from CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. When omitted, the notebook-level ``courses_file``
        is used, which is what the original zero-argument call did.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.
    """
    return pd.read_csv(courses_file if path is None else path)


# Correctly spelled alias. The original (misspelled) name is kept so existing
# cells that call read_local_curses() keep working.
read_local_courses = read_local_curses

In [14]:
# Local course list loaded from the CSV named by `courses_file`; compared with
# the scraped `online_df` in the following cells.
df = read_local_curses()

In [15]:
# New courses: present on the scraped page but missing from the local CSV
is_known_locally = online_df['course_name'].isin(df['course_name'])
online_df[~is_known_locally]

Unnamed: 0,affiliates,course_name,level,num_weeks,duration,course_url,course_summary,course_detailContent


In [16]:
# Courses recorded in the local CSV that do not appear on the scraped page
still_listed = df['course_name'].isin(online_df['course_name'])
df[~still_listed]

Unnamed: 0,affiliates,course_name,year,level,num_weeks,duration,completed,course_url,course_summary,course_detailContent
191,Kaggle,Intro to Descriptive Statistics,,beginner,8.0,2 Months,,https://www.udacity.com/course/intro-to-descriptive-statistics--ud827,"Statistics is an important field of math that is used to analyze, interpret, and predict outcome...",
192,Bosch,AIoT Foundations,,intermediate,8.0,2 Months,,https://www.udacity.com/course/aiot-foundations--ud074,Immerse yourself in the full lifecycle of AIoT-enabled products and solutions by exploring AIoT ...,
193,,Intro to Data Science,,intermediate,8.0,2 Months,,https://www.udacity.com/course/intro-to-data-science--ud359,The Introduction to Data Science class will survey the foundational topics in data science,


In [17]:
# Courses with a year recorded
# (`local_free_courses` is not defined in this notebook; the local frame is `df`)
# df[df['year'].notna()]

In [18]:
# Update file
# df.insert(6, "completed", ['' for i in range(len(df))], True)
# df

In [19]:
# Update year (requires `course_name_update`, which is set in the "Check Course" cell)
# course_year = 2021
# df.loc[df['course_name'] == course_name_update, 'year'] = course_year

In [20]:
# Check Course
# course_name_update = "Statistics"
# course_name_update in df['course_name'].to_list()

In [21]:
# Add Course

# affiliates = None
# year = None
# course_url = 'https://www.udacity.com/course/intro-to-data-science--ud359'
# level = 'intermediate' # [None, 'advanced', 'beginner', 'intermediate']
# duration = '2 Months' # ['Day', 'Days', 'Hour', 'Hours', 'Month', 'Months', 'None', 'Week', 'Weeks']
# num_weeks = get_num_weeks(duration)
# course_summary = 'The Introduction to Data Science class will survey the foundational topics in data science'

# new_course = {
#     'affiliates': [affiliates],
#     'year': [year],
#     'course_name': [course_name_update],
#     'level': [level],
#     'num_weeks': [num_weeks],
#     'duration': [duration],
#     'completed': [None],
#     'course_url': [course_url],
#     'course_summary': [course_summary],
#     'course_detailContent': [None]}

# new_df = pd.concat([df, pd.DataFrame(data=new_course)], ignore_index=True)
# new_df.to_csv(courses_file, index=False)
# df = read_local_curses()

In [22]:
# Update completed
# df.loc[df['course_name'] == course_name_update, 'completed'] = 'yes'

In [23]:
# Hand-maintained list of course names of personal interest.
my_course_names = [
    'Introduction to Python Programming',
    'Intro to Deep Learning with PyTorch',
    'Introduction to Machine Learning Course',
    'AWS DeepRacer',
    'AIoT Foundations',
    'Introduction to TensorFlow Lite',
    'Reinforcement Learning',
    'Introduction to Graduate Algorithms',
    'Intro to TensorFlow for Deep Learning',
    'Machine Learning',
    'Intro to Data Science',
    'Data Science Interview Prep',
    'Machine Learning: Unsupervised Learning',
    'AWS Machine Learning Foundations Course',
]

# Wrapped in a Series so the names can be checked with .isin() below.
mycourses = pd.Series(my_course_names)
len(mycourses)

14

In [24]:
# Entries of mycourses that are missing from the local CSV (empty means all are present)
in_local = mycourses.isin(df['course_name'])
mycourses[~in_local]

Series([], dtype: object)

In [25]:
def get_course(tag1, tag2='', courses=None):
    """Return the courses whose text fields mention ``tag1`` or ``tag2``.

    The search is case-insensitive and looks at the ``affiliates``,
    ``course_name``, ``course_summary`` and ``course_detailContent`` columns.
    Tags are matched as literal substrings. The earlier version handed them
    to ``str.contains`` as regular expressions after lower-casing them, so a
    tag such as ``'C++'`` raised an error.

    Parameters
    ----------
    tag1 : str
        Text to look for; always searched.
    tag2 : str, optional
        Second text to look for; ignored when it is the empty string.
    courses : pandas.DataFrame, optional
        Frame to search. Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        Matching rows, restricted to the summary columns
        (affiliates, course_name, year, level, num_weeks, duration,
        completed, course_url).
    """
    frame = df if courses is None else courses

    # tag1 is always searched; tag2 only when it was actually supplied.
    needles = [tag1.lower()]
    if tag2 != '':
        needles.append(tag2.lower())

    search_columns = ['affiliates', 'course_name', 'course_summary', 'course_detailContent']
    result_columns = ['affiliates', 'course_name', 'year', 'level',
                      'num_weeks', 'duration', 'completed', 'course_url']

    matches = pd.Series(False, index=frame.index)
    for column in search_columns:
        # Missing cells become '' so they never match and never inject NaN
        # into the boolean mask.
        haystack = frame[column].fillna('').astype(str).str.lower()
        for needle in needles:
            matches |= haystack.str.contains(needle, regex=False)

    return frame.loc[matches, result_columns]


In [26]:
# Local courses whose affiliates, name, summary or detail text mention "TensorFlow".
get_course('TensorFlow')

Unnamed: 0,affiliates,course_name,year,level,num_weeks,duration,completed,course_url
159,TensorFlow Lite,Introduction to TensorFlow Lite,,intermediate,8.0,2 Months,,https://www.udacity.com/course/intro-to-tensorflow-lite--ud190
160,TensorFlow,Intro to TensorFlow for Deep Learning,,intermediate,8.0,2 Months,,https://www.udacity.com/course/intro-to-tensorflow-for-deep-learning--ud187


In [27]:
# Two-tag search: a row matches if it mentions either the abbreviation or the full name.
get_course('AWS', 'Amazon Web Services')


Unnamed: 0,affiliates,course_name,year,level,num_weeks,duration,completed,course_url
54,AWS DeepRacer,AWS DeepRacer,,intermediate,2.0,2 Weeks,,https://www.udacity.com/course/aws-deepracer--ud014
74,Amazon Web Services,Full Stack Foundations,,intermediate,3.0,3 Weeks,,https://www.udacity.com/course/full-stack-foundations--ud088
148,,AWS Machine Learning Foundations Course,2021.0,intermediate,8.0,2 Months,,https://www.udacity.com/course/aws-machine-learning-foundations--ud065


In [28]:
# Local courses mentioning "Facebook" in any searched text column.
get_course('Facebook')

Unnamed: 0,affiliates,course_name,year,level,num_weeks,duration,completed,course_url
26,Facebook for Developers,Passwordless Login Solutions for Android,,,1.0,1 Week,,https://www.udacity.com/course/passwordless-login-solutions-for-android--ud357
29,Facebook for Developers,Passwordless Login Solutions for iOS,,intermediate,1.0,1 Week,,https://www.udacity.com/course/passwordless-login-solutions-for-ios--ud1028
57,Facebook for Developers,Mobile Design and Usability for iOS,,intermediate,2.0,2 Weeks,,https://www.udacity.com/course/mobile-design-and-usability-for-ios--ud1034
61,Facebook for Developers,Mobile Design and Usability for Android,,intermediate,2.0,2 Weeks,,https://www.udacity.com/course/mobile-design-and-usability-for-android--ud358
141,Facebook AI,Secure and Private AI,,advanced,8.0,2 Months,,https://www.udacity.com/course/secure-and-private-ai--ud185
153,Facebook,Data Analysis with R,,intermediate,8.0,2 Months,,https://www.udacity.com/course/data-analysis-with-r--ud651
157,Facebook AI,Intro to Deep Learning with PyTorch,,intermediate,8.0,2 Months,,https://www.udacity.com/course/deep-learning-pytorch--ud188


In [29]:
# Local courses mentioning "machine learning" (the search is case-insensitive).
get_course('machine learning')

Unnamed: 0,affiliates,course_name,year,level,num_weeks,duration,completed,course_url
14,Insight,Spark,,intermediate,0.0,10 Hours,,https://www.udacity.com/course/learn-spark-at-udacity--ud2002
21,,Machine Learning Interview Preparation,,intermediate,1.0,1 Week,,https://www.udacity.com/course/machine-learning-interview-prep--ud1001
23,,Data Science Interview Prep,2015.0,intermediate,1.0,1 Week,yes,https://www.udacity.com/course/data-science-interview-prep--ud944
54,AWS DeepRacer,AWS DeepRacer,,intermediate,2.0,2 Weeks,,https://www.udacity.com/course/aws-deepracer--ud014
90,Georgia Institute of Technology,Machine Learning: Unsupervised Learning,,intermediate,4.0,1 Month,,https://www.udacity.com/course/machine-learning-unsupervised-learning--ud741
93,Microsoft Azure,AI Fundamentals,,beginner,4.0,1 Month,,https://www.udacity.com/course/ai-fundamentals--ud099
145,Microsoft Azure,Introduction to Machine Learning using Microsoft Azure,,intermediate,8.0,2 Months,,https://www.udacity.com/course/introduction-to-machine-learning-using-microsoft-azure--ud00333
148,,AWS Machine Learning Foundations Course,2021.0,intermediate,8.0,2 Months,,https://www.udacity.com/course/aws-machine-learning-foundations--ud065
162,,Introduction to Machine Learning Course,,intermediate,10.0,10 Weeks,,https://www.udacity.com/course/intro-to-machine-learning--ud120
169,Georgia Institute of Technology,Artificial Intelligence,,intermediate,16.0,4 Months,,https://www.udacity.com/course/artificial-intelligence--ud954


In [30]:
# Local courses mentioning "deep learning" (the search is case-insensitive).
get_course('deep learning')

Unnamed: 0,affiliates,course_name,year,level,num_weeks,duration,completed,course_url
95,,Intel® Edge AI Fundamentals with OpenVINO™,,intermediate,4.0,1 Month,,https://www.udacity.com/course/intel-edge-AI-fundamentals-with-openvino--ud132
157,Facebook AI,Intro to Deep Learning with PyTorch,,intermediate,8.0,2 Months,,https://www.udacity.com/course/deep-learning-pytorch--ud188
159,TensorFlow Lite,Introduction to TensorFlow Lite,,intermediate,8.0,2 Months,,https://www.udacity.com/course/intro-to-tensorflow-lite--ud190
160,TensorFlow,Intro to TensorFlow for Deep Learning,,intermediate,8.0,2 Months,,https://www.udacity.com/course/intro-to-tensorflow-for-deep-learning--ud187


In [31]:
# The truncated tag "data scien" is matched as a substring, so it can hit any
# text containing that prefix.
get_course('data scien')

Unnamed: 0,affiliates,course_name,year,level,num_weeks,duration,completed,course_url
23,,Data Science Interview Prep,2015.0,intermediate,1.0,1 Week,yes,https://www.udacity.com/course/data-science-interview-prep--ud944
151,MongoDB,Data Wrangling with MongoDB,,intermediate,8.0,2 Months,,https://www.udacity.com/course/data-wrangling-with-mongodb--ud032
183,Georgia Institute of Technology,Machine Learning,,intermediate,16.0,4 Months,,https://www.udacity.com/course/machine-learning--ud262
191,Kaggle,Intro to Descriptive Statistics,,beginner,8.0,2 Months,,https://www.udacity.com/course/intro-to-descriptive-statistics--ud827
193,,Intro to Data Science,,intermediate,8.0,2 Months,,https://www.udacity.com/course/intro-to-data-science--ud359


In [32]:
# Local courses mentioning "python" in any searched text column.
get_course('python')

Unnamed: 0,affiliates,course_name,year,level,num_weeks,duration,completed,course_url
28,,HTTP & Web Servers,,intermediate,1.0,1 Week,,https://www.udacity.com/course/http-web-servers--ud303
65,,Authentication & Authorization: OAuth,,intermediate,2.0,2 Weeks,,https://www.udacity.com/course/authentication-authorization-oauth--ud330
70,,Designing RESTful APIs,,intermediate,3.0,3 Weeks,,https://www.udacity.com/course/designing-restful-apis--ud388
74,Amazon Web Services,Full Stack Foundations,,intermediate,3.0,3 Weeks,,https://www.udacity.com/course/full-stack-foundations--ud088
96,,Intro to Relational Databases,,intermediate,4.0,4 Weeks,,https://www.udacity.com/course/intro-to-relational-databases--ud197
120,,Introduction to Python Programming,2018.0,beginner,5.0,5 Weeks,yes,https://www.udacity.com/course/introduction-to-python--ud1110
121,,Intro to Data Analysis,,beginner,6.0,6 Weeks,,https://www.udacity.com/course/intro-to-data-analysis--ud170
139,,Differential Equations in Action,,intermediate,8.0,2 Months,,https://www.udacity.com/course/differential-equations-in-action--cs222
149,,Design of Computer Programs,,advanced,8.0,2 Months,,https://www.udacity.com/course/design-of-computer-programs--cs212
155,,Software Debugging,,intermediate,8.0,2 Months,,https://www.udacity.com/course/software-debugging--cs259
