# Scrape Datacamp

In [1]:
# import libraries
import html
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# show up to 100 characters of each text cell and left-align the column headers
pd.options.display.max_colwidth = 100
pd.options.display.colheader_justify = 'left'

In [3]:
# turn a string into a clickable URL
def make_clickable(val):
    """Return an HTML anchor whose target and label are both ``val``.

    ``val`` is converted with ``str()`` and HTML-escaped (quotes included)
    before it is placed in the ``href`` attribute and in the link text, so a
    value containing ``"``, ``<``, ``>`` or ``&`` cannot break out of the tag.

    Args:
        val: the URL (or any other value) to render as a link.

    Returns:
        str: markup of the form ``<a href="...">...</a>``.
    """
    safe = html.escape(str(val), quote=True)
    return '<a href="{}">{}</a>'.format(safe, safe)

In [4]:
# collect the page
url = "https://www.datacamp.com/courses/tech:r"
# bound the wait so a stalled connection cannot hang the notebook indefinitely
page = requests.get(url, timeout=30)
# stop here on a 4xx/5xx response instead of parsing an error page further down
page.raise_for_status()

In [5]:
# build a navigable tree from the response body using Python's built-in HTML parser
soup = BeautifulSoup(page.text, features='html.parser')

In [6]:
# find the section with all the courses
all_courses = soup.find(class_='courses__explore-list row')

# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError on None in the next cell
if all_courses is None:
    raise RuntimeError(
        "course list container 'courses__explore-list row' not found in {}; "
        "the page layout may have changed".format(url)
    )

In [7]:
# every course card is an <a> element carrying the 'course-block__link' class
all_courses_names = all_courses.find_all(name='a', class_='course-block__link')

In [8]:
# one row per course card, in the order [title, description, time, link]
data = []

for course in all_courses_names:
    # text of the first child element matching each field's class, in column order
    title, description, time = (
        course.find_all(class_=field_class)[0].get_text()
        for field_class in (
            'course-block__title',
            'course-block__description',
            'course-block__length',
        )
    )
    # only the description has its surrounding whitespace trimmed
    description = description.strip()
    # the card's own href is the link to the course page
    link = course['href']
    data.append([title, description, time, link])

In [9]:
# create a pandas dataframe with all the data
column_names = ['Title', 'Description', 'Time', 'Link']
# naming the columns in the constructor keeps all four columns even when the
# scrape produced no rows; assigning .columns afterwards raises a length
# mismatch on an empty frame
df_classes = pd.DataFrame(data, columns=column_names)

In [10]:
# make the URL link
# urljoin resolves each href against the site root: root-relative paths get the
# domain prepended, while hrefs that are already absolute are left unchanged
df_classes['URL'] = df_classes['Link'].map(
    lambda href: urljoin('https://www.datacamp.com', href)
)

In [11]:
# the raw href is redundant now that the full URL column exists; remove it in place
df_classes.drop(columns=['Link'], inplace=True)

In [12]:
# 'Time' holds text such as '3 hours'; sorted as text, '10 hours' would come
# before '3 hours', so order by the leading number instead
# NOTE(review): assumes every duration is expressed in hours - confirm
(
    df_classes
    .assign(_hours=df_classes['Time'].str.extract(r'(\d+)', expand=False).astype(float))
    .sort_values('_hours')
    .drop(columns='_hours')
    .head()
)

Unnamed: 0,Title,Description,Time,URL
3,Importing Data in R (Part 1),"In this course, you will learn to read CSV, XLS, and text files in R using tools like readxl and...",3 hours,https://www.datacamp.com/courses/importing-data-in-r-part-1
40,Working with the RStudio IDE (Part 1),Learn the basics of the important features of the RStudio IDE.,3 hours,https://www.datacamp.com/courses/working-with-the-rstudio-ide-part-1
11,Importing Data in R (Part 2),"Parse data in any format. Whether it's flat files, statistical software, databases, or data righ...",3 hours,https://www.datacamp.com/courses/importing-data-in-r-part-2
62,Working with the RStudio IDE (Part 2),"Further your knowledge of RStudio and learn how to integrate Git, LaTeX, and Shiny",3 hours,https://www.datacamp.com/courses/working-with-the-rstudio-ide-part-2
22,Reporting with R Markdown,Learn to create interactive analyses and automated reports with R Markdown.,3 hours,https://www.datacamp.com/courses/reporting-with-r-markdown


In [13]:
# keep only the courses whose title contains 'Visual'
df_classes.loc[df_classes['Title'].str.contains('Visual')]

Unnamed: 0,Title,Description,Time,URL
2,Data Visualization with ggplot2 (Part 1),Learn to produce meaningful and beautiful data visualizations with ggplot2 by understanding the ...,5 hours,https://www.datacamp.com/courses/data-visualization-with-ggplot2-1
13,Data Visualization in R,This course provides a comprehensive introduction to working with base graphics in R.,4 hours,https://www.datacamp.com/courses/data-visualization-in-r
21,Data Visualization with ggplot2 (Part 2),"Take your data visualization skills to the next level with coordinates, facets, themes, and best...",5 hours,https://www.datacamp.com/courses/data-visualization-with-ggplot2-2
32,Data Visualization with ggplot2 (Part 3),This course covers some advanced topics including strategies for handling large data sets and sp...,6 hours,https://www.datacamp.com/courses/data-visualization-with-ggplot2-part-3
51,Visualizing Time Series Data in R,"Learn how to visualize time series in R, then practice with a stock-picking case study.",4 hours,https://www.datacamp.com/courses/visualizing-time-series-data-in-r
65,Data Visualization in R with ggvis,"Learn to create interactive graphs to display distributions, relationships, model fits, and more...",4 hours,https://www.datacamp.com/courses/ggvis-data-visualization-r-tutorial
66,Data Visualization in R with lattice,Learn to visualize multivariate datasets using lattice graphics.,4 hours,https://www.datacamp.com/courses/data-visualization-in-r-with-lattice


In [14]:
# optional: render the URL column as clickable links using make_clickable (uncomment to display)
# df_classes.style.format({'URL': make_clickable})

In [15]:
# optional: left-align the cell text in the styled table (uncomment to display)
# df_classes.style.set_properties(**{'text-align': 'left'})