In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

import re
import pandas as pd

In [2]:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [2]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

## Get latest news article and paragraph

In [3]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

In [4]:
#Scape Page
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

grid = soup.find('ul', class_='item_list')

article = grid.find('li', class_='slide')

news_title = article.find('div', class_='content_title').text

news_p = article.find('div', class_='article_teaser_body').text

print('Scrapping Complete!')

print(news_title)

print(news_p)

Scrapping Complete!
Meet the People Behind NASA's Perseverance Rover
These are the scientists and engineers who built NASA's next Mars rover and who will guide it to a safe landing in Jezero Crater. 


## Get featured image from Mars website

In [5]:
# URL of page to be scraped
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [6]:
#Scrape Page
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

browser.click_link_by_partial_text('FULL IMAGE')
#browser.links.find_by_partial_text('FULL IMAGE')



In [7]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

image_relative_path = soup.find('img', class_='fancybox-image')
image_relative_path = image_relative_path['src']

featured_image_url = 'https://www.jpl.nasa.gov/' + image_relative_path

image_relative_path
featured_image_url

'https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA17832_ip.jpg'

## Get latest weather from Twitter page

In [8]:
# URL of page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [9]:
#Scrape Page
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

mars_weather = soup.find('span', text = re.compile('InSight sol')).text

mars_weather = mars_weather.replace('InSight s', 'S') 

print('Scrapping Complete!')

mars_weather

Scrapping Complete!


'Sol 506 (2020-04-29) low -93.5ºC (-136.2ºF) high -3.7ºC (25.3ºF)\nwinds from the WNW at 4.5 m/s (10.1 mph) gusting to 15.6 m/s (34.8 mph)\npressure at 6.80 hPa'

## Get Mars fact from planets fact Website

In [10]:
# URL of page to be scraped
url = 'https://space-facts.com/mars/'
browser.visit(url)

In [11]:
#Scrape Page
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

mars_fact_table = soup.find('table', class_="tablepress tablepress-id-p-mars")

mars_fact_table = pd.read_html(str(mars_fact_table)) #[0]

print('Scrapping Complete!')

mars_fact_table

Scrapping Complete!


[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

## Get Hemisphere images

## Combine all scrapped data into a dictionary

In [12]:
data_entry = {}

data_entry['news_title'] = news_title
data_entry['news_p'] = news_p
data_entry['featured_image'] = featured_image_url
data_entry['mars_weather'] = mars_weather
data_entry['mars_fact_table'] = mars_fact_table

data_entry

{'news_title': "Meet the People Behind NASA's Perseverance Rover",
 'news_p': "These are the scientists and engineers who built NASA's next Mars rover and who will guide it to a safe landing in Jezero Crater. ",
 'featured_image': 'https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA17832_ip.jpg',
 'mars_weather': 'Sol 506 (2020-04-29) low -93.5ºC (-136.2ºF) high -3.7ºC (25.3ºF)\nwinds from the WNW at 4.5 m/s (10.1 mph) gusting to 15.6 m/s (34.8 mph)\npressure at 6.80 hPa',
 'mars_fact_table': [                      0                              1
  0  Equatorial Diameter:                       6,792 km
  1       Polar Diameter:                       6,752 km
  2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
  3                Moons:            2 (Phobos & Deimos)
  4       Orbit Distance:       227,943,824 km (1.38 AU)
  5         Orbit Period:           687 days (1.9 years)
  6  Surface Temperature:                   -87 to -5 °C
  7         First Record:              