In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
import pandas as pd

In [4]:
# Fetch the NASA Mars news listing page
news_url = 'https://mars.nasa.gov/news/'

# Retrieve the page with the requests module. The timeout stops the cell from
# hanging indefinitely on a stalled connection, and raise_for_status() surfaces
# an HTTP error here instead of as a confusing parse failure in a later cell.
response = requests.get(news_url, timeout=30)
response.raise_for_status()

# Create a BeautifulSoup object; parse with Python's built-in 'html.parser'
soup = bs(response.text, 'html.parser')

In [5]:
# Pull the headline text out of the first "content_title" div on the news page
title_div = soup.find('div', class_='content_title')
title = title_div.get_text().strip()
title

"Alabama High School Student Names NASA's Mars Helicopter"

In [6]:
# Pull the teaser text from the first "image_and_description_container" div
teaser_div = soup.find('div', class_='image_and_description_container')
paragraph = teaser_div.get_text().strip()
paragraph

"Vaneeza Rupani's essay was chosen as the name for the small spacecraft, which will mark NASA's first attempt at powered flight on another planet."

In [7]:
# Launch a visible (non-headless) Chrome window through splinter, using the
# chromedriver binary named below
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [8]:
# Point the automated browser at the JPL space-images gallery, filtered to Mars
nasa_images_url = (
    'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
)
browser.visit(nasa_images_url)

In [9]:
# Parse the rendered gallery page and build the absolute URL of the featured image
JPL_BASE_URL = "https://www.jpl.nasa.gov"

image_html = browser.html
soup = bs(image_html, 'html.parser')

# The anchor with classes "button fancybox" carries the image path in a data
# attribute; prefixing the site root turns it into a full URL.
article = soup.find('a', class_='button fancybox')
href = article.attrs['data-fancybox-href']
featured_image_url = JPL_BASE_URL + href
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA12826_ip.jpg'

In [12]:
# Point the automated browser at the Mars Weather Twitter feed
weather_url = (
    'https://twitter.com/marswxreport?lang=en'
)
browser.visit(weather_url)

In [13]:
# Parse the rendered Twitter page and read the text of the first matching div
weather_html = browser.html
soup = bs(weather_html, 'html.parser')

# NOTE(review): these look like auto-generated CSS class names — presumably they
# change when the site's front end is rebuilt; confirm before relying on them.
tweet_text_classes = 'css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0'
tweet_div = soup.find('div', class_=tweet_text_classes)
mars_weather = tweet_div.get_text().strip()
mars_weather

'InSight sol 539 (2020-06-02) low -91.9ºC (-133.4ºF) high -4.5ºC (23.9ºF)\nwinds from the WNW at 6.6 m/s (14.7 mph) gusting to 21.1 m/s (47.1 mph)\npressure at 7.30 hPa'

In [14]:
# Point the automated browser at the Space Facts page for Mars
facts_url = (
    'https://space-facts.com/mars/'
)
browser.visit(facts_url)

In [15]:
# Read every HTML table on the Mars facts page into DataFrames and keep the first
table = pd.read_html(facts_url)
profile = table[0]

# Replace the positional column labels 0 and 1 with descriptive names
column_labels = {0: 'Description', 1: 'Value'}
profile_df = profile.rename(columns=column_labels)
profile_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [16]:
# Point the automated browser at the USGS Astrogeology search results for
# enhanced Mars hemisphere images
hemisphere_url = (
    'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
)
browser.visit(hemisphere_url)

In [17]:
# Extract the name of each of Mars's hemispheres and the URL of its image,
# collecting them as a list of {'title': ..., 'img_url': ...} dictionaries.
USGS_BASE_URL = "https://astrogeology.usgs.gov"

hemisphere_html = browser.html
soup = bs(hemisphere_html, 'html.parser')

results = soup.find_all('div', class_="item")

hemisphere_image_urls = []

for result in results:
    # Remove the word "Enhanced" and the whitespace it leaves behind, so titles
    # read "Cerberus Hemisphere" rather than "Cerberus Hemisphere ".
    heading = result.find('h3').text.replace('Enhanced', '').strip()

    # Follow the result's first link to that hemisphere's own page.
    link = result.find('a')['href']
    url = USGS_BASE_URL + link
    browser.visit(url)

    # Loop-local names keep the notebook-level `soup` and `image_html` from
    # being silently overwritten with the last page visited.
    detail_html = browser.html
    detail_soup = bs(detail_html, 'html.parser')

    # The first link inside the "downloads" div is taken as the image URL.
    img_url = detail_soup.find('div', class_="downloads").find('a')['href']

    print(heading)
    print(img_url)
    hemisphere = {
        'title': heading,
        'img_url': img_url
    }
    hemisphere_image_urls.append(hemisphere)

Cerberus Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
Schiaparelli Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
Syrtis Major Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
Valles Marineris Hemisphere 
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [20]:
# Gather every scraped value into one dictionary and display it
mars_data = dict(
    news_title=title,
    news_paragraph=paragraph,
    featured_image=featured_image_url,
    mars_weather=mars_weather,
    mars_facts=profile_df,
    hemisphere_image_urls=hemisphere_image_urls,
)
mars_data

{'news_title': "Alabama High School Student Names NASA's Mars Helicopter",
 'news_paragraph': "Vaneeza Rupani's essay was chosen as the name for the small spacecraft, which will mark NASA's first attempt at powered flight on another planet.",
 'featured_image': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA12826_ip.jpg',
 'mars_weather': 'InSight sol 539 (2020-06-02) low -91.9ºC (-133.4ºF) high -4.5ºC (23.9ºF)\nwinds from the WNW at 6.6 m/s (14.7 mph) gusting to 21.1 m/s (47.1 mph)\npressure at 7.30 hPa',
 'mars_facts':             Description                          Value
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         Fir