In [1]:
from bs4 import BeautifulSoup as bs
import pandas as pd
import pymongo
from pprint import pprint
import requests
from splinter import Browser
import lxml

In [2]:
#MARS NEWS
#Open a visible (non-headless) Chrome window through the local chromedriver and load the news page
url = 'https://mars.nasa.gov/news/'
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)
browser.visit(url)

In [3]:
#Prepare empty list for headlines and paragraphs
news_info = []

#Give the article list up to 10 seconds to show up before snapshotting the page;
#reading browser.html too early would leave `results` empty if the list is rendered late
browser.is_element_present_by_css('div.list_text', wait_time=10)

html = browser.html
soup = bs(html, 'lxml')
results = soup.find_all('div', class_='list_text')

#Loop through results to get the headlines and paragraphs
for result in results:
    try:
        news_title = result.a.text
        news_paragraph = result.find('div', class_='article_teaser_body').text
    except AttributeError as e:
        #A missing <a> or teaser <div> comes back as None, so `.text` raises AttributeError.
        #Report it and skip just that entry, without hiding unrelated errors.
        print(e)
        continue

    news_info.append({"Headline": news_title,
                      "Paragraph": news_paragraph})

In [4]:
#Display the list of {'Headline', 'Paragraph'} dictionaries built from the news page
news_info

[{'Headline': 'Curiosity on the Move Again',
  'Paragraph': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles."},
 {'Headline': 'The Mars InSight Landing Site Is Just Plain Perfect',
  'Paragraph': 'If the InSight landing zone were ice cream, it would be vanilla.'},
 {'Headline': "Five Things to Know About InSight's Mars Landing",
  'Paragraph': "NASA engineers will be holding their breath when their spacecraft heads into Mars' atmosphere on Nov. 26."},
 {'Headline': 'NASA Launches a New Podcast to Mars',
  'Paragraph': "NASA's new eight-episode series 'On a Mission' follows the InSight spacecraft on its journey to Mars and details the extraordinary challenges of landing on the Red Planet."},
 {'Headline': 'Update on Opportunity Rover Recovery Efforts',
  'Paragraph': 'After a review of the progress of the listening campaign, NASA will continue its current strategy for attempting to make contact

In [5]:
#MARS IMAGE
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

#Only the page source is needed from this session, so always close the Chrome
#window afterwards (even if the visit fails) instead of leaving it running
try:
    browser.visit(url)
    html = browser.html
finally:
    browser.quit()

soup = bs(html, 'lxml')

In [6]:
#Base URL that the high-res image name is appended to
base_imgurl = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/'

#Find the first <img> inside the 'img' div and read its source path
image_src = soup.find('div', class_='img').find('img')['src']

#Keep the file name (text after the last "/") and take its first 8 characters as the image name
image_file = image_src.rsplit("/", 1)[-1]
image_name = image_file[:8]

#Assemble the full image URL and show it
featured_image_url = '{}{}_hires.jpg'.format(base_imgurl, image_name)
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22801_hires.jpg'

In [7]:
#MARS TWEET
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
url = 'https://twitter.com/marswxreport?lang=en'

#Only the page source is needed from this session, so always close the Chrome
#window afterwards (even if the visit fails) instead of leaving it running
try:
    browser.visit(url)
    html = browser.html
finally:
    browser.quit()

soup = bs(html, 'lxml')

In [8]:
#Take the first tweet text container on the page and read the text of its first paragraph
tweet_container = soup.find('div', class_='js-tweet-text-container')
mars_weather = tweet_container.find('p').text
mars_weather

'Sol 2224 (2018-11-08), high 0C/32F, low -72C/-97F, pressure at 8.65 hPa, daylight 06:19-18:36'

In [9]:
#MARS FACTS
#Read every HTML table found on the facts page into a list of DataFrames
url = 'http://space-facts.com/mars/'
tables = pd.read_html(url)

#The first table is the fact table; give its two columns readable titles
facts_table = tables[0]
facts_table.columns = ['Statistic', 'Detail']

#Render the DataFrame as an HTML string and pretty-print it
mars_facts = facts_table.to_html()
pprint(mars_facts)

('<table border="1" class="dataframe">\n'
 '  <thead>\n'
 '    <tr style="text-align: right;">\n'
 '      <th></th>\n'
 '      <th>Statistic</th>\n'
 '      <th>Detail</th>\n'
 '    </tr>\n'
 '  </thead>\n'
 '  <tbody>\n'
 '    <tr>\n'
 '      <th>0</th>\n'
 '      <td>Equatorial Diameter:</td>\n'
 '      <td>6,792 km</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>1</th>\n'
 '      <td>Polar Diameter:</td>\n'
 '      <td>6,752 km</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>2</th>\n'
 '      <td>Mass:</td>\n'
 '      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>3</th>\n'
 '      <td>Moons:</td>\n'
 '      <td>2 (Phobos &amp; Deimos)</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>4</th>\n'
 '      <td>Orbit Distance:</td>\n'
 '      <td>227,943,824 km (1.52 AU)</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>5</th>\n'
 '      <td>Orbit Period:</td>\n'
 '      <td>687 days (1.9 years)</td>\n'
 '    </tr>\n'
 '    <tr>\n'
 '      <th>6</th>\n

In [39]:
#MARS HEMISPHERES
#Address of the search results page listing the enhanced hemisphere products
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

#Start a visible (non-headless) Chrome session through the local chromedriver
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [50]:
browser.visit(url)

#Prepare empty list to store dictionary of image links and titles
mars_hemispheres = []

#Image paths read from the detail pages are site-relative, so they get prefixed with this base URL
base_url = 'https://astrogeology.usgs.gov'

#CSS selector for the hemisphere links on the search results page
link_selector = "a.product-item h3"

#Wait (up to 10 seconds) for the results to be listed, then count the links
#actually present instead of assuming there are exactly 4
browser.is_element_present_by_css(link_selector, wait_time=10)
hemisphere_count = len(browser.find_by_css(link_selector))
if hemisphere_count == 0:
    #Fail loudly rather than silently producing an empty list
    raise RuntimeError('No hemisphere links found on the search results page')

#Loop through every hemisphere listed
for i in range(hemisphere_count):

    #Look the links up again on every pass: the page is reloaded after each
    #visit to a detail page, so previously found elements can no longer be used
    browser.find_by_css(link_selector)[i].click()

    #Wait (up to 10 seconds) for the detail page image before reading the page
    browser.is_element_present_by_css('img.wide-image', wait_time=10)

    #Get the current HTML page structure
    html = browser.html
    soup = bs(html, 'lxml')

    #Each hemisphere image location found here...store it in a variable
    hemisphere_image = soup.find('img', class_='wide-image')['src']
    image_link = base_url + hemisphere_image

    #Store the title in a variable...need to remove ' Enhanced'
    hemisphere_title = soup.find('h2', class_='title').text.replace(' Enhanced', '')

    #Append image and title to a dictionary and append to list
    mars_hemispheres.append({'Hemisphere': hemisphere_title,
                             'ImageURL': image_link})

    #Back to the results page through the browser history; this does not depend on
    #the page offering a link labelled 'Back' or on the older click_link_by_text helper
    browser.back()

In [51]:
#Display the list of {'Hemisphere', 'ImageURL'} dictionaries collected by the loop
mars_hemispheres

[{'Hemisphere': 'Cerberus Hemisphere',
  'ImageURL': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'Hemisphere': 'Schiaparelli Hemisphere',
  'ImageURL': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'Hemisphere': 'Syrtis Major Hemisphere',
  'ImageURL': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'Hemisphere': 'Valles Marineris Hemisphere',
  'ImageURL': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]