In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import time

In [2]:
# Pages visited by the scraping steps below, one constant per data source

# NASA Mars news listing (query string asks for publish-date-descending order)
news_url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

# JPL space images search, filtered to the Mars category
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Mars weather report Twitter account
weather_url = "https://twitter.com/marswxreport?lang=en"

# Mars facts page (source of the facts table)
facts_url = "https://space-facts.com/mars/"

# USGS Astrogeology search results for the enhanced hemisphere images
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

## NASA Mars News
#### Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables called news_title and news_p that you can reference later.

In [3]:
# Open chrome browser
executable_path = {'executable_path':'chromedriver.exe'}
browser = Browser('chrome',**executable_path,headless=False, incognito=True)

try:
    # Visit specified url
    browser.visit(news_url)

    # The article list may be filled in after the initial page load, so give
    # the first 'li.slide' item up to 10 seconds to appear before the page
    # source is captured. The return value is ignored on purpose: if the item
    # never shows up, the cell that reads news_html surfaces the problem.
    browser.is_element_present_by_css('li.slide', wait_time=10)

    # Save html from browser in object
    html = browser.html

    # Pass HTML string to bs
    news_html = bs(html,'html.parser')
finally:
    # Close chrome browser even when the visit or parse above raises, so a
    # failed run does not leave a browser/driver process behind
    browser.quit()

In [4]:
# The first <li class="slide"> on the page is the most recent article,
# because the listing URL requests publish-date-descending order
latest_article = news_html.find('li', attrs={'class': 'slide'})
latest_article

<li class="slide"><div class="image_and_description_container"><a href="/news/8568/nasas-treasure-map-for-water-ice-on-mars/" target="_self"><div class="rollover_description"><div class="rollover_description_inner">A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.</div><div class="overlay_arrow"><img alt="More" src="/assets/overlay-arrow.png"/></div></div><div class="list_image"><img alt="" src="/system/news_items/list_view_images/8568_PIA23515_annotated-globe-320x240.gif"/></div><div class="bottom_gradient"><div><h3>NASA's Treasure Map for Water Ice on Mars</h3></div></div></a><div class="list_text"><div class="list_date">December 10, 2019</div><div class="content_title"><a href="/news/8568/nasas-treasure-map-for-water-ice-on-mars/" target="_self">NASA's Treasure Map for Water Ice on Mars</a></div><div class="article_teaser_body">A new study identifies frozen water just below the Martian surface, where astronauts could easily 

In [5]:
# Headline of the latest article: text inside its "content_title" div
news_title = latest_article.find('div', attrs={'class': 'content_title'}).get_text()
news_title

"NASA's Treasure Map for Water Ice on Mars"

In [6]:
# Teaser paragraph of the latest article: text inside its "article_teaser_body" div
news_p = latest_article.find('div', attrs={'class': 'article_teaser_body'}).get_text()
news_p

'A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.'

## JPL Mars Space Images - Featured Image
#### Visit the url for JPL Featured Space Image. Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url. Make sure to find the image url to the full size .jpg image. Make sure to save a complete url string for this image.

In [7]:
# Open chrome browser
executable_path = {'executable_path':'chromedriver.exe'}
browser = Browser('chrome',**executable_path,headless=False, incognito=True)

try:
    # Visit specified url
    browser.visit(image_url)

    # Give the featured-image link (id="full_image") up to 10 seconds to be
    # present before the page source is captured. The return value is ignored
    # on purpose: a missing link is reported by the cell that reads image_html.
    browser.is_element_present_by_css('a#full_image', wait_time=10)

    # Save html from browser in object
    html = browser.html

    # Pass HTML string to bs
    image_html = bs(html,'html.parser')
finally:
    # Close chrome browser even when the visit or parse above raises, so a
    # failed run does not leave a browser/driver process behind
    browser.quit()

In [8]:
# The featured image link is the <a id="full_image"> element; its
# data-fancybox-href attribute holds the site-relative image path
featured_image_href = image_html.find('a', attrs={'id': 'full_image'})['data-fancybox-href']
featured_image_href

'/spaceimages/images/mediumsize/PIA19180_ip.jpg'

In [9]:
# Prefix the site-relative path with the JPL host to get an absolute image url
featured_image_url = "https://www.jpl.nasa.gov{}".format(featured_image_href)
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19180_ip.jpg'

## Mars Weather
#### Visit the Mars Weather Twitter account here and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.

In [10]:
# Open chrome browser
executable_path = {'executable_path':'chromedriver.exe'}
browser = Browser('chrome',**executable_path,headless=False, incognito=True)

try:
    # Visit specified url
    browser.visit(weather_url)

    # Tweets may be rendered after the initial page load, so give the first
    # 'p.tweet-text' element up to 10 seconds to appear before the page source
    # is captured. The return value is ignored on purpose: a missing tweet is
    # reported by the cell that reads weather_html.
    browser.is_element_present_by_css('p.tweet-text', wait_time=10)

    # Save html from browser in object
    html = browser.html

    # Pass HTML string to bs
    weather_html = bs(html,'html.parser')
finally:
    # Close chrome browser even when the visit or parse above raises, so a
    # failed run does not leave a browser/driver process behind
    browser.quit()

In [11]:
# Latest weather tweet: first child node of the first <p class="tweet-text">
# (taking only the first child leaves out any nested elements that follow the text)
mars_weather = weather_html.find('p', attrs={'class': 'tweet-text'}).contents[0]
mars_weather

'InSight sol 373 (2019-12-14) low -98.1ºC (-144.6ºF) high -20.1ºC (-4.1ºF)\nwinds from the SW at 4.9 m/s (11.1 mph) gusting to 19.7 m/s (44.0 mph)\npressure at 6.60 hPa'

## Mars Facts
#### Visit the Mars Facts webpage here and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc. Use Pandas to convert the data to an HTML table string.

In [12]:
# pd.read_html returns one DataFrame per table it finds on the page;
# the first one is the Mars facts table
facts_tables = pd.read_html(facts_url)
mars_facts_df = facts_tables[0]

# Give the two columns readable headers
mars_facts_df.columns = ['Description', 'Value']

# Display the resulting df
mars_facts_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# Convert and save Pandas df to HTML table
mars_facts = mars_facts_df.to_html(index=False,justify='left',classes='table table-striped table-bordered')
mars_facts

'<table border="1" class="dataframe table table-striped table-bordered">\n  <thead>\n    <tr style="text-align: left;">\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>

## Mars Hemispheres
#### Visit the USGS Astrogeology site here to obtain high resolution images for each of Mars' hemispheres. You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image. Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title. Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [14]:
# Link texts of the hemisphere pages to visit, in visiting order
hemispheres_list = [
    'Cerberus Hemisphere Enhanced',
    'Schiaparelli Hemisphere Enhanced',
    'Syrtis Major Hemisphere Enhanced',
    'Valles Marineris Hemisphere Enhanced',
]

# Accumulator for one {"title", "img_url"} dictionary per hemisphere
hemispheres_name_url = list()

In [15]:
# Start from an empty list so that re-running this cell replaces the
# results instead of appending a second copy of every hemisphere
hemispheres_name_url = []

# Open chrome browser
executable_path = {'executable_path':'chromedriver.exe'}
browser = Browser('chrome',**executable_path,headless=False, incognito=True)

try:
    # Visit specified url
    browser.visit(hemispheres_url)

    # Loop to save the hemisphere image urls
    for hemisphere in hemispheres_list:

        # Navigate to hemisphere image
        browser.click_link_by_partial_text(hemisphere)

        # Save html from browser in object
        html = browser.html

        # Pass HTML string to bs
        hemisphere_html = bs(html,'html.parser')

        # Collect and save hemisphere name
        hemisphere_name = hemisphere_html.find('h2',class_="title").text

        # Collect and save image url (the src is site-relative, so prefix the host)
        hemisphere_image_src = hemisphere_html.find('img',class_="wide-image")['src']
        hemisphere_image_url = f'https://astrogeology.usgs.gov{hemisphere_image_src}'

        # Save url and name in dictionary
        hemisphere_dict = {"title":hemisphere_name, "img_url":hemisphere_image_url}

        # Add dictionary to the result list; a new dict is built on every
        # iteration, so no defensive copy is needed
        hemispheres_name_url.append(hemisphere_dict)

        # Move back through browsing history to return to main page
        browser.back()
finally:
    # Close chrome browser even when a click or parse above raises, so a
    # failed run does not leave a browser/driver process behind
    browser.quit()

# Print list
hemispheres_name_url



[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

## Save output in a dictionary

In [16]:
# Gather every value scraped above into a single dictionary
scrape_data = dict(
    news_title=news_title,
    news_p=news_p,
    featured_image_url=featured_image_url,
    mars_weather=mars_weather,
    mars_facts=mars_facts,
    hemispheres_name_url=hemispheres_name_url,
)
scrape_data

{'news_title': "NASA's Treasure Map for Water Ice on Mars",
 'news_p': 'A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19180_ip.jpg',
 'mars_weather': 'InSight sol 373 (2019-12-14) low -98.1ºC (-144.6ºF) high -20.1ºC (-4.1ºF)\nwinds from the SW at 4.9 m/s (11.1 mph) gusting to 19.7 m/s (44.0 mph)\npressure at 6.60 hPa',
 'mars_facts': '<table border="1" class="dataframe table table-striped table-bordered">\n  <thead>\n    <tr style="text-align: left;">\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Dei