In [1]:
from bs4 import BeautifulSoup
from splinter import Browser
import time


In [2]:
# Launch a visible (non-headless) Chrome session driven by a local chromedriver.
# The path is a raw string so the Windows backslashes stay literal: in a normal
# string '\d' and '\c' are invalid escape sequences, which newer Python versions
# warn about and which would silently break for names like '\new' or '\temp'.
executable_path = {'executable_path': r'C:\downloads\chromedriver\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# Objective 1: Scrape NASA Mars News site and retrieve the latest news title and paragraph text
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)

# The article list may be injected after the initial page load, so wait (for a
# bounded time) until the first 'div.list_text' entry exists. The return value
# is ignored on purpose: the call is used purely as a wait, and it returns as
# soon as the element appears.
browser.is_element_present_by_css('div.list_text', wait_time=5)

In [4]:
# Snapshot the page source from the browser and parse it with BeautifulSoup
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [5]:
# Debugging aid: uncomment to dump the page's entire HTML (very long output)
# print(soup.prettify())


In [6]:
# Grab the first 'list_text' container on the page (one news entry)
mars_news = soup.select_one('div.list_text')
print(mars_news)

<div class="list_text"><div class="list_date">December 14, 2020</div><div class="content_title"><a href="/news/8815/from-jpls-mailroom-to-mars-and-beyond/" target="_self">From JPL's Mailroom to Mars and Beyond</a></div><div class="article_teaser_body">Bill Allen has thrived as the mechanical systems design lead for three Mars rover missions, but he got his start as a teenager sorting letters for the NASA center.</div></div>


In [7]:
# Pull the headline and the teaser paragraph text out of the isolated entry
news_title = mars_news.select_one('div.content_title').get_text()
news_p = mars_news.select_one('div.article_teaser_body').get_text()

# Show both values, one per line
print(news_title, news_p, sep='\n')

From JPL's Mailroom to Mars and Beyond
Bill Allen has thrived as the mechanical systems design lead for three Mars rover missions, but he got his start as a teenager sorting letters for the NASA center.


In [8]:
# Objective 2: Scrape JPL Mars Space Images to retrieve the featured image
# (the query string asks the gallery for category=Mars)
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)  # load the gallery page in the automated browser session

In [10]:
# Snapshot the JPL page source from the browser and parse it
jpl = browser.html
jpl_soup = BeautifulSoup(markup=jpl, features='html.parser')

In [11]:
#print(jpl_soup.prettify())

In [12]:
# Narrow the document down to the carousel container that holds the featured image
feat_img_section = jpl_soup.select_one('div.carousel_container')
print(feat_img_section)

<div class="carousel_container">
<div class="carousel_items">
<article alt="The Day the Earth Smiled: Sneak Preview" class="carousel_item" style="background-image: url('/spaceimages/images/wallpaper/PIA17171-1920x1200.jpg');">
<div class="default floating_text_area ms-layer">
<h2 class="category_title">
</h2>
<h2 class="brand_title">
				  FEATURED IMAGE
				</h2>
<h1 class="media_feature_title">
				  The Day the Earth Smiled: Sneak Preview				</h1>
<div class="description">
</div>
<footer>
<a class="button fancybox" data-description="In this rare image taken on July 19, 2013, the wide-angle camera on NASA's Cassini spacecraft has captured Saturn's rings and our planet Earth and its moon in the same frame." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA17171_ip.jpg" data-link="/spaceimages/details.php?id=PIA17171" data-title="The Day the Earth Smiled: Sneak Preview" id="full_image">
					FULL IMAGE
				  </a>
</footer>
</div>
<div class="gradient_c

In [14]:
# The image path lives inside the article's inline CSS, in the form
#   background-image: url('<path>');
# so remove the CSS text around it to leave only the path.
jpl_img_url = (
    feat_img_section.find('article', class_='carousel_item')["style"]
    .replace("background-image: url('", "")
    .replace("');", "")
)
print(jpl_img_url)

/spaceimages/images/wallpaper/PIA17171-1920x1200.jpg


In [15]:
# The scraped path is site-relative, so prefix it with the JPL host
jpl_base_url = 'https://www.jpl.nasa.gov'
jpl_final_url = ''.join((jpl_base_url, jpl_img_url))

In [16]:
import pandas as pd

In [17]:
# Objective 3: Scrape the table using pandas on space-facts.com/mars
# (only the URL is defined here; pd.read_html fetches it in the next cell)
mars_facts_url = 'https://space-facts.com/mars/'

In [18]:
# Let pandas fetch the page and parse its HTML tables; the result is a list
# of DataFrames (technique taken from the class activities)
mars_facts = pd.read_html(io=mars_facts_url)
mars_facts

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [19]:
# Put the facts table from the top of the page (first table found) into a DF.
# Work on a copy and avoid inplace operations: mutating mars_facts[0] directly
# makes this cell fail when re-run, because the stored table has already lost
# its 'Measurement' column to the index and no longer has two columns to rename.
mars_facts_df = mars_facts[0].copy()
mars_facts_df.columns = ["Measurement", "Mars"]
mars_facts_df = mars_facts_df.set_index("Measurement")
mars_facts_df

Unnamed: 0_level_0,Mars
Measurement,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [20]:
# Also put the Mars/Earth comparison table (second entry in the list returned
# by pd.read_html) into a DF to practice
mars_earth_comp_df = mars_facts[1]
mars_earth_comp_df  # bare last expression so the notebook displays the frame

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-87 to -5 °C,-88 to 58°C


In [21]:
# Convert the mars_facts_df to an html table string and print it to inspect
# the generated markup
print(mars_facts_df.to_html())

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars</th>
    </tr>
    <tr>
      <th>Measurement</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


In [22]:
# Keep the facts table's HTML markup (the same string printed above) in a variable
facts_html = mars_facts_df.to_html()

In [23]:
# Objective 4: Get the high-res Mars Hemisphere images from USGS astrogeology page
# (this is a search-results URL; the individual result pages are visited later)
hemis_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemis_url)  # load the search results in the automated browser session


In [24]:
# Snapshot the search-results page source from the browser and parse it
hemis = browser.html
hemi_soup = BeautifulSoup(markup=hemis, features='html.parser')

In [25]:
#print(hemi_soup.prettify())

In [26]:
# Collect every 'div.item' element, i.e. one container per search result
hemi_sections = hemi_soup.select('div.item')
print(hemi_sections)

[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>, <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/08eac6e22c07fb1fe72223a79252de20_schiapar

In [27]:
# Build a list of {'title': ..., 'img_url': ...} dicts, one per hemisphere
# search result, by visiting each result's detail page.
hemi_img_urls = []

base_url = 'https://astrogeology.usgs.gov/'

for hemi in hemi_sections:
    # Capture the title
    hemi_title = hemi.find('h3').text

    # Capture the link to the detail page and join it to the base url.
    # The scraped href starts with '/' and base_url ends with '/', so normalise
    # both sides to avoid a double slash ('https://host//search/...').
    hemi_url = hemi.find('a', class_='itemLink product-item')['href']
    comb_url = base_url.rstrip('/') + '/' + hemi_url.lstrip('/')

    # Go to the detail page to get the high-res image link
    browser.visit(comb_url)

    # Pause BEFORE reading the HTML so the page has time to finish rendering;
    # sleeping after the snapshot has been taken would have no effect on it.
    time.sleep(2)

    # Use bs4 to parse the detail page
    img_pg_html = browser.html
    img_pg_soup = BeautifulSoup(img_pg_html, 'html.parser')

    # The first link inside the 'downloads' section is the image download link
    link_section = img_pg_soup.find('div', class_='downloads')
    img_url = link_section.find('a')['href']

    # Add the dictionary to the list
    hemi_img_urls.append({'title': hemi_title, 'img_url': img_url})

# Print the collected title / image-url dictionaries
for hemi in hemi_img_urls:
    print(hemi)
    
    

{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}
{'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}
{'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}
{'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}
