# Web Scraping - Mission to Mars

In [2]:
# Dependencies
import os
import time

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [3]:
# Set the executable path and initialise the chrome browser.
# The chromedriver location is machine-specific, so it can be overridden with
# the CHROMEDRIVER_PATH environment variable; when the variable is unset the
# original hard-coded location is used, so existing setups keep working.
executable_path = {
    'executable_path': os.environ.get('CHROMEDRIVER_PATH', 'C:/temp/chromedriver')
}
browser = Browser('chrome', **executable_path)

## NASA Mars News

In [4]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Check for the news list items (CSS "ul.item_list li.slide"), passing
# wait_time=1, then pause one further second before the page is read.
# NOTE(review): the boolean returned by is_element_present_by_css is not
# checked, so the notebook carries on even if the list never appears.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)
time.sleep(1)

In [5]:
# Create a Beautiful Soup object
# Parse the HTML currently held by the browser with Python's built-in
# 'html.parser' backend
html = browser.html
soup = bs(html, 'html.parser')

In [6]:
# Collect the latest news titles
# find_all returns every <div class="content_title"> found in the soup
news_titles = soup.find_all('div', class_="content_title")

# Create list to hold news titles
titles = []

# Loop through returned results
for news in news_titles:
    # Only keep divs that wrap a link. find() returns None when there is no
    # <a> tag, so test for that explicitly instead of hiding every possible
    # error (including KeyboardInterrupt) behind a bare `except: pass`.
    link = news.find('a')
    if link is not None:
        titles.append(link.text)

# Fail with a clear message when nothing was scraped (for example the page had
# not finished rendering) rather than an unexplained "list index out of range"
if not titles:
    raise IndexError("No news titles found in the scraped page (div.content_title > a)")

# Get the first title in the titles list
latest_title = titles[0]
print(latest_title)

NASA's Perseverance Rover 22 Days From Mars Landing


In [7]:
# Collect the latest news body text
# Take the first <div class="article_teaser_body"> and keep its text content
news_p = soup.find("div", class_="article_teaser_body").text
print(news_p)

Seven minutes of harrowing descent to the Red Planet is in the not-so-distant future for the agency’s Mars 2020 mission.  


## JPL Mars Space Images

In [8]:
# URL of page to be scraped
# Point the browser at the JPL space images listing (query string asks for
# category=Mars); note this rebinds `url` from the NASA news address
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [54]:
# Alternative, currently disabled: click the 'full image' element located by its id
# full_image = browser.find_by_id('full_image')
# full_image.click()

In [9]:
# Splinter interacting with browser - click the element matched by the
# CSS selector 'h2.mb-3' (an <h2> carrying the class "mb-3")
# NOTE(review): presumably this heading opens the featured image page - confirm
full_image = browser.find_by_css('h2.mb-3')
full_image.click()

In [10]:
# Create a Beautiful Soup object (Scrape page into Soup)
# Re-read browser.html at this point; this rebinds `html` and `soup`, so the
# soup built from the NASA news page is no longer reachable under that name
html = browser.html
soup = bs(html, 'html.parser')

In [11]:
# Find the image url
# Read the src attribute of the first <img> whose class is "BaseImage"
# NOTE(review): soup.find returns None when nothing matches, in which case
# the ['src'] lookup raises TypeError
image_url = soup.find('img', class_="BaseImage")['src']
image_url

'https://d2pn8kiwq2w21t.cloudfront.net/images/jpegPIA24378.width-1024.jpg'

## Mars Facts

In [12]:
# Use read_html function in Pandas to scrape tabular data from page
# read_html returns a list of DataFrames (one per table it finds); only the
# first table is kept here
url = 'https://space-facts.com/mars/'
df = pd.read_html(url)[0]
df.head()

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [14]:
# Rename columns
# The scraped table has the integer column labels 0 and 1; rename() returns a
# new frame, so `df` itself is left untouched
mars_df = df.rename(columns={0: "Facts", 1: "Mars"})

In [15]:
# Set the index
# Build the indexed frame from `df` in one chained expression instead of
# mutating `mars_df` with inplace=True: the in-place version raised KeyError
# when the cell was run a second time, because "Facts" was no longer a column.
mars_df = df.rename(columns={0: "Facts", 1: "Mars"}).set_index("Facts")
mars_df.head()

Unnamed: 0_level_0,Mars
Facts,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"


In [16]:
# Use Pandas to_html method to generate html table from df
# Called without a buffer argument, to_html() returns the markup as a string
html_table = mars_df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Facts</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tb

In [17]:
# Strip unwanted new lines to clean up the table
# str.replace returns a new string, so the result has to be assigned back;
# previously the cleaned markup was only displayed and html_table kept its
# newline characters
html_table = html_table.replace('\n', '')
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars</th>    </tr>    <tr>      <th>Facts</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

## Mars Hemispheres

In [19]:
# URL of pages to be scraped
# Open the USGS Astrogeology search results for "hemisphere enhanced" with
# target=Mars (both taken from the query string below)
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [20]:
# One {'img_url', 'title'} record is collected per hemisphere
hemisphere_images = []

# Product links on the results page; only their count is needed up front
links = browser.find_by_css("a.product-item h3")

# Visit each product page in turn, read the sample image href and the title
for position in range(len(links)):

    # Re-query on every pass to avoid a stale element exception after navigating
    browser.find_by_css("a.product-item h3")[position].click()
    time.sleep(1)

    # href of the first anchor whose text is 'Sample'
    sample_href = browser.links.find_by_text('Sample').first['href']

    # Text of the "h2.title" heading on the product page
    page_title = browser.find_by_css("h2.title").text

    # Store the record, keeping the img_url-then-title key order
    hemisphere_images.append({'img_url': sample_href, 'title': page_title})

    # Navigate back to the results page for the remaining hemispheres
    browser.back()

In [24]:
# Display the list of hemisphere records (img_url and title per entry)
hemisphere_images

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [23]:
# Close the browser session started at the top of the notebook
browser.quit()