In [1]:
# Dependencies and Setup
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import requests
import pandas as pd
import time

executable_path = {"executable_path": ChromeDriverManager().install()}

# Running "headless" mode; change to False for browser GUI display
browser = Browser("chrome", **executable_path, headless=True)



Current google-chrome version is 90.0.4430
Get LATEST driver version for 90.0.4430
Driver [C:\Users\pvpch\.wdm\drivers\chromedriver\win32\90.0.4430.24\chromedriver.exe] found in cache


### NASA Mars News

In [2]:
# URL of page to be scraped
url = "https://mars.nasa.gov/news/"

# Retrieving page with the requests module
response = requests.get(url)

# Creating BeautifulSoup object; parse with "html.parser"
soup = BeautifulSoup(response.text, "html.parser")

# Storing results
results = soup.find("div", class_="slide")

# Retrieving news URL
news_partial_url = results.find("div", class_="content_title").a["href"].replace("/news","")
news_url = url + news_partial_url

# Storing title
news_title = results.find("div", class_="content_title").text.strip()

#Storing paragraph text
news_p = results.find("div", class_="rollover_description_inner").text.strip()

In [3]:
print(news_url)

https://mars.nasa.gov/news//8936/nasas-ingenuity-helicopter-to-begin-new-demonstration-phase/


In [4]:
print(news_title)

NASA's Ingenuity Helicopter to Begin New Demonstration Phase


In [5]:
print(news_p)

The Red Planet rotorcraft will shift focus from proving flight is possible on Mars to demonstrating flight operations that future aerial craft could utilize.


### JPL Mars Space Images - Featured Image

In [6]:
# URL of page to be scraped
url2 = "https://www.jpl.nasa.gov/images?search=&category=Mars"

# Accessing page using Splinter
browser.visit(url2)

# Navigating the page to find the title and image URL for the current "Featured Mars Image", storing both
browser.links.find_by_partial_href("images").click()
time.sleep(2)
featured_image_title = browser.find_by_css("h1").text
browser.links.find_by_partial_href("original_images").click()
featured_image_url = browser.url

In [7]:
print(featured_image_title)

Baldet Crater - False Color


In [8]:
print(featured_image_url)

https://d2pn8kiwq2w21t.cloudfront.net/original_images/jpegPIA24633.jpg


### Mars Facts

In [9]:
# URL of page to be scraped
url3 = "https://space-facts.com/mars/"

# Scraping the facts table using pandas
tables = pd.read_html(url3)

# Converting table to formatted DataFrame
df = tables[0].rename(columns={0: "Description", 1: "Mars"}).set_index("Description")

# Converting the DataFrame back to an HTML table
html_table = df.to_html()

In [10]:
df

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [11]:
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n

### Mars Hemispheres

In [12]:
# URL of page to be scraped
url4 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Accessing page using Splinter
browser.visit(url4)

# Creating HTML object
html = browser.html

# Creating BeautifulSoup object; parse with "html.parser"
soup = BeautifulSoup(html, "html.parser")

# Scraping the page for the div items for each hempisphere
items = soup.find_all("div", class_="item")

# Creating an empty list for storing subsequent results
hemisphere_image_urls = []

# Looping through items for the relevant data
for item in range(len(items)):

    # Navigating the page to find the links to the standalone page for each hemisphere
    link = browser.find_by_css("h3")
    link[item].click()

    # Locating and storing the formatted hemisphere name
    img = browser.find_by_css("h2").text
    title = img.replace(" Enhanced","")
    
    # Locating and storing the URL for the full-size image
    img_url = browser.links.find_by_partial_href("tif/full")["href"]
    
    # Creating dictionary with title/URL pairs and appending to list
    hemisphere_image_urls.append({"title": title, "img_url": img_url})
    
    # Navigating back one page before restarting loop
    browser.back()
    
# Quitting browser session  
browser.quit()

In [13]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]