In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Set splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# Part 1: Scraping 

## Complete your initial scraping using Jupyter Notebook, BeautifulSoup, Pandas, and Requests/Splinter.

In [3]:
#  Scrape the Mars News Site and collect the latest News Title and Paragraph Text. 
# Assign the text to variables that you can reference later.


# Visit the Mars News Site
url = 'https://redplanetscience.com/'
browser.visit(url)

# Add time delay
browser.is_element_present_by_css('div.list_text', wait_time=1)

True

In [4]:
html = browser.html
news_soup = soup(html, 'html.parser')

In [5]:
#print(news_soup.prettify())

In [6]:
slide_elem = news_soup.select_one('div.list_text')

In [7]:
# Get the news title
news_title = slide_elem.find('div', class_='content_title').get_text()
news_title

'NASA to Reveal Name of Its Next Mars Rover'

In [8]:
# Get the paragraph text
news_p = slide_elem.find('div', class_='article_teaser_body').get_text()
news_p

"After a months-long contest among students to name NASA's newest Mars rover, the agency will reveal the winning name — and the winning student — this Thursday. "

In [9]:
print(f"Title:\n{news_title}")
print("----------------------------------------------------------")
print(f"Paragraph:\n{news_p}")

Title:
NASA to Reveal Name of Its Next Mars Rover
----------------------------------------------------------
Paragraph:
After a months-long contest among students to name NASA's newest Mars rover, the agency will reveal the winning name — and the winning student — this Thursday. 


# JPL Mars Space Images: Featured Image

In [10]:
# Visit the URL for the Featured Space Image 
url = 'https://spaceimages-mars.com'
browser.visit(url)

In [11]:
# Find the image URL for the current Featured Mars Image
full_image_link = browser.find_by_tag('button')[1]
full_image_link.click()

In [12]:
# Parse the resulting html with soup
html = browser.html
img_soup = soup(html,'html.parser')
print(img_soup.prettify())

<html class="fancybox-margin fancybox-lock">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet"/>
  <!-- <link rel="stylesheet" type="text/css" href="css/font.css"> -->
  <link href="css/app.css" rel="stylesheet" type="text/css"/>
  <link href="https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
  <title>
   Space Image
  </title>
  <style type="text/css">
   .fancybox-margin{margin-right:17px;}
  </style>
 </head>
 <body>
  <div class="header">
   <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="#">
     <img id="logo" src="image/nasa.png"/>
     <span class="logo">
      Jet Propulsion Laboratory
     </span>
     <span class="logo1">
      California Institute of Technology
     </span>
    </a>
    <button aria-controls="navbarNav" aria-expand

In [13]:
# Find the featured image url
img_url_rel = img_soup.find('img', class_='fancybox-image').get('src')
print(f"Featured Image: {img_url_rel}")

Featured Image: image/featured/mars3.jpg


In [14]:
# Assign the URL for featured_image_url.
featured_image_url = f'https://spaceimages-mars.com/{img_url_rel}'
print(f"Featured Image URL: {featured_image_url}")

Featured Image URL: https://spaceimages-mars.com/image/featured/mars3.jpg


# Mars Facts

In [15]:
#Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including
#diameter, mass, etc.

df = pd.read_html('https://galaxyfacts-mars.com')[0]
df.head()

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"


In [16]:
df.columns=['Description', 'Mars', 'Earth']
df.set_index('Description', inplace=True)
df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [17]:
# Use Pandas to convert the data to a HTML table string.
df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>

# Mars Hemispheres

In [18]:
# Visit the astrogeology site to obtain high-resolution images for each hemisphere of Mars.
url = 'https://marshemispheres.com/'
browser.visit(url)

In [19]:
# You will need to click each of the links to the hemispheres in order to find the image URL to the full-resolution image.
# Append the dictionary with the image URL string and the hemisphere title to a list. 
# This list will contain one dictionary for each hemisphere.
# Save the image URL string for the full resolution hemisphere image and the hemisphere title containing the hemisphere name. 
# Use a Python dictionary to store the data using the keys img_url and title.

# Create a list to hold the images and titles
hemisphere_image_urls = []

# List of hemishperes 
links = browser.find_by_css('a.product-item img')

# Clink the links and retrieve the image urls and titles
for i in range(len(links)):
    
    hemisphereInfo = {}
    
    browser.find_by_css('a.product-item img')[i].click()
    
    sample = browser.links.find_by_text('Sample').first
    hemisphereInfo["img_url"] = sample['href']
    
    # Title
    hemisphereInfo['title'] = browser.find_by_css('h2.title').text
    
    # Append to list
    hemisphere_image_urls.append(hemisphereInfo)
    
    # Browse back to repeat
    browser.back()

# Quit browser
#browser.quit()

In [20]:
# print hemispere image and title 
print(f"Cerberus Hemisphere Enhanced:\n{hemisphere_image_urls[0]}\n")
print(f"Schiaparelli Hemisphere Enhanced:\n{hemisphere_image_urls[1]}\n")
print(f"Syrtis Major Hemisphere Enhanced:\n{hemisphere_image_urls[2]}\n")
print(f"Valles Marineris Hemisphere Enhanced:\n{hemisphere_image_urls[3]}")

Cerberus Hemisphere Enhanced:
{'img_url': 'https://marshemispheres.com/images/full.jpg', 'title': 'Cerberus Hemisphere Enhanced'}

Schiaparelli Hemisphere Enhanced:
{'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg', 'title': 'Schiaparelli Hemisphere Enhanced'}

Syrtis Major Hemisphere Enhanced:
{'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg', 'title': 'Syrtis Major Hemisphere Enhanced'}

Valles Marineris Hemisphere Enhanced:
{'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg', 'title': 'Valles Marineris Hemisphere Enhanced'}


In [21]:
hemisphere_image_urls

[{'img_url': 'https://marshemispheres.com/images/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]