# Mission to Mars

In [1]:
# Dependencies
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Point Splinter at the system chromedriver binary (Linux-specific location)
# and open a visible, non-headless Chrome/Chromium window
executable_path = dict(executable_path='/usr/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

### NASA Mars News

In [3]:
# URL of the Mars news page to scrape
news_url = 'https://redplanetscience.com/'

# Load the page in the automated browser
browser.visit(news_url)

# The page source is snapshotted right after this cell, so wait until the
# article container ('div.list_text') is actually in the DOM. Without this,
# a Restart & Run All can capture the HTML before the list exists and the
# later soup.find(...) call returns None.
# NOTE(review): presumably the list is filled in by client-side script - confirm.
if not browser.is_element_present_by_css('div.list_text', wait_time=10):
    raise RuntimeError(
        "No 'div.list_text' element appeared on " + news_url + " within 10 seconds"
    )

In [4]:
# Parse the browser's current page source using the lxml parser
soup = BeautifulSoup(markup=browser.html, features='lxml')
soup

<html><head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<link crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta1/dist/css/bootstrap.min.css" integrity="sha384-giJF6kkoqNQ00vy+HMDP7azOuL0xtbfIcaT9wjKHr8RbDVddVHyTfAAsrekwKmP1" rel="stylesheet"/>
<link href="css/font.css" rel="stylesheet" type="text/css"/>
<link href="css/app.css" rel="stylesheet" type="text/css"/>
<link crossorigin="anonymous" href="https://pro.fontawesome.com/releases/v5.10.0/css/all.css" integrity="sha384-AYmEC3Yw5cVb3ZcuHtOA93w35dYTsvhLPVnYs9eStHfGJvOvKxVfELGroGkvsg+p" rel="stylesheet"/>
<title>News - Mars Exploration Program</title>
</head>
<body>
<div class="col-md-12">
<div class="row">
<nav class="navbar navbar-expand-lg navbar-light fixed-top">
<div class="container-fluid">
<a class="navbar-brand" href="#">
<img src="image/nasa.png" width="80"/><span class="logo">MARS Planet Science</span>
<span class="logo1">Exploration Program</spa

In [5]:
# Take the first div with class 'list_text': the container of the latest article
parent_div = soup.find('div', attrs={'class': 'list_text'})
parent_div

<div class="list_text">
<div class="list_date">March 19, 2022</div>
<div class="content_title">A New Video Captures the Science of NASA's Perseverance Mars Rover</div>
<div class="article_teaser_body">With a targeted launch date of July 30, the next robotic scientist NASA is sending to the to the Red Planet has big ambitions.</div>
</div>

In [6]:
# Pull the headline and teaser paragraph out of the article container,
# keeping both in variables so they can be referenced later
news_title = parent_div.find('div', class_='content_title').text
news_p = parent_div.find('div', class_='article_teaser_body').text

# Show both values, separated by a divider line, as a quick check
print(news_title, '---', news_p, sep='\n')

A New Video Captures the Science of NASA's Perseverance Mars Rover
---
With a targeted launch date of July 30, the next robotic scientist NASA is sending to the to the Red Planet has big ambitions.


### JPL Mars Space Images - Featured Image

In [7]:
# URL of the JPL featured-image page to scrape
JPL_url = "https://spaceimages-mars.com/"

# Load the page in the automated browser
browser.visit(JPL_url)

In [8]:
# Locate the link that opens the full-size .jpg, then click its first match
full_image = browser.find_by_xpath('/html/body/div[1]/div/a')
full_image.first.click()

In [9]:
# Re-parse the page source and read the src of the 'fancybox-image' <img>
img_soup = BeautifulSoup(browser.html, 'lxml')
image_url = img_soup.find('img', class_='fancybox-image').get('src')

# Resolve the src against the site URL. urljoin (rather than plain string
# concatenation) also gives a valid result if the src is root-relative
# ("/image/...") or already an absolute URL.
featured_image_url = urljoin(JPL_url, image_url)

# Display the final URL as a check
featured_image_url

'https://spaceimages-mars.com/image/featured/mars3.jpg'

### Mars Facts

In [10]:
# Read every HTML table on the facts page, keep the one at index 1,
# and label its two columns
mars_df = (
    pd.read_html('https://galaxyfacts-mars.com/')[1]
    .set_axis(['Descriptor', 'Value'], axis='columns')
)
mars_df

Unnamed: 0,Descriptor,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


### Mars Hemispheres

In [11]:
# Site hosting the hi-res images of Mars' hemispheres
hemisphere_url = "https://marshemispheres.com/"

# Load the page in the automated browser
browser.visit(hemisphere_url)

In [12]:
# Accumulates one {'img_url': ..., 'title': ...} dictionary per hemisphere
hemisphere_image_urls = []

# Count the title links on the landing page; the last 'a.product-item h3'
# match is not relevant, so it is excluded from the count
images = browser.find_by_css('a.product-item h3')
link_count = len(images) - 1

for idx in range(link_count):
    # Look the links up again on every pass, then open the idx-th one
    browser.find_by_css('a.product-item h3')[idx].click()
    # Record the 'Sample' link target and the page heading for this hemisphere
    hemisphere_image_urls.append({
        'img_url': browser.find_by_text('Sample').first['href'],
        'title': browser.find_by_css('h2.title').text,
    })
    # Return to the landing page before the next iteration
    browser.back()

In [13]:
# Display the collected hemisphere dictionaries (img_url + title) as a check
hemisphere_image_urls

[{'img_url': 'https://marshemispheres.com/images/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [14]:
# Scraping is finished: quit the automated browser
browser.quit()