# Mission To Mars

In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [2]:
# set executable path to NASA Mars News
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 91.0.4472
[WDM] - Get LATEST driver version for 91.0.4472
[WDM] - Driver [C:\Users\Edgar\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache






In [3]:
# Visit the mars nasa news site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
# Optional delay for loading the page
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [4]:
html = browser.html
news_soup = soup(html, 'html.parser')
slide_elem = news_soup.select_one('ul.item_list li.slide')

In [5]:
# we have our text/code in slide_elem---we will look for <div> element and class=content_title in this variable

slide_elem.find("div", class_='content_title')   # output is an html containing content title inside <div. element

<div class="content_title"><a href="/news/8980/nasas-self-driving-perseverance-mars-rover-takes-the-wheel/" target="_self">NASA's Self-Driving Perseverance Mars Rover 'Takes the Wheel'</a></div>

In [6]:
# the title is somewhere in the html code... thats all we need not the html stuff
# first we will find <a> tag and save it in an object ---news_title

# Use the parent element to find the first `a` tag and save it as `news_title`
news_title = slide_elem.find("div", class_='content_title').get_text()
news_title


"NASA's Self-Driving Perseverance Mars Rover 'Takes the Wheel'"

In [7]:
# now lets look for the summary (teser) using find
# when we find 'teser' from DevTools we get the first summary/teser----article_teaser_body

news_p = slide_elem.find('div', class_="article_teaser_body").get_text()
news_p

'The agency’s newest rover is trekking across the Martian landscape using a newly enhanced auto-navigation system.'

# Featured Images
### after scraping text, lets now add images

In [8]:
# when we open the NASA page, the first image we see is the Featured Image----we want the fullblown image (use its link for url)
# we will first visit the webpage/url---a new page opens with full blown image

# Visit URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

## Using buttons

In [9]:
# Find and click the full image button
full_image_elem = browser.find_by_tag('button')[1]
full_image_elem.click()

https://spaceimages-mars.com/image/featured/mars3.jpg

In [10]:
# parse resulting html with soup
html = browser.html

img_soup = soup(html, 'html.parser')

In [11]:
# we now need the relative image url
img_url_rel = img_soup.select_one('img',class_='fancybox-image').get('src')

img_url_rel

'/_nuxt/img/logo-tribrand-color.8f95c3b.svg'

In [12]:
# use base URL to create absolute URL
img_url = f'https://www.jpl.nasa.gov{img_url_rel}'
img_url

'https://www.jpl.nasa.gov/_nuxt/img/logo-tribrand-color.8f95c3b.svg'

## Getting Mars Facts
### We will be scrapping table content
### A table is made up of smaller containers (tbody) nested in the table tag
###  tr is the tag for each table---table data is stored in td tags----these esablish the columns

In [13]:
# instead of scraping each row, we read the html <td> tags using pythons panda

df = pd.read_html('https://galaxyfacts-mars.com')[0]  # this is the mars facts webpage (0 - tells panda to pull first table)

df.columns=['description', 'Mars', 'Earth']

df.set_index('description', inplace=True)

df

Unnamed: 0_level_0,Mars,Earth
description,Unnamed: 1_level_1,Unnamed: 2_level_1
Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [14]:
# we will need to add this clean table into a webpage--any changes on the table will be reflected in the webpage
# we will have to turn it back to html using .to_hmtl
df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>

## Shut down the  browser
#### Browser should always be shut else it will continue using computer resources

In [15]:
# to shut down the browser
browser.quit()

## Mission to Mars Challenge using Starter Code

In [16]:
# Import Splinter, BeautifulSoup, and Pandas
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager

In [17]:

# set executable path to NASA Mars News
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 91.0.4472
[WDM] - Get LATEST driver version for 91.0.4472
[WDM] - Driver [C:\Users\Edgar\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache






### Visiting NASA News Site

In [18]:
# Visit the mars nasa news site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Optional delay for loading the page
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [19]:
# Convert the browser html to a soup object and then quit the browser
html = browser.html
news_soup = soup(html, 'html.parser')

slide_elem = news_soup.select_one('ul.item_list li.slide')

In [20]:
slide_elem.find("div", class_='content_title')

<div class="content_title"><a href="/news/8980/nasas-self-driving-perseverance-mars-rover-takes-the-wheel/" target="_self">NASA's Self-Driving Perseverance Mars Rover 'Takes the Wheel'</a></div>

In [21]:
# Use the parent element to find the first a tag and save it as `news_title`
news_title = slide_elem.find("div", class_='content_title').get_text()
news_title

"NASA's Self-Driving Perseverance Mars Rover 'Takes the Wheel'"

In [22]:
# Use the parent element to find the paragraph text
news_p = slide_elem.find('div', class_="article_teaser_body").get_text()
news_p

'The agency’s newest rover is trekking across the Martian landscape using a newly enhanced auto-navigation system.'

### JPL Space Featured Image

In [23]:
# Visit URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [24]:
# Find and click the full image button
full_image_elem = browser.find_by_tag('button')[1]
full_image_elem.click()

In [25]:
# Parse the resulting html with soup
html = browser.html
img_soup = soup(html, 'html.parser')

In [26]:
# we now need the relative image url
img_url_rel = img_soup.select_one('img',class_='fancybox-image').get('src')

img_url_rel

'/_nuxt/img/logo-tribrand-color.8f95c3b.svg'

In [27]:
# Use the base url to create an absolute url
img_url = f'https://www.jpl.nasa.gov{img_url_rel}'
img_url

'https://www.jpl.nasa.gov/_nuxt/img/logo-tribrand-color.8f95c3b.svg'

## Mars Facts

In [28]:
df = pd.read_html('http://space-facts.com/mars/')[0]

df.head()

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [29]:
df.columns=['Description', 'Mars']
df.set_index('Description', inplace=True)
df

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [30]:
# convert the table data back to html code so we can use it in the web
df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n

In [31]:
# Parse the data
html = browser.html
weather_soup = soup(html, 'html.parser')

In [32]:
# Scrape the Daily Weather Report table
weather_table = weather_soup.find('table', class_='mb_table')
print(weather_table)

None


## Scrape High Resolution Hemisphere Images and Titles

In [33]:
# 1. Use browser to visit the URL 
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [34]:
# 2. Create a list to hold the images and titles.
hemisphere_image_urls = []

# 3. Write code to retrieve the image urls and titles for each hemisphere.
for i in range(4):
    #create empty dictionary
    hemispheres = {}
    browser.find_by_css('a.product-item h3')[i].click()
    element = browser.find_by_text('Sample').first
    img_url = element['href']
    title = browser.find_by_css("h2.title").text
    hemispheres["img_url"] = img_url
    hemispheres["title"] = title
    hemisphere_image_urls.append(hemispheres)
    browser.back()

In [35]:
# 4. Print the list that holds the dictionary of each image url and title.
hemisphere_image_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [36]:
# 5. Quit the browser
browser.quit()