# Scrape Missions to Mars:

## Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.
* URL: https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest

## Dependencies

In [35]:
import shutil

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

In [36]:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
# Print the location of the chromedriver binary found on PATH; splinter's
# Chrome driver needs this executable to control the browser.
!which chromedriver

/usr/local/bin/chromedriver


In [37]:
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# filesystem layout; fall back to the previously hard-coded location when the
# binary is not on PATH.
executable_path = {'executable_path': shutil.which('chromedriver') or '/usr/local/bin/chromedriver'}
# headless=False opens a visible Chrome window that every later cell drives.
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [31]:
# Activate URL in browser
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

# The parsing cells read elements with class "article_teaser_body" out of
# browser.html. Wait (up to 10 s) for one to exist so the HTML snapshot is not
# taken before the article list is in the DOM. The boolean result is not
# checked here: a missing element still surfaces in the parsing cell.
browser.is_element_present_by_css('.article_teaser_body', wait_time=10);

In [32]:
# Snapshot the page currently loaded in the browser and parse it
# with Python's built-in HTML parser.
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [33]:
# Latest news title:
# Anchor on the first teaser and walk back to the closest preceding
# "content_title" element, so the title and the teaser come from the same
# article instead of from two unrelated positional indexes ([1] vs [0]).
# NOTE(review): assumes each article's title precedes its teaser in the
# markup -- consistent with title index 1 pairing with teaser index 0 before.
first_teaser = soup.find(class_ = "article_teaser_body")
title_tag = (
    first_teaser.find_previous(class_ = "content_title")
    if first_teaser is not None
    else None
)
if title_tag is None:
    raise ValueError("No news title found; the page may not have finished loading.")
news_title = title_tag.text

print(news_title)

Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover


In [34]:
# Latest news paragraph:
# limit=1 stops the search at the first teaser element; [0] unwraps it.
first_teaser = soup.find_all(class_="article_teaser_body", limit=1)[0]
news_p = first_teaser.text

print(news_p)

NASA chose a seventh-grader from Virginia as winner of the agency's "Name the Rover" essay contest. Alexander Mather's entry for "Perseverance" was voted tops among 28,000 entries. 


### JPL Mars Space Images - Featured Image

In [8]:
# Activate URL in browser
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

# The parsing cell looks up the element with class "button fancybox"; wait
# (up to 10 s) for it to exist before the HTML snapshot is taken. Returns
# immediately when the element is already present.
browser.is_element_present_by_css('.button.fancybox', wait_time=10);

In [9]:
# Snapshot the page currently loaded in the browser and parse it
# with Python's built-in HTML parser.
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [10]:
# Latest Space Image:
# The element with class "button fancybox" carries the image path in its
# data-fancybox-href attribute.
fancybox_button = soup.find(class_='button fancybox')
image = fancybox_button.get('data-fancybox-href')

image

'/spaceimages/images/mediumsize/PIA17470_ip.jpg'

In [11]:
# Latest space image URL:
# `image` is a site-relative path, so prefix it with the site origin.
base_url = 'https://www.jpl.nasa.gov'

featured_image_url = ''.join((base_url, image))

featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17470_ip.jpg'

### Mars Weather

In [44]:
# Activate URL in browser
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

# Give the timeline up to 10 s to show a weather report before the HTML
# snapshot is taken. The result is not checked here; a missing report is
# detected when the weather text is extracted.
# NOTE(review): assumes reports contain the text "InSight sol" -- confirm.
browser.is_text_present('InSight sol', wait_time=10);

In [45]:
# Snapshot the page currently loaded in the browser and parse it
# with Python's built-in HTML parser.
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [46]:
# Latest Weather:
# Candidate text spans, selected by the same class string as before.
spans = soup.find_all(class_ = "css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0")

# Select the first span that actually is a weather report instead of whatever
# sits at a fixed position (previously index 27), which shifts whenever the
# number of rendered elements on the page changes. Newlines are normalised
# first so the prefix check sees the same text the final value is built from.
candidates = (span.text.replace('\n', ' ') for span in spans)
weather = next((text for text in candidates if text.startswith('InSight sol')), None)
if weather is None:
    raise ValueError("No 'InSight sol ...' weather report found in the page.")

# Drop the "InSight " label so the value starts at "sol ...".
weather = weather.replace('InSight ', '')

weather

'sol 455 (2020-03-08) low -95.4ºC (-139.8ºF) high -13.0ºC (8.5ºF) winds from the SSE at 6.0 m/s (13.5 mph) gusting to 20.7 m/s (46.2 mph) pressure at 6.40 hPa'

### Mars Facts

In [16]:
# Activate URL in browser
url = 'https://space-facts.com/mars/'
browser.visit(url)

# The parsing cell reads the table with class
# "tablepress tablepress-id-p-mars"; wait (up to 10 s) for it to exist before
# the HTML snapshot is taken. Returns immediately when already present.
browser.is_element_present_by_css('.tablepress.tablepress-id-p-mars', wait_time=10);

In [17]:
# Snapshot the page currently loaded in the browser and parse it
# with Python's built-in HTML parser.
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [18]:
# Facts:
facts = soup.find_all(class_="tablepress tablepress-id-p-mars")

# Collect the cells of every body row once; the first cell is the label and
# the second cell is its value.
row_cells = [row.find_all("td") for row in facts[0].tbody.find_all("tr")]
description = [cells[0].text for cells in row_cells]
value = [cells[1].text for cells in row_cells]

# Results:
# Data Frame
Facts_DF = pd.DataFrame({"Description": description, "Value": value})

Facts_DF

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [19]:
# Facts to HTML:
# Render the facts DataFrame as an HTML <table> string; index = False leaves
# the row-index column out of the markup.
Facts_HTML = Facts_DF.to_html(index = False)
Facts_HTML

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

### Mars Hemispheres

In [20]:
# Activate URL in browser
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

# The parsing cell reads the container with class "collapsible results"; wait
# (up to 10 s) for it to exist before the HTML snapshot is taken. Returns
# immediately when already present.
browser.is_element_present_by_css('.collapsible.results', wait_time=10);

In [21]:
# Snapshot the page currently loaded in the browser and parse it
# with Python's built-in HTML parser.
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [22]:
# Hemispheres:
hemispheres = soup.find(class_ = "collapsible results")
if hemispheres is None:
    raise ValueError("Search results container ('collapsible results') not found in the page.")

# Iterating a Tag directly also yields the text nodes between elements, which
# is presumably why a fixed [1:5] window had to be sliced out of the results
# before. Walk only the direct child *elements* and keep the first link that
# has an href in each, so no positional slice is needed.
List = []
for hemisphere in hemispheres.find_all(True, recursive=False):
    link = hemisphere.find("a", href=True)
    if link is not None:
        List.append(link)

# Extract href's:
urls = [element['href'] for element in List]

urls

['/search/map/Mars/Viking/cerberus_enhanced',
 '/search/map/Mars/Viking/schiaparelli_enhanced',
 '/search/map/Mars/Viking/syrtis_major_enhanced',
 '/search/map/Mars/Viking/valles_marineris_enhanced']

Extract images:

- 1 of 4

In [23]:
# Open the detail page of the first hemisphere
url1 = 'https://astrogeology.usgs.gov'
url = ''.join((url1, urls[0]))
browser.visit(url)

# Snapshot and parse the detail page
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

# Text of the first element with class "title"
hem_title_1 = soup.find(class_="title").get_text()

# href of the first element with target="_blank", kept as a one-item list
hem_img_1 = soup.find(attrs={"target": "_blank"}).get_attribute_list('href')

- 2 of 4

In [24]:
# Open the detail page of the second hemisphere
url1 = 'https://astrogeology.usgs.gov'
url = ''.join((url1, urls[1]))
browser.visit(url)

# Snapshot and parse the detail page
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

# Text of the first element with class "title"
hem_title_2 = soup.find(class_="title").get_text()

# href of the first element with target="_blank", kept as a one-item list
hem_img_2 = soup.find(attrs={"target": "_blank"}).get_attribute_list('href')

- 3 of 4

In [25]:
# Open the detail page of the third hemisphere
url1 = 'https://astrogeology.usgs.gov'
url = ''.join((url1, urls[2]))
browser.visit(url)

# Snapshot and parse the detail page
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

# Text of the first element with class "title"
hem_title_3 = soup.find(class_="title").get_text()

# href of the first element with target="_blank", kept as a one-item list
hem_img_3 = soup.find(attrs={"target": "_blank"}).get_attribute_list('href')

- 4 of 4

In [26]:
# Open the detail page of the fourth hemisphere
url1 = 'https://astrogeology.usgs.gov'
url = ''.join((url1, urls[3]))
browser.visit(url)

# Snapshot and parse the detail page
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

# Text of the first element with class "title"
hem_title_4 = soup.find(class_="title").get_text()

# href of the first element with target="_blank", kept as a one-item list
hem_img_4 = soup.find(attrs={"target": "_blank"}).get_attribute_list('href')

Final list of dictionaries

In [27]:
# Pair each hemisphere title with its image link; every hem_img_N is a
# one-item list, so the link itself is its first entry.
hem_titles = (hem_title_1, hem_title_2, hem_title_3, hem_title_4)
hem_links = (hem_img_1, hem_img_2, hem_img_3, hem_img_4)
hemisphere_image_urls = [
    {"title": title, "img_url": links[0]}
    for title, links in zip(hem_titles, hem_links)
]
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]