# Mission to Mars - web scraping 

In [1]:
import os
import re
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Connect to the Chrome browser.
# The chromedriver location can be overridden with the CHROMEDRIVER_PATH
# environment variable so the notebook is not tied to a single machine;
# when the variable is unset, the original local path is used unchanged.

executable_path = {
    "executable_path": os.environ.get(
        "CHROMEDRIVER_PATH",
        "C:/Users/cindy/Downloads/chromedriver_win32/chromedriver.exe",
    )
}
browser = Browser("chrome", **executable_path, headless=False)


# NASA Mars News

In [25]:
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Give the page up to 10 seconds to render the teaser element that is scraped
# from this snapshot; the call returns as soon as the element is present.
browser.is_element_present_by_css("div.article_teaser_body", wait_time=10)
html = browser.html

soup = BeautifulSoup(html, "html.parser")

# find most recent news title from mars.nasa.gov
# NOTE(review): index 1 (not 0) is used -- presumably the first
# 'content_title' div on the page is not a news item; confirm against the
# live markup.
news_title = soup.find_all('div', class_='content_title')[1].text
print(news_title)

How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus 


In [26]:
# Teaser paragraph: text of the first 'article_teaser_body' div in the parsed
# mars.nasa.gov news page.

news_p = soup.find('div', attrs={'class': 'article_teaser_body'}).get_text()
print(news_p)

Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.


# JPL Mars Space Images - Featured Image

In [27]:
# Open the JPL space-images search page filtered to the Mars category.
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)
# Snapshot taken before any link is clicked.
html = browser.html

# Click the link whose text contains 'FULL IMAGE', pause 5 seconds to let the
# browser finish loading, then click the link whose text contains 'more info'.

browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(5)

browser.click_link_by_partial_text('more info')



In [28]:
# Capture the markup of the page the browser is currently showing and parse it.
html = browser.html
soup_jpl = BeautifulSoup(markup=html, features="html.parser")

In [29]:
# First <figure> element carrying the 'lede' class in the parsed JPL page.
featured_url_image1 = soup_jpl.find('figure', attrs={'class': 'lede'})

In [30]:
# href of the first link inside the lede figure.
featured_url_image2 = featured_url_image1.find('a')['href']

# urljoin gives the same result as plain concatenation for a site-relative
# href ("/spaceimages/...") and also stays correct if the href is already an
# absolute URL.
featured_image_url = urljoin('https://www.jpl.nasa.gov', featured_url_image2)
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA20465_hires.jpg


# Mars Weather Twitter account

In [31]:
# Open the Mars weather Twitter feed. A 10-second pause follows the visit so
# that all the data has time to load before the markup is captured.

url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)
time.sleep(10)

html = browser.html
soup_twitter = BeautifulSoup(markup=html, features="html.parser")

In [32]:
# Locate the weather report with a regular expression. The report text carries
# a "sol <number>" marker (e.g. "InSight sol 501"), so the pattern requires the
# whole word "sol" followed by digits instead of any text that merely contains
# the letters "sol".
pattern = re.compile(r'\bsol\s+\d+')

# `string=` is the current name of the Beautiful Soup filter that used to be
# spelled `text=`.
mars_weather = soup_twitter.find('span', string=pattern).text
print(mars_weather)

InSight sol 501 (2020-04-24) low -93.5ºC (-136.2ºF) high -4.3ºC (24.3ºF)
winds from the SW at 5.0 m/s (11.3 mph) gusting to 15.8 m/s (35.3 mph)
pressure at 6.70 hPa


# Mars Facts

In [33]:
# pd.read_html parses the HTML tables at the URL into a list of DataFrames;
# the entry at position 2 is the one kept for the Mars facts.

url = "https://space-facts.com/mars/"
mars_list = pd.read_html(io=url)
mars_list2 = mars_list[2]
mars_list2

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [34]:
# Give the two columns descriptive labels
mars_list2.columns = pd.Index(['Feature', 'Value'])
print(mars_list2)

                Feature                          Value
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers


In [35]:
# Put the data back into HTML format with HTML tags.
# The re-index is guarded so the cell can be re-run: once 'Feature' is the
# index it is no longer a column, and calling set_index('Feature') again
# would raise a KeyError.
if mars_list2.index.name != 'Feature':
    mars_list2 = mars_list2.set_index('Feature')
mars_facts_html = mars_list2.to_html(classes='table table-bordered')
mars_facts_html

'<table border="1" class="dataframe table table-bordered">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Feature</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers

# Mars Hemispheres

In [52]:
# Open the USGS Astrogeology search results for the enhanced Mars hemisphere
# products, pause 10 seconds after the visit, then parse the page markup.

url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)
time.sleep(10)

html = browser.html
soup_hemi = BeautifulSoup(markup=html, features="html.parser")

In [53]:
# Container div whose class attribute is exactly "collapsible results".
hemi_list = soup_hemi.find('div', attrs={'class': 'collapsible results'})

In [54]:
# Every 'item' div inside the results container (one per search result).
hemi_list2 = hemi_list.find_all('div', attrs={'class': 'item'})

In [55]:
# Loop through the search-result items; for each one, open its detail page and
# record the hemisphere title together with the first link found in the
# page's 'downloads' div.
# NOTE: despite its name, hemi_dict is a list of {"title", "img url"} dicts.

hemi_dict = []

for x in hemi_list2:
    title = x.find("h3").text
    print (title)
    # NOTE(review): removing "Enhanced" leaves a trailing space in the stored
    # title (e.g. 'Cerberus Hemisphere '); kept as-is so stored values do not
    # change.
    title = title.replace("Enhanced", "")
    link = x.find("a")["href"]
    # urljoin matches plain concatenation for a site-relative href and stays
    # correct if the href is ever absolute.
    img_link = urljoin("https://astrogeology.usgs.gov", link)
    print (img_link)
    browser.visit(img_link)
    time.sleep(5)
    # Parse into a loop-local name so the notebook-level `html` and `soup`
    # used by earlier cells are not overwritten by each detail page.
    detail_soup = BeautifulSoup(browser.html, "html.parser")
    image = detail_soup.find('div', class_='downloads')
    image2 = image.find("a")["href"]
    hemi_dict.append({"title": title, "img url": image2})


Cerberus Hemisphere Enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced
Schiaparelli Hemisphere Enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced
Syrtis Major Hemisphere Enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced
Valles Marineris Hemisphere Enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced


In [56]:
# Display the collected list of hemisphere title / image-URL records.
hemi_dict

[{'title': 'Cerberus Hemisphere ',
  'img url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere ',
  'img url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere ',
  'img url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere ',
  'img url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]