In [95]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import re
import pandas as pd

In [2]:
# Point splinter at the local chromedriver binary and open a visible
# (non-headless) Chrome window that the rest of the notebook drives.
executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

### NASA Mars News

In [34]:
# Open the NASA Mars News listing in the browser.
# The query string requests 40 items per page ordered by publish date, newest first.
url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
browser.visit(url)

In [38]:
# Take the HTML of the page currently loaded in the browser and parse it
# with BeautifulSoup using the lxml parser.
html = browser.html
soup = bs(html,"lxml")

In [39]:
# The news stories live in the gallery list: 'ul' with class 'item_list'.
# Both the headline and the teaser are looked up inside that list so that
# they are taken from the same part of the page.
news_gallery = soup.find('ul',{'class':'item_list'})
if news_gallery is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise ValueError("News gallery ('ul' with class 'item_list') not found in the parsed page")
# Latest headline is stored in an 'a' tag in a div with class 'content_title'
news_div = news_gallery.find('div',{'class':'content_title'})
news_title = news_div.find('a').text
# Paragraph teaser is in div with class 'article_teaser_body'
news_p = news_gallery.find('div',{'class':'article_teaser_body'}).text
print(news_title)
print(news_p)

10.9 Million Names Now Aboard NASA's Perseverance Mars Rover
As part of NASA's 'Send Your Name to Mars' campaign, they've been stenciled onto three microchips along with essays from NASA's 'Name the Rover' contest. Next stop: Mars.


### JPL Mars Space Images - Featured Image

In [40]:
# Open the JPL Space Images search page, filtered to the Mars category
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [41]:
# Re-read the browser's current HTML and re-parse it.
# This overwrites the `html` and `soup` values from the previous page.
html = browser.html
soup = bs(html,'lxml')

In [43]:
# Build the absolute URL of the full-size featured image.
# Inside the 'ul' with class 'articles', the first 'a' with class 'fancybox'
# carries the large-size image path in its 'data-fancybox-href' attribute;
# that path is appended to the JPL host.
jpl_base_url = "https://www.jpl.nasa.gov"
jpl_articles = soup.find('ul', class_='articles')
featured_image_url = jpl_base_url + jpl_articles.find('a', class_='fancybox')['data-fancybox-href']
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23814_hires.jpg


### Mars Weather

In [90]:
# Open the Mars Weather Report Twitter profile (marswxreport) in the browser
url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)

In [91]:
# Scroll the page down to load tweets.
# window.scrollTo takes (x, y): keep the horizontal position at 0 and jump
# vertically to the bottom of the document.
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

In [92]:
# Re-read the browser's current HTML (after scrolling) and re-parse it.
# This overwrites the `html` and `soup` values from the previous page.
html = browser.html
soup = bs(html,'lxml')

In [94]:
# The weather report is the first 'span' whose text starts with 'InSight sol'.
weather_span = soup.find('span', string=re.compile('^InSight sol'))
if weather_span is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on None.
    raise ValueError("No 'span' starting with 'InSight sol' found in the parsed page")
mars_weather = weather_span.text

print(mars_weather)

InSight sol 452 (2020-03-05) low -94.2ºC (-137.7ºF) high -9.2ºC (15.4ºF)
winds from the SSW at 6.5 m/s (14.5 mph) gusting to 19.6 m/s (43.9 mph)
pressure at 6.30 hPa


### Mars Facts

In [64]:
# URL of the Mars facts page. It is only stored here; this cell does not
# navigate the browser to it.
url = "https://space-facts.com/mars/"

In [67]:
# pandas fetches the URL itself and returns a list of DataFrames, one per table found
tables = pd.read_html(url)
# The facts table was identified by manual inspection as the first entry (index 0)
mars_table = tables[0]
mars_table

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [69]:
# Render the facts table as an HTML string, omitting the header row and the index column
html_table = mars_table.to_html(header=False,index=False)

### Mars Hemispheres

In [70]:
# Open the USGS Astrogeology search results for "hemisphere enhanced" with target Mars
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

In [71]:
# Re-read the browser's current HTML and re-parse it.
# This overwrites the `html` and `soup` values from the previous page.
html = browser.html
soup = bs(html,'lxml')

In [76]:
# Every search result sits in a 'div' with class 'item'. Its product link
# ('a' with class 'itemLink product-item') holds a path that is appended to
# the USGS host to form the detail-page URL.
items = soup.find_all('div', class_='item')
page_urls = [
    "https://astrogeology.usgs.gov" + div.find('a', class_='itemLink product-item')['href']
    for div in items
]
print(page_urls)

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']


In [84]:
# Visit each hemisphere detail page and collect one record per page.
hemisphere_image_urls = []
for page in page_urls:
    # Load the page, then re-read and re-parse the browser's HTML
    browser.visit(page)
    html = browser.html
    soup = bs(html, 'lxml')

    # 'img_url' is the href of the link whose text is exactly 'Original';
    # 'title' is the text of the 'h2' with class 'title'.
    img_dict = {
        'img_url': soup.find('a', text='Original')['href'],
        'title': soup.find('h2', class_='title').text,
    }

    hemisphere_image_urls.append(img_dict)

In [85]:
# Display the collected list of {'img_url', 'title'} dicts, one per hemisphere page
hemisphere_image_urls

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif',
  'title': 'Valles Marineris Hemisphere Enhanced'}]