In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import requests

In [2]:
# Initialize browser
def init_browser():
    # @NOTE: Replace the path with your actual path to the chromedriver
    executable_path = {"executable_path": "chromedriver"}
    return Browser("chrome", **executable_path, headless=False)

### NASA Mars News scraping

In [3]:
browser = init_browser()       # Initialize browser
# Visit NASA Mars News Site  collect the latest News Title and Paragraph Text
url="https://mars.nasa.gov/news/"
browser.visit(url)
html=browser.html
    # Scrape page into soup
soup=bs(html,"html.parser")

In [4]:
# get the most recent news title and teaser
latest_news = soup.find('li', class_="slide")
news_title = latest_news.find('div', class_="content_title").text
news_title

'Curiosity on the Move Again'

In [5]:
news_teaser=latest_news.find('div', class_="article_teaser_body").text
news_teaser

"NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles."

In [6]:
news_date=latest_news.find('div', class_="list_date").text
news_date

'November  6, 2018'

### JPL Mars Space Images

In [8]:
    # Visit JPL Mars Space Images  Use splinter to navigate the site and find the image url for the current Featured Mars Image
base_url="https://www.jpl.nasa.gov"
images_url="/spaceimages/?search=&category=Mars/"
url=base_url + images_url
     # click past first page
browser.visit(url)
browser.click_link_by_partial_text('FULL IMAGE')
browser.click_link_by_partial_text('more info')
html=browser.html
    # Scrape page into soup 
soup=bs(html,"html.parser")

In [13]:
figure=soup.find('figure',class_='lede')
rel_image=figure.find('a')['href']
featured_image_url=base_url + rel_image
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19382_hires.jpg


### Mars Weather

In [14]:
    # Visit Mars Weather twitter account  scrape the latest Mars weather tweet from the page
url="https://twitter.com/marswxreport?lang=en"
    # Scrape page into soup
browser.visit(url)
html=browser.html
soup=bs(html,"html.parser")

In [15]:
mars_weather = soup.find('p', class_="TweetTextSize").text
mars_weather

'Sol 2224 (2018-11-08), high 0C/32F, low -72C/-97F, pressure at 8.65 hPa, daylight 06:19-18:36'

### Mars Facts

In [3]:
import pandas as pd

In [4]:
# Visit Mars Facts webpage  use Pandas to scrape the table of facts about the planet( Diameter, Mass, etc)  
url="http://space-facts.com/mars/"
    # Scrape page into pandas
tables = pd.read_html(url)
df=tables[0]

In [5]:
df_a=df.rename(columns={0:'Description',1:'Value'})
df_b=df_a.set_index(['Description','Value'])
html_table = df_b.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th></th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <th>6,792 km</th>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <th>6,752 km</th>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <th>6.42 x 10^23 kg (10.7% Earth)</th>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <th>2 (Phobos &amp; Deimos)</th>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <th>227,943,824 km (1.52 AU)</th>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <th>687 days (1.9 years)</th>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <th>-153 to 20 °C</th>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <th>2nd millennium BC</th>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <th>Egyptian astronomers</th>\n    </tr>

### Mars Hemispheres

In [19]:
    # Visit USGS Astrogeology site  obtain high resolution images for each of Mar's hemispheres.
base_url="https://astrogeology.usgs.gov"
image_url="/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
url=base_url + image_url
    # Scrape page into soup
browser.visit(url)
hemisphere_image_urls=[]

In [20]:
# Cerberus Hemisphere
browser.click_link_by_partial_text('Cerberus Hemisphere')
html=browser.html
soup=bs(html,"html.parser")
rel_image=soup.find('img',class_="wide-image")['src']
image_url=base_url + rel_image
print(image_url)
image_title=soup.find('h2', class_="title").text
print(image_title)
#save in dictionary and add to image list
image_dict={"title":image_title, "img_url":image_url}
hemisphere_image_urls.append(image_dict)

https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
Cerberus Hemisphere Enhanced


In [21]:
# Schiaparelli Hemisphere
browser.back()
browser.click_link_by_partial_text('Schiaparelli Hemisphere')
html=browser.html
soup=bs(html,"html.parser")
rel_image=soup.find('img',class_="wide-image")['src']
image_url=base_url + rel_image
print(image_url)
image_title=soup.find('h2', class_="title").text
print(image_title)
#save in dictionary and add to image list
image_dict={"title":image_title, "img_url":image_url}
hemisphere_image_urls.append(image_dict)

https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
Schiaparelli Hemisphere Enhanced


In [22]:
# Syrtis Major Hemisphere
browser.back()
browser.click_link_by_partial_text('Syrtis Major Hemisphere')
html=browser.html
soup=bs(html,"html.parser")
rel_image=soup.find('img',class_="wide-image")['src']
image_url=base_url + rel_image
print(image_url)
image_title=soup.find('h2', class_="title").text
print(image_title)
#save in dictionary and add to image list
image_dict={"title":image_title, "img_url":image_url}
hemisphere_image_urls.append(image_dict)

https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
Syrtis Major Hemisphere Enhanced


In [23]:
# Valles Marineris Hemisphere
browser.back()
browser.click_link_by_partial_text('Valles Marineris Hemisphere')
html=browser.html
soup=bs(html,"html.parser")
rel_image=soup.find('img',class_="wide-image")['src']
image_url=base_url + rel_image
print(image_url)
image_title=soup.find('h2', class_="title").text
print(image_title)
image_dict={"title":image_title, "img_url":image_url}
hemisphere_image_urls.append(image_dict)

https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg
Valles Marineris Hemisphere Enhanced


In [24]:
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]


In [25]:
browser.quit()