In [1]:
# Import BeautifulSoup for parsing and splinter for site navigation
from bs4 import BeautifulSoup
from splinter import Browser
executable_path = {"executable_path": "/Users/David W. Jones/class/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

In [2]:
# Visit the NASA news URL
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [3]:
# Scrape page into soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [4]:
# save the most recent article, title and date
article = soup.find("div", class_="list_text")
news_p = article.find("div", class_="article_teaser_body").text
news_title = article.find("div", class_="content_title").text
news_date = article.find("div", class_="list_date").text
print(news_date)
print(news_title)
print(news_p)

January 23, 2018
NASA's Next Mars Lander Spreads its Solar Wings
NASA's next mission to Mars passed a key test Tuesday, extending the solar arrays that will power the InSight spacecraft once it lands on the Red Planet this November.


In [5]:
# Visit the JPL Mars URL
url2 = "https://jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url2)

# Scrape the browser into soup and use soup to find the image of mars
# Save the image url to a variable called `img_url`
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
image = soup.find("img", class_="thumb")["src"]
img_url = "https://jpl.nasa.gov"+image
featured_image_url = img_url
# Use the requests library to download and save the image from the `img_url` above
import requests
import shutil
response = requests.get(img_url, stream=True)
with open('img.jpg', 'wb') as out_file:
    shutil.copyfileobj(response.raw, out_file)
    
# Display the image with IPython.display
from IPython.display import Image
Image(url='img.jpg')    


In [6]:
# Visit the Mars Weather twitter account and scrap the lates Mars weather tweet.
import tweepy
# Twitter API Keys
from key_vault import (consumer_key, 
                    consumer_secret, 
                    access_token, 
                    access_token_secret)

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())
target_user = "marswxreport"
full_tweet = api.user_timeline(target_user , count = 1)
mars_weather=full_tweet[0]['text']
mars_weather

'Happy 14th anniversary @MarsRovers Opportunity, here’s to another 7.4 more (Mars years).  That’s a lot of kilometer… https://t.co/U2OXzM0IXB'

In [7]:
# Visit the Mars facts webpage and scrape table data into Pandas
url3 = "http://space-facts.com/mars/"
browser.visit(url3)

In [15]:
# place data into a dataframe, clean it up and output it into an HTML table
import pandas as pd 
grab=pd.read_html(url3)
mars_data=pd.DataFrame(grab[0])
mars_data.columns=['Mars','Data']
mars_table=mars_data.set_index("Mars")
marsdata = mars_table.to_html(classes='marsdata')
marsdata=marsdata.replace('\n', ' ')
marsdata

'<table border="1" class="dataframe marsdata">   <thead>     <tr style="text-align: right;">       <th></th>       <th>Data</th>     </tr>     <tr>       <th>Mars</th>       <th></th>     </tr>   </thead>   <tbody>     <tr>       <th>Equatorial Diameter:</th>       <td>6,792 km</td>     </tr>     <tr>       <th>Polar Diameter:</th>       <td>6,752 km</td>     </tr>     <tr>       <th>Mass:</th>       <td>6.42 x 10^23 kg (10.7% Earth)</td>     </tr>     <tr>       <th>Moons:</th>       <td>2 (Phobos &amp; Deimos)</td>     </tr>     <tr>       <th>Orbit Distance:</th>       <td>227,943,824 km (1.52 AU)</td>     </tr>     <tr>       <th>Orbit Period:</th>       <td>687 days (1.9 years)</td>     </tr>     <tr>       <th>Surface Temperature:</th>       <td>-153 to 20 °C</td>     </tr>     <tr>       <th>First Record:</th>       <td>2nd millennium BC</td>     </tr>     <tr>       <th>Recorded By:</th>       <td>Egyptian astronomers</td>     </tr>   </tbody> </table>'

In [16]:
# Visit the USGS Astogeology site and scrape pictures of the hemispheres
url4 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url4)

In [17]:
# Use splinter to loop through the 4 images and load them into a dictionary
import time 
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
mars_hemis=[]

In [18]:
# loop through the four tags and load the data to the dictionary

for i in range (4):
    time.sleep(5)
    images = browser.find_by_tag('h3')
    images[i].click()
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    partial = soup.find("img", class_="wide-image")["src"]
    img_title = soup.find("h2",class_="title").text
    img_url = 'https://astrogeology.usgs.gov'+ partial
    dictionary={"title":img_title,"img_url":img_url}
    mars_hemis.append(dictionary)
    browser.back()   

In [19]:
print(mars_hemis)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]
