In [1]:
#dependencies
import pandas as pd
import requests
from splinter import Browser
from bs4 import BeautifulSoup
import html5lib

### NASA Mars News

In [2]:
#NASA Mars news page to be scraped
url = 'https://mars.nasa.gov/news/'

#open a browser session, load the page and keep a copy of its html
with Browser() as browser:
    browser.visit(url)
    html = browser.html

    #build the BeautifulSoup object using the 'html.parser' parser
    soup = BeautifulSoup(html, 'html.parser')

    #gather every <div> whose class is 'list_text';
    #find_all hands them back as an iterable list
    results = soup.find_all(name = 'div', class_ = 'list_text')

In [3]:
#count the scraped elements and print the total
n_results = len(results)
print(n_results)

15


In [130]:
#loop through returned results to scrape desired info
for result in results:

    #error handling: find() returns None when an element is missing, and
    #reading .text from None raises AttributeError. Only that case is caught,
    #so unrelated failures (and KeyboardInterrupt) are no longer hidden.
    try:

        #retrieve article title
        title = result.find('a')

        #access the text content of the title and print
        news_t = title.text
        print('||' + news_t)
        print(' ')

        #retrieve article teaser
        para = result.find('div', class_ = 'article_teaser_body')

        #access the text content of the article teaser and print
        news_p = para.text
        print(news_p)
        print('__________________________________________________')
        print(' ')

    except AttributeError as err:
        #report why this result was skipped instead of a bare 'error'
        print('error: ' + str(err))

### JPL Mars Space Images - Featured Image

In [133]:
#address of the jpl main site
jpl_url = 'https://www.jpl.nasa.gov'

#full address of the page to be scraped, built on top of the main site url
url = jpl_url + '/spaceimages/?search=&category=Mars'

#open a browser session, load the page and keep a copy of its html
with Browser() as browser:
    browser.visit(url)
    html = browser.html

    #build the BeautifulSoup object using the 'html.parser' parser
    soup = BeautifulSoup(html, 'html.parser')

    #gather every <div> whose class is 'carousel_items';
    #find_all hands them back as an iterable list
    results = soup.find_all(name = 'div', class_ = 'carousel_items')

In [134]:
#go through each carousel element and print its image details
for result in results:

    #the title lives in the <h1 class="media_feature_title"> heading
    image_title = result.find('h1', class_ = 'media_feature_title')

    #the description is stored as an attribute on the element's first <a> tag
    anchor = result.find('a')
    image_descript = anchor['data-description']

    #show the title and description, followed by a spacer line
    print(image_title.text)
    print(image_descript)
    print(' ')

    #the same <a> tag holds the image path; prefix it with the jpl site url
    #to get the full image url, then print it
    image_link = anchor['data-fancybox-href']
    featured_image_url = jpl_url + image_link
    print(featured_image_url)


				  Weighing in on the Dumbbell Nebula				
The 'Dumbbell nebula,' also known as Messier 27, pumps out infrared light in this image from NASA's Spitzer Space Telescope. Planetary nebulae are now known to be the remains of stars that once looked a lot like our sun.
 
https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14417_ip.jpg


### Mars Weather

In [26]:
#Mars weather twitter page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'

#open a browser session, load the page and keep a copy of its html
with Browser() as browser:
    browser.visit(url)
    weather_html = browser.html

#build the BeautifulSoup object using the 'html.parser' parser
soup = BeautifulSoup(weather_html, 'html.parser')

#find() returns a single element rather than a list:
#the first <div> whose class is 'js-tweet-text-container'
results = soup.find(name = 'div', class_ = 'js-tweet-text-container')

In [28]:
#locate the first <p> tag inside the tweet container
tweet_paragraph = results.find('p')

#keep its text as the weather report and view it
mars_weather = tweet_paragraph.text
mars_weather

'Sol 1848 (Oct 17, 2017), Sunny, high -28C/-18F, low -80C/-112F, pressure at 8.65 hPa, daylight 05:59-17:42'

### Mars Facts

In [32]:
#address of the Mars facts page to be scraped
fact_url = 'https://space-facts.com/mars/'

#let pandas parse the page's tables (read_html returns a list of dataframes)
#and view the result
fact_table = pd.read_html(io = fact_url)
fact_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [38]:
#create a dataframe from the first (and in this case, only) table and view.
#a copy is taken so that renaming the columns below does not also modify
#the dataframe still held inside fact_table
fact_df = fact_table[0].copy()
fact_df.columns = ['Quantity', 'Value']
fact_df

Unnamed: 0,Quantity,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [41]:
#render the dataframe as an HTML table
table_markup = fact_df.to_html()

#remove the newline characters from the markup, then view the result
fact_html = table_markup.replace('\n', '')
fact_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Quantity</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomer