In [42]:
#dependencies
import pandas as pd
import requests
from splinter import Browser
from bs4 import BeautifulSoup
import html5lib

In [43]:
#create an empty dictionary to store all mars scraped data
mars_data_dict = {}

## NASA Mars News

In [44]:
#retrieve page to be scraped
with Browser() as browser:
    url = 'https://mars.nasa.gov/news/'
    browser.visit(url)
    news_html = browser.html
    
    #create BeautifulSoup object; parse with 'html.parser'
    soup = BeautifulSoup(news_html, 'html.parser')

    #examine the results, then determine element that contains desired info
    #results are returned as an iterable list
    results = soup.find_all('div', class_ = 'list_text')

In [45]:
#create empty lists for article title and article teaser
news_titles = []
news_paras = []


#loop through results to scrape desired info
for result in results:
    
    #error handling
    try:
        
        #retrieve article title and add to titles list
        title = result.find('a').text
        news_titles.append(title)

        #retrieve article teaser and add to teasers list
        para = result.find('div', class_ = 'article_teaser_body').text
        news_paras.append(para)

    except:
        print('error')
        
#print latest article title and teaser        
print('||' + news_titles[0] + '\n\n' + news_paras[0])

||Mars Rover Mission Progresses Toward Resumed Drilling

NASA's Mars rover Curiosity team is working to restore Curiosity's sample-drilling capability using new techniques. The latest development is a preparatory test on Mars.


In [46]:
#store latest article scraped data in variables and add to mars data dict
news_t = news_titles[0]
news_p = news_paras[0]
mars_data_dict['news_title'] = news_t
mars_data_dict['news_para'] = news_p

#view mars data dict
mars_data_dict

{'news_p': "NASA's Mars rover Curiosity team is working to restore Curiosity's sample-drilling capability using new techniques. The latest development is a preparatory test on Mars.",
 'news_t': 'Mars Rover Mission Progresses Toward Resumed Drilling'}

## JPL Mars Space Images - Featured Image

In [48]:
#url for jpl main site
jpl_url = 'https://www.jpl.nasa.gov'

#retrieve page to be scraped
with Browser() as browser:
    url = jpl_url + '/spaceimages/?search=&category=Mars'
    browser.visit(url)
    image_html = browser.html
    
    #create BeautifulSoup object; parse with 'html.parser'
    soup = BeautifulSoup(image_html, 'html.parser')

    #examine the results, then determine element that contains desired info
    #results are returned as an iterable list
    results = soup.find_all('div', class_ = 'carousel_items')

In [49]:
#loop through results to scrape featured image data
for result in results:
    #error handling
    try:
        
        #retrieve and print image title & description
        image_title = result.find('h1', class_ = 'media_feature_title').text  
        image_descript = result.a['data-description']
        print(image_title)
        print(image_descript)
        print(' ')

        #retrieve image link and print full image url
        image_link = result.a['data-fancybox-href']
        featured_image_url = jpl_url + image_link
        print(featured_image_url)
    
    except:
        print('error')


				  Stuck on the Rings				
Like a drop of dew hanging on a leaf, Tethys appears to be stuck to the A and F rings from this perspective of NASA's Cassini spacecraft.
 
https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18284_ip.jpg


In [50]:
#add featured image data to mars data dict and view dict
mars_data_dict['featured_image_title'] = image_title
mars_data_dict['featured_image_descript'] = image_descript
mars_data_dict['featured_image_url'] = featured_image_url

mars_data_dict

{'featured_image_descript': "Like a drop of dew hanging on a leaf, Tethys appears to be stuck to the A and F rings from this perspective of NASA's Cassini spacecraft.",
 'featured_image_title': '\n\t\t\t\t  Stuck on the Rings\t\t\t\t',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18284_ip.jpg',
 'news_p': "NASA's Mars rover Curiosity team is working to restore Curiosity's sample-drilling capability using new techniques. The latest development is a preparatory test on Mars.",
 'news_t': 'Mars Rover Mission Progresses Toward Resumed Drilling'}

## Mars Weather

In [52]:
#retreive page to be scraped
with Browser() as browser:
    url = 'https://twitter.com/marswxreport?lang=en'
    browser.visit(url)
    weather_html = browser.html
    
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(weather_html, 'html.parser')

#examine the results, then determine element that contains desired info
#results are returned as an iterable list
results = soup.find('div', class_='js-tweet-text-container')

In [53]:
#retrieve tweet and view text
mars_weather = results.find('p').text
mars_weather

'Sol 1852 (Oct 22, 2017), Sunny, high -31C/-23F, low -80C/-112F, pressure at 8.63 hPa, daylight 05:58-17:41'

In [54]:
#add weather data to mars data dict and view dict
mars_data_dict['mars_weather'] = mars_weather
mars_data_dict

{'featured_image_descript': "Like a drop of dew hanging on a leaf, Tethys appears to be stuck to the A and F rings from this perspective of NASA's Cassini spacecraft.",
 'featured_image_title': '\n\t\t\t\t  Stuck on the Rings\t\t\t\t',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18284_ip.jpg',
 'mars_weather': 'Sol 1852 (Oct 22, 2017), Sunny, high -31C/-23F, low -80C/-112F, pressure at 8.63 hPa, daylight 05:58-17:41',
 'news_p': "NASA's Mars rover Curiosity team is working to restore Curiosity's sample-drilling capability using new techniques. The latest development is a preparatory test on Mars.",
 'news_t': 'Mars Rover Mission Progresses Toward Resumed Drilling'}

## Mars Facts

In [55]:
#url for page to be scraped
fact_url = 'https://space-facts.com/mars/'

#read the table from the page and view
fact_table = pd.read_html(fact_url)
fact_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [56]:
#create a dataframe from the first (and in this case,only) table and view
fact_df = fact_table[0]
fact_df.columns = ['Quantity', 'Value']
fact_df

Unnamed: 0,Quantity,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [57]:
#generate HTML table from dataframe and strip newlines and view
fact_html = fact_df.to_html().replace('\n', '')
fact_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Quantity</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomer

In [58]:
#add fact data HTML to mars data dict and view dict
mars_data_dict['mars_facts'] = fact_html
mars_data_dict

{'featured_image_descript': "Like a drop of dew hanging on a leaf, Tethys appears to be stuck to the A and F rings from this perspective of NASA's Cassini spacecraft.",
 'featured_image_title': '\n\t\t\t\t  Stuck on the Rings\t\t\t\t',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18284_ip.jpg',
 'mars_facts': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Quantity</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th

## Mars Hemispheres

In [59]:
#create a list of urls for pages to be scraped
hemi_urls = ['https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced',
             'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
             'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
             'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'
            ]

#create dict key list and empty list for dict values
dict_keys = ['title', 'img_url']
dict_values = []

#create an empty list to add dicts of each title & image url 
hemisphere_image_urls = []

#url for usgs main site
usgs_url = 'https://astrogeology.usgs.gov'

#loop through each url to retrieve individual pages to scrape
for item in hemi_urls:

    #error handling
    try:
        
        #retireive page to be scraped
        with Browser() as browser:
            url = item
            browser.visit(url)
            hemi_html = browser.html

        #create BeautifulSoup object; parse with 'html.parser'
        soup = BeautifulSoup(hemi_html, 'html.parser')

        #examine the results, then determine element that contains image title
        #results are returned as an iterable list
        title_results = soup.find('h2', class_ = 'title')

        #loop through results to retrieve title data, add to values list and print title 
        for result in title_results:
            
            #error handling
            try:
            
                hemi_title = title_results.text
                dict_values.append(hemi_title)
                print(hemi_title)

            except:
                print('error')
                
        #examine the results, then determine element that contains image link
        #results are returned as an iterable list
        link_results = soup.find_all('img', class_ = 'wide-image')

        #loop through results to retrieve image link, add to values list and print url
        for result in link_results:
            
            #error handling
            try:
            
                hemi_link = result['src']
                hemi_image_url = usgs_url + hemi_link
                dict_values.append(hemi_image_url)  
                print(hemi_image_url)
            
            except:
                print('error')
                
        #create a dict for each title/url pair and add to list     
        hemi_dict = dict(zip(dict_keys, dict_values))
        hemisphere_image_urls.append(hemi_dict)
        
        
    except:
        print('error')   
        
#show list of dicts
hemisphere_image_urls

Valles Marineris Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg
Cerberus Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
Schiaparelli Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
Syrtis Major Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg


[{'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [60]:
#add hemisphere image data to mars data dict and view dict
mars_data_dict['hemisphere_image_data'] = hemisphere_image_urls
mars_data_dict

{'featured_image_descript': "Like a drop of dew hanging on a leaf, Tethys appears to be stuck to the A and F rings from this perspective of NASA's Cassini spacecraft.",
 'featured_image_title': '\n\t\t\t\t  Stuck on the Rings\t\t\t\t',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18284_ip.jpg',
 'hemisphere_image_data': [{'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
   'title': 'Valles Marineris Hemisphere Enhanced'},
  {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
   'title': 'Valles Marineris Hemisphere Enhanced'},
  {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
   'title': 'Valles Marineris Hemisphere Enhanced'},
  {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4