In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import time

## Step 1 - Scraping

### NASA Mars News

In [41]:
# # https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [42]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [43]:
nasa_url = 'https://mars.nasa.gov/news/'
browser.visit(nasa_url)

In [44]:
html = browser.html
bsoup = bs(html,"lxml")

In [45]:
news_title = bsoup.find('div', class_='content_title').text
print(news_title)

news_p=bsoup.find('div', class_='article_teaser_body').text
print(news_p)

Media Get a Close-Up of NASA's Mars 2020 Rover
The clean room at NASA's Jet Propulsion Laboratory was open to the media to see NASA's next Mars explorer before it leaves for Florida in preparation for a summertime launch.


### JPL Mars Space Images - Featured Image

In [53]:
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)

In [54]:
html = browser.html
soup = bs(html, 'lxml')

In [55]:
image_name= bsoup.find('article', class_='carousel_item')['alt'] 
print(image_name)

Scoop Marks in the Sand at 'Rocknest'


In [57]:
base_url = 'https://www.jpl.nasa.gov'
img_url = soup.find(attrs={'data-title':image_name})["data-fancybox-href"] 
combo_url = base_url + img_url
print(combo_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16469_ip.jpg


### Mars Weather

In [58]:
weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)

In [59]:
html = browser.html
bsoup = bs(html, 'lxml')

In [60]:
mars_weather= bsoup.find('p', class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0')
print(mars_weather)

None


### Mars Facts

In [61]:
facts_url = 'https://space-facts.com/mars/'

In [62]:
mars_facts_tables = pd.read_html(facts_url)
mars_facts_tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [63]:
mars_df = mars_facts_tables[1]
mars_df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-153 to 20 °C,-88 to 58°C


In [64]:
mars_df = mars_df.drop(columns=['Earth'])
mars_df

Unnamed: 0,Mars - Earth Comparison,Mars
0,Diameter:,"6,779 km"
1,Mass:,6.39 × 10^23 kg
2,Moons:,2
3,Distance from Sun:,"227,943,824 km"
4,Length of Year:,687 Earth days
5,Temperature:,-153 to 20 °C


In [65]:
mars_df = mars_df.rename(columns=
                         {"Mars - Earth Comparison": "Measure"})
mars_df

Unnamed: 0,Measure,Mars
0,Diameter:,"6,779 km"
1,Mass:,6.39 × 10^23 kg
2,Moons:,2
3,Distance from Sun:,"227,943,824 km"
4,Length of Year:,687 Earth days
5,Temperature:,-153 to 20 °C


In [66]:
html_table = mars_df.to_html(header=None,index=False)
html_table.replace('\n', '')
html_table

'<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <td>Diameter:</td>\n      <td>6,779 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <td>Distance from Sun:</td>\n      <td>227,943,824 km</td>\n    </tr>\n    <tr>\n      <td>Length of Year:</td>\n      <td>687 Earth days</td>\n    </tr>\n    <tr>\n      <td>Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n  </tbody>\n</table>'

In [67]:
mars_df.to_html('mars_table.html')

In [36]:
!open mars_table.html

### Mars Hemispheres

In [68]:
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemisphere_url)
html = browser.html
bsoup = bs(html, 'lxml')

hemisphere_urls = []

#include all 4 hemispheres  
xpath = '//*[@id="product-section"]/div[2]/div/div/a'

    hemisphere_anchors = browser.find_by_xpath(xpath)

# Loop through results 
for anchor in hemisphere_anchors:
    try:
        hemisphere_title = anchor.find_by_tag('h3').text
        hemisphere_href = anchor['href']
        #request the next page using the href
        hemisphere_page = requests.get(hemisphere_href).text
        bsoup = bs(hemisphere_page, 'lxml')
        anchor_tag_page2 = bsoup.select('#wide-image > div > ul > li:nth-child(1) > a')
        hemisphere_url =  anchor_tag_page2[0]['href']
        img_dict = { "image title": hemisphere_title, "image url": hemisphere_url }
        hemisphere_urls.append(img_dict)

    except Exception as e:
        print(e)
        print("This is an exception being thrown")


hemisphere_urls

[{'image title': 'Cerberus Hemisphere Enhanced',
  'image url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'image title': 'Schiaparelli Hemisphere Enhanced',
  'image url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'image title': 'Syrtis Major Hemisphere Enhanced',
  'image url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'image title': 'Valles Marineris Hemisphere Enhanced',
  'image url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]