# Step 1 - Scraping

In [1]:
# modules and dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import time 
import pandas as pd
import numpy as np

### NASA MARS News

In [2]:
# Setting up chromedriver path
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
# visit URL of page to be scraped
url_NASA_news = 'https://mars.nasa.gov/news'
browser.visit(url_NASA_news)

In [3]:
# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

#see image in screenshots folder for determination of "div class"

# find title of the latest news article from NASA Mars news site
news_title = soup.find_all('div', class_='content_title')

print(news_title[0].text)

Mars 2020 Unwrapped and Ready for More Testing


In [4]:
# find paragraph out of the latest news article from NASA Mars news site
news_p = soup.find_all('div', class_='article_teaser_body')

#print(news_title.text)
print(news_p[0].text)

In time-lapse video, bunny-suited engineers remove the inner layer of protective foil on NASA's Mars 2020 rover after it was relocated for testing.


In [5]:
#close browser
browser.quit()

### JPL Mars Space Images - Featured Image

In [6]:
# Setting up chromedriver path
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
# visit URL of page to be scraped
url_space_image = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url_space_image)
time.sleep(2)

In [7]:
# Find and click the "full image" button
browser.click_link_by_id('full_image')

In [8]:
time.sleep(2)
#find a click "more info" button
browser.click_link_by_partial_text('more info')

In [9]:
# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
image_soup = BeautifulSoup(html, 'html.parser')

#find the image and then extract the url
image = image_soup.find('figure', class_='lede')
image_url=image.find('img')['src']
image_url

'/spaceimages/images/largesize/PIA14106_hires.jpg'

In [10]:
featured_image_url= 'https://www.jpl.nasa.gov' + image_url 

print(featured_image_url )

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA14106_hires.jpg


In [11]:
#close browser
browser.quit()

### Mars Weather

In [12]:
#url to be scraped
weather_url = 'https://twitter.com/marswxreport?lang=en'

#set up splinter
browser = Browser('chrome', headless=False)
browser.visit(weather_url)
time.sleep(1)

In [13]:
# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
weather_soup = BeautifulSoup(html, 'html.parser')

In [14]:
#find current weather tweet text
weather = weather_soup.find('div', class_='js-tweet-text-container')
mars_weather= weather.p.text
print(mars_weather)

InSight sol 317 (2019-10-18) low -103.2ºC (-153.8ºF) high -26.2ºC (-15.2ºF)
winds from the SSE at 5.6 m/s (12.5 mph) gusting to 22.2 m/s (49.7 mph)
pressure at 7.10 hPapic.twitter.com/LNDEvGwVDw


In [15]:
#close browser
browser.quit()

### Mars Facts

In [16]:
#url to be scraped
facts_url='https://space-facts.com/mars/'  

#retrieve facts table for mars only
table_of_facts=pd.read_html(facts_url)[1]

# assign column titles
table_of_facts.columns=['Description', 'Value']
table_of_facts.set_index("Description", inplace = True)
table_of_facts

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [17]:
# Convert the data to HTML table string
mars_facts_html = table_of_facts.to_html()
mars_facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

In [18]:
#clean up the table
mars_facts_html.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [19]:
#save the table into html file
table_of_facts.to_html('Mars_facts.html')

### Mars Hemispheres

In [20]:
# URL of page to be scraped
url_USGSA = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Setting up chromedriver path
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url_USGSA)

In [21]:
# Create an empty list to hold dictionaries of hemisphere title with the image url string
hemisphere_image_urls = []

# Get a list of all of the hemispheres
links = browser.find_by_css("h3")

# Loop through those links to find title and href and sore in the dictionary
for i in range(len(links)):
    # create a dictionary to store retrieved data
    hemisphere = {}
    
    # find css for the link-header and click it
    browser.find_by_css("h3")[i].click()
    
    # Find Hemisphere title and strip it of extra text
    hemisphere['title'] = browser.find_by_css("h2.title").text.replace(" Enhanced", "")
    
    # Find text Sample associated with the image and extract the href
    hemisphere['img_url'] =browser.find_link_by_text('Sample').first['href']
    
    # Append title and url stored in hemisphere to a list
    hemisphere_image_urls.append(hemisphere)
    
    # Navigate back to loop again
    browser.back()
    
#show the list
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [22]:
#close browser
browser.quit()