# Mission to Mars 
## Web Scraping Assignment by John Zhao 

In [1]:
# Import dependencies ('boilerplate')
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

# Step 1: Scraping
## I. NASA Mars News

In [2]:
# Page that lists the NASA Mars news articles
url = 'https://mars.nasa.gov/news/'

# Bound the wait and fail fast on an HTTP error, so an error page is never
# parsed as if it were the news listing
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the returned markup with the built-in HTML parser
soup = bs(response.text, "html.parser")

# find() returns the first match or None; check before reading .text so a
# layout change produces a clear message instead of an AttributeError
title_tag = soup.find('div', class_='content_title')
content_tag = soup.find('div', class_='rollover_description_inner')
if title_tag is None or content_tag is None:
    raise ValueError(f'Expected news elements were not found at {url}')

# Trim leading/trailing whitespace from the extracted text
news_title = title_tag.text.strip()
news_content = content_tag.text.strip()
print(news_title,
      news_content)



NASA's Opportunity Rover Mission on Mars Comes to End

 
NASA's Opportunity Mars rover mission is complete after 15 years on Mars. Opportunity's record-breaking exploration laid the groundwork for future missions to the Red Planet.



## II. JPL Mars Space Images - Featured Image

In [3]:
# Setup for Splinter
!which chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

# Target URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

# BS object
html = browser.html
soup = bs(html, 'html.parser')

# Click through to image
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(3)
browser.click_link_by_partial_text('more info')
time.sleep(3)
browser.click_link_by_partial_text('.jpg')
time.sleep(3)

/usr/local/bin/chromedriver


In [4]:
# Parse whatever page the browser is currently showing
html2 = browser.html
soup2 = bs(html2, 'html.parser')

# Take the first <img> tag and read its src attribute
first_img = soup2.find('img')
featured_image_url = first_img.get('src')
print(f'The featured image URL is {featured_image_url}')

The featured image URL is https://photojournal.jpl.nasa.gov/jpeg/PIA19168.jpg


## III. Mars Weather

In [5]:
# Fetch the Twitter page of the marswxreport account.
# Bound the wait and fail fast on an HTTP error rather than parsing an error page.
tw_rs = requests.get('https://twitter.com/marswxreport?lang=en', timeout=30)
tw_rs.raise_for_status()
tw_soup = bs(tw_rs.text, 'html.parser')

# find() returns the first <p> whose class attribute matches, or None;
# check before reading .text so a missing tweet gives a clear message
tweet_tag = tw_soup.find('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')
if tweet_tag is None:
    raise ValueError('No tweet text was found on the marswxreport page')
mars_weather = tweet_tag.text
print(mars_weather)

While the Sun has set on #Opportunity, the MER program lives on in mountains of data scientists will be making new discoveries from for decades.

We asked 90 Sols and 1km, she gave us 5110 Sols and 45km. So long Oppy, and thanks for all the data. 

MER-B EOMpic.twitter.com/33obWOXbNx


## IV. Mars Facts

In [6]:
# Let pandas parse the HTML tables on the Mars facts page (result is a list)
mars_facts_url = 'http://space-facts.com/mars/'
mars_table = pd.read_html(mars_facts_url)
mars_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [7]:
# Show the container type returned by read_html
print(type(mars_table))

# Work on a copy of the first table so mars_table is left untouched and this
# cell can be re-run without the two-name column assignment failing
mars_df = mars_table[0].copy()

# Label both columns, then index the rows by 'Data Type'
mars_df.columns = ['Data Type', 'Information']
mars_df = mars_df.set_index('Data Type')

# Render the table as an HTML string
mars_html = mars_df.to_html()

<class 'list'>


In [8]:
# Drop every newline so the HTML becomes a single-line string
mars_html = ''.join(mars_html.split('\n'))
mars_html

# Optional: write the table to a file and open it in the browser
# mars_df.to_html('mars_info.html')
# ! open mars_info.html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Information</th>    </tr>    <tr>      <th>Data Type</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

## V. Mars Hemispheres

In [9]:
# USGS Astrogeology search-results page for the enhanced Mars hemispheres
tgt_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Bound the wait and fail fast on an HTTP error rather than parsing an error page
hemi_response = requests.get(tgt_url, timeout=30)
hemi_response.raise_for_status()

# Parse the returned markup with the built-in HTML parser
hemi_soup = bs(hemi_response.text, "html.parser")

# Every result is an <a class="itemLink product-item">; an empty list would
# make any later iteration over hemi_attrs silently produce nothing
hemi_attrs = hemi_soup.find_all('a', class_='itemLink product-item')
if not hemi_attrs:
    raise ValueError(f'No hemisphere links were found at {tgt_url}')
print(hemi_attrs)

[<a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/><div class="description"><h3>Cerberus Hemisphere Enhanced</h3></div></a>, <a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/7677c0a006b83871b5a2f66985ab5857_schiaparelli_enhanced.tif_thumb.png"/><div class="description"><h3>Schiaparelli Hemisphere Enhanced</h3></div></a>, <a class="itemLink product-item" href="/search/map/Mars/Viking/syrtis_major_enhanced"><img alt="Syrtis Major Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/aae41197e40d6d4f3ea557f8cfe51d15_syrtis_major_enhanced.tif_thumb.png"/><div class="description"><h3>Syrtis Major Hemisphere Enhanced</h3></div></a>, <a class="itemLink product-item" href="/search/ma

In [10]:
# Visit each hemisphere's detail page and collect its title and image URL
# as a list of {'Title': ..., 'Image URL': ...} dictionaries
hemi_info = []
for x in hemi_attrs:
    # Title's text in <h3>
    title = x.find('h3').text
    # The href is site-relative and starts with '/'; urljoin resolves it
    # against the host without producing a doubled slash
    img_link = urljoin('https://astrogeology.usgs.gov/', x['href'])
    # Bound the wait and fail fast on an HTTP error
    img_request = requests.get(img_link, timeout=30)
    img_request.raise_for_status()
    # Setup BS object
    img_soup = bs(img_request.text, 'html.parser')
    # The first link inside <div class="downloads"> is used as the image URL
    img_hres = img_soup.find('div', class_='downloads')
    img_anchor = img_hres.find('a') if img_hres is not None else None
    if img_anchor is None:
        raise ValueError(f'No download link was found at {img_link}')
    img_url = img_anchor['href']
    # Append all info
    hemi_info.append({'Title': title, 'Image URL': img_url})

In [11]:
# Show the collected list of hemisphere dictionaries
print(hemi_info)

[{'Title': 'Cerberus Hemisphere Enhanced', 'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'Title': 'Schiaparelli Hemisphere Enhanced', 'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'Title': 'Syrtis Major Hemisphere Enhanced', 'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'Title': 'Valles Marineris Hemisphere Enhanced', 'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
