# MISSION TO MARS

## Step 1 - Scraping

In [1]:
# Dependencies

# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
from splinter import Browser
from bs4 import BeautifulSoup  
import requests
import tweepy
import yaml
import pandas as pd
import time

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

Scrape the NASA Mars News Site and collect:
* The latest News Title (news_title).
* Paragragh Text (paragraph_title). 

In [2]:
# Scrape the NASA Mars News Site. 
url_news = "https://mars.nasa.gov/news/"  
response = requests.get(url_news)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

# Collect the latest News Title 
news_title = soup.find('div', class_="content_title").text
print (news_title)

# Collect the latest New Paragraph Text
news_paragraph = soup.find('div', class_="rollover_description_inner").text
print (news_paragraph)



NASA Invests in Visionary Technology 



NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions.



In [3]:
mars_data = {}

### JPL Mars Space Images - Featured Image

URL JPL's Featured Space Image:  https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars.

* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
* Make sure to find the image url to the full size .jpg image.
* Make sure to save a complete url string for this image.

In [4]:
# Scrape the JPL Mars Space Images Site
url_si = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url_si)
html = browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

# Collect the featured Mars Image
image = soup.find("a", class_="button fancybox")["data-fancybox-href"]
featured_image_url = "https://www.jpl.nasa.gov" + image
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA20063_ip.jpg


### Mars Weather
* Visit the Mars Weather twitter account: https://twitter.com/marswxreport?lang=en 
* Scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called
mars_weather.

#### Example:
mars_weather = 'Sol 1801 (Aug 30, 2017), Sunny, high -21C/-5F, low -80C/-112F, pressure at 8.82 hPa, daylight 06:09-17:55'

In [6]:
# Given a yaml filename , return the Twitter API Keys

def get_file_contents(filename):
    try:
        with open(filename, 'r') as config_file:
            config = yaml.load(config_file)
            return (config)
    except FileNotFoundError:
        print("'%s' file not found" % filename)

In [8]:
TWITTER_CONFIG_FILE = 'auth.yaml'

# Get the Twitter API Keys
config = get_file_contents(TWITTER_CONFIG_FILE)

# Twitter API Keys
consumer_key = config['twitter']['consumer_key']
consumer_secret = config['twitter']['consumer_secret']
access_token = config['twitter']['access_token']
access_token_secret = config['twitter']['access_token_secret']

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

target_user= "@MarsWxReport"

In [9]:
# Scrape the latest Mars weather tweet from the Mars Weather twitter account

# Search for all tweets
mars_tweets = api.user_timeline(target_user, count=1)

# Get the latest Mars weather tweet from home feed
mars_weather = mars_tweets[0]["text"]

print (mars_weather)

RT @rtphokie: Narrowing of the forecast continues for #Tiangong1, now predicted to reenter sometime between Saturday 7:15 am and Sunday 11:…


### Mars Facts
Visit the Mars Facts webpage: https://space-facts.com/mars/ and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

Use Pandas to convert the data to a HTML table string.

In [30]:
# Scrape the Mars Facts Webpage
url_mf = "https://space-facts.com/mars"
browser.visit(url_mf)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

results = soup.find_all('table')

#with open(html) as f:
#    soup = bs.BeautifulSoup(f, 'lxml')
#    parsed_table = soup.find_all('table')[1] 
#    data = [[td.a['href'] if td.find('a') else 
#             ''.join(td.stripped_strings)
#             for td in row.find_all('td')]
#            for row in parsed_table.find_all('tr')]
#    mars_df = pd.DataFrame(data[1:], columns=data[0])
print(results) 

[<table class="tablepress tablepress-id-mars" id="tablepress-mars">
<tbody>
<tr class="row-1 odd">
<td class="column-1"><strong>Equatorial Diameter:</strong></td><td class="column-2">6,792 km<br/>
</td>
</tr>
<tr class="row-2 even">
<td class="column-1"><strong>Polar Diameter:</strong></td><td class="column-2">6,752 km<br/>
</td>
</tr>
<tr class="row-3 odd">
<td class="column-1"><strong>Mass:</strong></td><td class="column-2">6.42 x 10^23 kg (10.7% Earth)</td>
</tr>
<tr class="row-4 even">
<td class="column-1"><strong>Moons:</strong></td><td class="column-2">2 (<a href="https://space-facts.com/phobos/">Phobos</a> &amp; <a href="https://space-facts.com/deimos/">Deimos</a>)</td>
</tr>
<tr class="row-5 odd">
<td class="column-1"><strong>Orbit Distance:</strong></td><td class="column-2">227,943,824 km (1.52 AU)</td>
</tr>
<tr class="row-6 even">
<td class="column-1"><strong>Orbit Period:</strong></td><td class="column-2">687 days (1.9 years)<br/>
</td>
</tr>
<tr class="row-7 odd">
<td clas

In [28]:
# With read_html function in Pandas, automatically scrape the tabular data from Mars Facts Webpage.
mars_df = pd.read_html(url_mf)[0]
mars_df.columns = ["Facts", "Data"]
mars_df.set_index("Facts", inplace=True)
mars_df

ImportError: lxml not found, please install it

In [None]:
# With to_html method, we generate the HTML table from mars_df DataFrame.
mars_table_html = "".join(mars_df.to_html().split("\n"))
mars_table_html

### Mars Hemispheres
* Visit the USGS Astrogeology site https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars to obtain high resolution images for each of Mar's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemipshere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [14]:
# Scrape the USGS Astrogeology site to get Mar's Hemispheres Images
url_mh = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url_mh)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

#list of mars hemispheres
hemis_mars_list = []

In [15]:
results = soup.find_all('h3')

for r in results:
    elem = r.getText()
    browser.click_link_by_partial_text(elem)
    
    time.sleep(3)   # Takes time to return information
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    # Collect the full resolution image and the title of the image.
    image = soup.find("img", class_="wide-image")["src"]
    img_url = "https://astrogeology.usgs.gov" + image
    img_title = soup.find("h2", class_="title").text

    # Keep a dictionary for each hemisphere. The dictionary contains the title and the feature image.
    hemis_mars_list.append({"title": img_title, "img_url": img_url})
    browser.back()    

In [16]:
print(hemis_mars_list)


[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]


In [18]:
# Add  all the data collected to Mars dictionary
mars_data["news_title"] = news_title
mars_data["news_paragraph"] = news_paragraph
mars_data["featured_image_url"] = featured_image_url
mars_data["mars_weather"] = mars_weather
mars_data["mars_hemis"] = hemis_mars_list
mars_data["mars_facts"] = mars_table_html


NameError: name 'mars_table_html' is not defined

In [19]:
print (mars_data["news_title"])
print (mars_data["news_paragraph"])
print (mars_data["featured_image_url"])
print (mars_data["mars_weather"])
print (mars_data["mars_facts"])
print (mars_data["mars_hemis"])



NASA Invests in Visionary Technology 



NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions.

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA20063_ip.jpg
RT @rtphokie: Narrowing of the forecast continues for #Tiangong1, now predicted to reenter sometime between Saturday 7:15 am and Sunday 11:…


KeyError: 'mars_facts'