# Mission to Mars

In [1]:
# Add dependencies
import shutil
import urllib.request

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

## WEB SCRAPING

In [2]:
# NASA Mars News ------------------------------------------------------------------------------------------------------
# Goal: collect the latest news title and its paragraph text from this listing.
# The query string is split one parameter group per line for readability.
news_url = (
    "https://mars.nasa.gov/news/"
    "?page=0&per_page=40"
    "&order=publish_date+desc%2Ccreated_at+desc"
    "&search="
    "&category=19%2C165%2C184%2C204"
    "&blank_scope=Latest"
)

In [3]:
# Access site and get the "soup".
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP error here rather than letting an
# error page be parsed as if it were the news listing.
response = requests.get(news_url, timeout=30)
response.raise_for_status()
news_soup = bs(response.text, 'html.parser')

In [4]:
# Locate and save title text
titles = news_soup.find_all("div", class_="content_title")
if not titles:
    # Fail with a descriptive message instead of an opaque IndexError below.
    raise RuntimeError("No 'content_title' elements found on the Mars news page")
# Use the first match (the URL asks for newest-first ordering) and trim the
# surrounding layout whitespace so only the headline itself is kept.
news_title = titles[0].text.strip()

In [5]:
# Locate and save paragraph text
paragraphs = news_soup.find_all("div", class_="rollover_description_inner")
if not paragraphs:
    # Fail with a descriptive message instead of an opaque IndexError below.
    raise RuntimeError("No 'rollover_description_inner' elements found on the Mars news page")
# Use the first match and trim the surrounding layout whitespace.
news_p = paragraphs[0].text.strip()

In [6]:
# JPL Mars ------------------------------------------------------------------------------------------------------------
# Goal: use splinter to find the current featured Mars image and save the
# address of its full-size .jpg as featured_image_url.
mars_images_url = (
    "https://www.jpl.nasa.gov/spaceimages/"
    "?search=&category=Mars"
)
# Shell out to locate the chromedriver binary on PATH; its path is what the
# browser setup needs in order to drive Chrome.
!which chromedriver

/usr/local/bin/chromedriver


In [7]:
# Define executable path and browser.
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# directory layout; fall back to the original location when it is not on PATH.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}
# headless=False opens a visible Chrome window while scraping.
browser = Browser('chrome', **executable_path, headless=False)

In [8]:
# Access website on chromedriver: load the JPL Mars image gallery page in the
# splinter-controlled browser so later cells can interact with it.
browser.visit(mars_images_url)

In [9]:
# Snapshot the HTML of the page currently loaded in the browser.
# NOTE(review): this stores page HTML, not an image URL, and no later cell
# visible in this notebook reads mars_img_html -- confirm whether it is needed.
mars_img_html = browser.html

In [10]:
# Find the main featured image href using splinter -- interact with page.
# try/finally guarantees the Chrome session is closed even when a step below
# fails, so a broken scrape does not leave a stray browser process behind.
try:
    # Click full image button
    browser.click_link_by_partial_text('FULL IMAGE')
    # The follow-up button inside the lightbox was reported as not
    # interactable, so the image path is read from the page markup instead.
    feat_img_html = bs(browser.html, 'html.parser')
    fancybox_link = feat_img_html.find('a', class_='button fancybox')
    if fancybox_link is None:
        # Descriptive failure instead of "'NoneType' object is not subscriptable".
        raise RuntimeError("Featured image link ('a.button.fancybox') not found on the JPL page")
    long_path = fancybox_link['data-fancybox-href']
    # Drop any leading slash so a root-relative path does not yield
    # 'https://www.jpl.nasa.gov//...' with a doubled slash.
    featured_image_url = 'https://www.jpl.nasa.gov/' + long_path.lstrip('/')
finally:
    # Close browser
    browser.quit()

In [11]:
# Mars Weather --------------------------------------------------------------------------------------------------------
# Goal: scrape the text of the latest Mars weather tweet and keep it as mars_weather.
mars_twitter_url = (
    "https://twitter.com/marswxreport"
    "?lang=en"
)

In [12]:
# Fetch the Twitter profile page and parse it.
# The timeout prevents an indefinite hang, and raise_for_status() reports an
# HTTP error here rather than letting an error page be parsed for tweets.
twit_response = requests.get(mars_twitter_url, timeout=30)
twit_response.raise_for_status()
twit_soup = bs(twit_response.text, 'html.parser')

In [13]:
# Find all tweet containers
tweets = twit_soup.find_all("div", class_="js-tweet-text-container")
# Filter out tweets that aren't actually weather reports: a tweet counts as a
# weather report when its text mentions "pressure".
weather_tweets = [tweet for tweet in tweets if "pressure" in tweet.text]

if not weather_tweets:
    # Descriptive failure instead of an opaque IndexError on the lookup below
    # (e.g. when the page markup changed or no report is among the tweets).
    raise RuntimeError("No weather report tweet (containing 'pressure') found on the page")

# Save the most recent weather report tweet (the first match in page order)
mars_weather = weather_tweets[0].text

In [14]:
# Mars Facts ----------------------------------------------------------------------------------------------------------
# Goal: scrape the table of planet facts (diameter, mass, etc.) from this page
# and convert the data to an HTML table string.
fact_page_url = 'https://space-facts.com/mars/'

In [15]:
# Access the page and make the soup.
# The timeout prevents an indefinite hang, and raise_for_status() reports an
# HTTP error here rather than letting an error page be parsed for the table.
fact_response = requests.get(fact_page_url, timeout=30)
fact_response.raise_for_status()
fact_soup = bs(fact_response.text, 'lxml')

In [22]:
# Find the table
table = fact_soup.find('table', attrs={'class': 'tablepress-id-comp-mars'})
if table is None:
    # Descriptive failure instead of "'NoneType' object has no attribute 'find_all'".
    raise RuntimeError("Table 'tablepress-id-comp-mars' not found on the Mars facts page")
table_rows = table.find_all('tr')

# Import table as a dataframe.
# header stays None when the table has no <th> row, in which case pandas
# assigns default column labels instead of the cell failing with a NameError.
header = None
rows = []
for tr in table_rows:
    header_cells = tr.find_all('th')
    if header_cells:
        # A row made of <th> cells supplies the column names.
        header = [cell.text for cell in header_cells]
    else:
        # Any other row contributes one record built from its <td> cells.
        rows.append([cell.text for cell in tr.find_all('td')])
fact_df = pd.DataFrame(rows, columns=header)
fact_df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-153 to 20 °C,-88 to 58°C


In [17]:
# Convert table to html in pandas.
# The string is kept in a variable (not just displayed) so it can be reused
# after this cell; the bare name on the last line still renders it as output.
fact_table_html = fact_df.to_html()
fact_table_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars - Earth Comparison</th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Diameter:</td>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Moons:</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Distance from Sun:</td>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Length of Year:</td>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Temperature:</td>\n      <td>-153 to 20 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

In [18]:
# Mars Hemispheres
# Goal: obtain a high-resolution image for each of Mars' hemispheres.
# Each hemisphere link on the results page has to be opened to find the URL of
# its full-resolution image. For every hemisphere, save the image URL and the
# hemisphere title in a dictionary with the keys img_url and title.
astrogeology_url = (
    "https://astrogeology.usgs.gov/search/results"
    "?q=hemisphere+enhanced&k1=target&v1=Mars"
)


In [19]:
# Detail page for each hemisphere; the image link is read from each page.
# NOTE: these pages are listed by hand rather than discovered by clicking
# through the search results with splinter.
full_img_links = ["https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced",
                 "https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced",
                 "https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced",
                 "https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced"]

# Build one {'title', 'img_url'} dictionary per hemisphere page.
hemisphere_image_urls = []
for url in full_img_links:
    # Timeout avoids an indefinite hang; raise_for_status() reports an HTTP
    # error here rather than letting an error page be parsed.
    hemi_response = requests.get(url, timeout=30)
    hemi_response.raise_for_status()
    hemi_soup = bs(hemi_response.text, 'html.parser')

    downloads = hemi_soup.find('div', class_='downloads')
    if downloads is None or downloads.a is None:
        # Descriptive failure instead of an AttributeError/TypeError on None.
        raise RuntimeError("No download link found on hemisphere page: " + url)

    hemisphere_image_urls.append({
        # Page <title> text up to ' Enhanced' is used as the hemisphere name.
        'title': hemi_soup.title.text.split(' Enhanced')[0],
        # href of the first link inside the 'downloads' div.
        'img_url': downloads.a['href'],
    })

## MongoDB and Flask Application

In [20]:
# Use MongoDB with Flask templating to create a new HTML page that displays all of the information scraped
# Convert Jupyter notebook to Python script called scrape_mars.py with a function called scrape that returns one dictionary with all scraped data
# Create route called /scrape that will import your scrape_mars.py and call scrape function
    # Store in Mongo as Python dictionary
# Create root route / that will query Mongo database and pass mars data into HTML template
# Create HTML template file called index.html that will take mars data dictionary and display all of the data in appropriate HTML elements

In [21]:
# SUBMISSION
    # Jupyter Notebook
    # Screenshots of final application
    # Link to your new repository