# 0. PRE-WORK

In [1]:
#import dependencies
from bs4 import BeautifulSoup
from selenium import webdriver
from splinter import Browser
import pandas as pd
import requests
import time
import re

# 1. NASA MARS NEWS
The following module of code will open NASA's Mars news site, and scrape its code for the latest Mars headline and summary text.

In [2]:
#target news url
news_url = 'https://mars.nasa.gov/news'

In [3]:
#use webdriver to load page with dynamic JS before scraping
news_driver = webdriver.Chrome()
news_driver.get(news_url)

#sleep for one interval for JS to load before scrape
time.sleep(1)

In [4]:
#scrape the HTML with BS
news_soup = BeautifulSoup(news_driver.page_source)

In [5]:
#quit browser after Soup is saved
news_driver.quit()

In [6]:
#find first headline + article summary
news_headline = news_soup.find('div', class_='content_title')
news_teaser = news_soup.find('div', class_='article_teaser_body')

#save contents as specified variable
news_title = news_headline.text
news_p = news_teaser.text

#confirmation
print("LATEST MARS HEADLINE + SUMMARY:")
print('-' * 50)
print("- " + news_title)
print("- " + news_p)

LATEST MARS HEADLINE + SUMMARY:
--------------------------------------------------
- NASA's Treasure Map for Water Ice on Mars
- A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.


# 2. JPL MARS IMAGES
The following code module will visit the JPL space images site and scrape the location of the featured headline image.

In [7]:
#target jpl image page
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [8]:
#setup chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [9]:
#visit url
browser.visit(jpl_url)

In [10]:
#push full image button
jpl_button = browser.find_by_id('full_image')
jpl_button.click()

In [11]:
#soup the html
jpl_soup = BeautifulSoup(browser.html, 'html.parser')

In [12]:
#close browser after scraping HTML
browser.quit()

In [13]:
#isolate the image code
jpl_image = jpl_soup.find('article', class_='carousel_item')

In [14]:
#isolate the tag containing the full-sized image location
jpl_fullimage = jpl_image['style']

In [15]:
#use regex to parse the tag string
jpl_regex = re.search( 'spaceimages/images/wallpaper/\w+-\d+\w\d+.jpg', jpl_fullimage)

In [16]:
#final - image location confirmation
featured_image_url = f'http://jpl.nasa.gov/{jpl_regex.group()}'
print(featured_image_url)

http://jpl.nasa.gov/spaceimages/images/wallpaper/PIA18249-1920x1200.jpg


# 3. MARS WEATHER
The following code module will scrape the latest weather update tweet from the @marswxreport account.

In [17]:
#set target url
weather_url = 'https://twitter.com/marswxreport'

In [18]:
#request html
weather_request = requests.get(weather_url)

In [19]:
#parse through HTML with bs
weather_soup = BeautifulSoup(weather_request.text, 'html.parser')

In [20]:
#find latest tweet
mars_weather = weather_soup.find('p', class_='TweetTextSize').text

In [21]:
#use regex to find pic.twitter.com url
tweet_tail = re.search('pic.twitter.com/\w+', mars_weather)

In [22]:
#remove tweet tail for final string
mars_tweet = mars_weather.replace(tweet_tail.group(), "")

In [23]:
#confirmation
print(mars_tweet)

InSight sol 374 (2019-12-15) low -97.3ºC (-143.1ºF) high -21.2ºC (-6.1ºF)
winds from the SSE at 5.6 m/s (12.5 mph) gusting to 22.0 m/s (49.1 mph)
pressure at 6.50 hPa


# 4. MARS FACTS
The following code module will scrape a table of Mars facts from the target website.

In [24]:
#set target url
facts_url = 'https://space-facts.com/mars/'

In [25]:
#read table via panda
facts_read = pd.read_html(facts_url)

In [26]:
#convert to HTML
facts_table = facts_read[0].to_html()
print(facts_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>0</th>
      <th>1</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
      <td>Recorded

# 5. MARS HEMISPHERES
The following code module will extract four images of the Mars hemispheres from the target website.

In [27]:
#target hemisphere url
hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [28]:
#request the hemi HTML and use bs to parse it

hemi_driver = webdriver.Chrome()
hemi_driver.get(hemi_url)

#sleep for one interval for JS to load before scrape
time.sleep(1)

In [29]:
hemi_soup = BeautifulSoup(hemi_driver.page_source)

In [30]:
#find each link leading to the four hemisphere pages
hemi_links = hemi_soup.find_all('a', class_='product-item')

In [31]:
#holder for scrape of anchor text containing hemisphere names
hemi_names = []

#loop through anchors for text
for x in range(len(hemi_links)):
    if x%2 == 1:
        hemi_split = hemi_links[x].text.split()
        
        hemi_holder = []
        
        for y in hemi_split:
            if y != "Enhanced":
                hemi_holder.append(y)
        
        hemi_names.append(" ".join(hemi_holder))

In [32]:
#holder for image urls
hemi_images = []

#loop through individual pages and save url names
for j in range(4):
    d_url = "https://astrogeology.usgs.gov" + hemi_links[j]['href']
    d_request = hemi_driver.get(d_url)
    d_soup = BeautifulSoup(hemi_driver.page_source)
    d_link = d_soup.find('a', text="Sample")
    hemi_images.append(d_link['href'])

In [33]:
hemi_driver.quit()

In [34]:
#combine hemi names with image urls
hemi_dict = []

for z in range(4):
    hemi_dict.append({'title': hemi_names[z], 'img_url': hemi_images[z]})
    print({'title': hemi_names[z], 'img_url': hemi_images[z]})

{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}
{'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}
{'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}
{'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}


In [36]:
#create master dictionary

content = {
    "news_headline": news_title,
    "news_summary": news_p,
    "featured_image": featured_image_url,
    "weather": mars_tweet,
    "facts": facts_table,
    "hemi_n": hemi_dict,
    "hemi_i": hemi_images
}

In [37]:
content

{'news_headline': "NASA's Treasure Map for Water Ice on Mars",
 'news_summary': 'A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.',
 'featured_image': 'http://jpl.nasa.gov/spaceimages/images/wallpaper/PIA18249-1920x1200.jpg',
 'weather': 'InSight sol 374 (2019-12-15) low -97.3ºC (-143.1ºF) high -21.2ºC (-6.1ºF)\nwinds from the SSE at 5.6 m/s (12.5 mph) gusting to 22.0 m/s (49.1 mph)\npressure at 6.50 hPa',
 'facts': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moon