# 0. PRE-WORK

In [1]:
#import dependencies
import re
import time
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from splinter import Browser

# 1. NASA MARS NEWS
The following cells open NASA's Mars news site in a Selenium-driven browser and scrape the rendered HTML for the latest Mars headline and its summary text.

In [2]:
#target news url: the NASA Mars news page that the webdriver cell below loads
news_url = 'https://mars.nasa.gov/news'

In [3]:
#use webdriver to load page with dynamic JS before scraping
#(no driver path is passed here, so selenium's default chromedriver lookup is used)
news_driver = webdriver.Chrome()
news_driver.get(news_url)

#sleep for one interval for JS to load before scrape
#NOTE(review): the 1-second pause is a fixed guess; if the page renders more
#slowly, page_source may be read before the article list exists -- confirm
time.sleep(1)

In [4]:
#scrape the HTML with BS
#name the parser explicitly: without it bs4 picks whichever parser happens to
#be installed, warns about the guess, and may build a different tree elsewhere
news_soup = BeautifulSoup(news_driver.page_source, 'html.parser')

In [5]:
#quit browser after Soup is saved
#(the page source has already been parsed into news_soup by the previous cell)
news_driver.quit()

In [12]:
#find first headline + article summary
news_headline = news_soup.find('div', class_='content_title')
news_teaser = news_soup.find('div', class_='article_teaser_body')

#find() returns None when nothing matches (e.g. the JS-rendered article list
#was not in the page yet when it was captured); stop with a clear message
#instead of an AttributeError on .text
if news_headline is None or news_teaser is None:
    raise ValueError("Mars news headline/teaser not found in scraped HTML")

#save contents as specified variable
news_title = news_headline.text
news_p = news_teaser.text

#confirmation
print("LATEST MARS HEADLINE + SUMMARY:")
print('-' * 50)
print("- " + news_title)
print("- " + news_p)

LATEST MARS HEADLINE + SUMMARY:
--------------------------------------------------
- NASA's Treasure Map for Water Ice on Mars
- A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.


# 2. JPL MARS IMAGES
The following cells visit the JPL space images site and scrape the URL of the featured full-size image.

In [13]:
#target jpl image page (space images search with category=Mars)
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [14]:
#setup chromedriver
#NOTE(review): absolute, machine-specific chromedriver path -- adjust to the local install
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
#headless=False: run Chrome with a visible window
browser = Browser('chrome', **executable_path, headless=False)

In [15]:
#visit url: load the JPL Mars images page in the splinter browser
browser.visit(jpl_url)

In [16]:
#push full image button
#(looked up by its element id 'full_image', then clicked)
jpl_button = browser.find_by_id('full_image')
jpl_button.click()

In [17]:
#soup the html: parse the browser's current HTML (after the button click above)
jpl_soup = BeautifulSoup(browser.html, 'html.parser')

In [18]:
#close browser after scraping HTML
#(jpl_soup already holds the parsed page, so the browser is no longer needed)
browser.quit()

In [19]:
#isolate the image code: first <article> element with class 'carousel_item'
#NOTE(review): find() returns None if no such element exists -- the next cell would then fail
jpl_image = jpl_soup.find('article', class_='carousel_item')

In [20]:
#isolate the tag containing the full-sized image location
#(reads the element's 'style' attribute; the regex cell below searches this string)
jpl_fullimage = jpl_image['style']

In [21]:
#use regex to parse the tag string
#raw string so \w and \d reach the regex engine without invalid-escape warnings;
#the dot before "jpg" is escaped so it matches a literal "." only
jpl_regex = re.search(r'spaceimages/images/wallpaper/\w+-\d+\w\d+\.jpg', jpl_fullimage)

In [22]:
#final - image location confirmation
#re.search returns None when the pattern is absent; stop with a clear message
#rather than an AttributeError on .group()
if jpl_regex is None:
    raise ValueError("featured image path not found in carousel style attribute")
featured_image_url = f'http://jpl.nasa.gov/{jpl_regex.group()}'
print(featured_image_url)

http://jpl.nasa.gov/spaceimages/images/wallpaper/PIA20057-1920x1200.jpg


# 3. MARS WEATHER
The following cells scrape the latest weather-report tweet from the @marswxreport Twitter account.

In [23]:
#set target url: the @marswxreport Twitter profile page
weather_url = 'https://twitter.com/marswxreport'

In [24]:
#request html with a plain HTTP GET (no browser, so no JavaScript is executed)
#NOTE(review): the response status is not checked before the body is parsed
weather_request = requests.get(weather_url)

In [25]:
#parse through HTML with bs (response body text, built-in html.parser)
weather_soup = BeautifulSoup(weather_request.text, 'html.parser')

In [36]:
#find latest tweet: text of the first <p> whose class list includes 'TweetTextSize'
#NOTE(review): assumes the first such <p> is the newest weather tweet, and
#find() returns None (-> AttributeError on .text) if the markup differs -- confirm
mars_weather = weather_soup.find('p', class_='TweetTextSize').text

In [37]:
#use regex to find pic.twitter.com url
#raw string + escaped dots: match the literal "pic.twitter.com/" prefix only,
#and avoid the invalid "\w" escape warning of a plain string literal
tweet_tail = re.search(r'pic\.twitter\.com/\w+', mars_weather)

In [38]:
#remove tweet tail for final string
#tweet_tail is None when the tweet carries no pic.twitter.com link; keep the
#text unchanged in that case instead of failing on None.group()
mars_tweet = mars_weather.replace(tweet_tail.group(), "") if tweet_tail else mars_weather

In [39]:
#confirmation: show the tweet text with the picture link removed
print(mars_tweet)

InSight sol 373 (2019-12-14) low -98.1ºC (-144.6ºF) high -20.1ºC (-4.1ºF)
winds from the SW at 4.9 m/s (11.1 mph) gusting to 19.7 m/s (44.0 mph)
pressure at 6.60 hPa


# 4. MARS FACTS
The following cells scrape the Mars facts table from space-facts.com and load it with pandas.

In [40]:
#set target url: the space-facts.com Mars page
facts_url = 'https://space-facts.com/mars/'

In [41]:
#request html with a plain HTTP GET
#NOTE(review): the response status is not checked before the body is parsed
facts_request = requests.get(facts_url)

In [42]:
#extract the html with BS (response body text, built-in html.parser)
facts_soup = BeautifulSoup(facts_request.text, 'html.parser')

In [43]:
#isolate the facts table: find() returns the first <table> element in the page
#NOTE(review): assumes the first table is the facts table -- confirm against the page
facts_table = facts_soup.find('table')

In [44]:
#save table to  list
#read_html expects a path, URL or file-like object; passing the markup as a
#bare string is deprecated in recent pandas, so wrap it in StringIO
facts_list = pd.read_html(StringIO(str(facts_table)))
print(facts_list)

[                      0                              1
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers]


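# 5. MARS HEMISPHERES
The following cells scrape the USGS Astrogeology search results for the Mars hemisphere pages, then collect each hemisphere's name and sample image URL.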

In [None]:
#target hemisphere url: USGS Astrogeology search for "hemisphere enhanced" with target Mars
hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [None]:
#request the hemi HTML and use bs to parse it
#NOTE(review): the response status is not checked before the body is parsed
hemi_request = requests.get(hemi_url)
hemi_soup = BeautifulSoup(hemi_request.text, 'html.parser')

In [None]:
#find each link leading to the four hemisphere pages
#(all <a> elements with class 'product-item'; later cells read their text and href)
hemi_links = hemi_soup.find_all('a', class_='product-item')

In [None]:
#holder for scrape of anchor text containing hemisphere names
hemi_names = []

#walk every product link that was found rather than a hard-coded count of
#four, so a result page with fewer links no longer raises IndexError
for hemi_link in hemi_links:
    #separate "enhanced" from hemisphere names: keep every word except "Enhanced"
    hemi_words = [word for word in hemi_link.text.split() if word != "Enhanced"]
    hemi_names.append(" ".join(hemi_words))

print(hemi_names)

In [None]:
#holder for image urls
hemi_images = []

#loop through individual pages and save url names
#(iterates the links directly instead of indexing a fixed range of four)
for hemi_link in hemi_links:
    d_url = "https://astrogeology.usgs.gov" + hemi_link['href']
    d_request = requests.get(d_url)
    #stop on an HTTP error instead of parsing an error page
    d_request.raise_for_status()
    d_soup = BeautifulSoup(d_request.text, 'html.parser')
    #string= is the current name of bs4's deprecated text= filter
    d_link = d_soup.find('a', string="Sample")
    #find() returns None when the page has no such anchor
    if d_link is None:
        raise ValueError(f"no 'Sample' download link found on {d_url}")
    hemi_images.append(d_link['href'])

In [None]:
#download each hemisphere image and save it as "<hemisphere name>.jpg" in the
#working directory; names and urls are paired positionally
for hemi_name, h_url in zip(hemi_names, hemi_images):
    r = requests.get(h_url)
    #do not write an HTTP error body to disk under a .jpg name
    r.raise_for_status()

    with open(hemi_name + ".jpg", "wb") as f:
        f.write(r.content)

In [None]:
#pair every hemisphere title with its image url, one record per hemisphere
#(hemi_dict is a list of {'title', 'img_url'} dictionaries)
hemi_dict = []

for z in range(4):
    #build the record once, then both store it and echo it
    hemi_record = {'title': hemi_names[z], 'img_url': hemi_images[z]}
    hemi_dict.append(hemi_record)
    print(hemi_record)