In [1]:
#import dependencies
from bs4 import BeautifulSoup
from selenium import webdriver
from splinter import Browser
import pandas as pd
import requests
import time
import re

# 1. NASA MARS NEWS

In [2]:
#target news url
news_url = 'https://mars.nasa.gov/news'

In [3]:
#use webdriver to load page with dynamic JS before scraping
news_driver = webdriver.Chrome()
news_driver.get(news_url)
time.sleep(1)

In [4]:
#load html
news_soup = BeautifulSoup(news_driver.page_source)

In [5]:
#find first headline + article summary
news_headline = news_soup.find('div', class_='content_title')
news_teaser = news_soup.find('div', class_='article_teaser_body')

news_title = news_headline.text
news_p = news_teaser.text

print(news_title)
print(news_p)

NASA's Treasure Map for Water Ice on Mars
A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.


# 2. JPL MARS IMAGES

In [6]:
#target jpl image page
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [7]:
#setup chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [8]:
#visit url
browser.visit(jpl_url)

In [9]:
#push full image button
jpl_button = browser.find_by_id('full_image')
jpl_button.click()

In [10]:
#soup the html
jpl_soup = BeautifulSoup(browser.html, 'html.parser')

In [11]:
#isolate the image code
jpl_image = jpl_soup.find('article', class_='carousel_item')

In [12]:
#isolate the tag containing the full-sized image location
jpl_fullimage = jpl_image['style']

In [13]:
#use regex to parse the tag string
jpl_regex = re.search( 'spaceimages/images/wallpaper/\w+-\d+\w\d+.jpg', jpl_fullimage)

In [14]:
#final - image location
featured_image_url = f'http://jpl.nasa.gov/{jpl_regex.group()}'
print(featured_image_url)

http://jpl.nasa.gov/spaceimages/images/wallpaper/PIA16842-1920x1200.jpg


# 3. MARS WEATHER

In [15]:
#set target url
weather_url = 'https://twitter.com/marswxreport'

In [16]:
#request html
weather_request = requests.get(weather_url)

In [17]:
#parse through HTML with bs
weather_soup = BeautifulSoup(weather_request.text, 'html.parser')

In [18]:
#find latest tweet
mars_weather = weather_soup.find('p', class_='TweetTextSize').text

In [19]:
#confirmation
print(mars_weather)

InSight sol 372 (2019-12-13) low -97.2ºC (-143.0ºF) high -21.2ºC (-6.2ºF)
winds from the SSE at 5.9 m/s (13.3 mph) gusting to 20.2 m/s (45.2 mph)
pressure at 6.60 hPapic.twitter.com/SXXKNyUaJu


# 4. MARS FACTS

In [20]:
#set target url
facts_url = 'https://space-facts.com/mars/'

In [21]:
#request html
facts_request = requests.get(facts_url)

In [22]:
facts_soup = BeautifulSoup(facts_request.text, 'html.parser')

In [23]:
facts_table = facts_soup.find('table')

In [24]:
facts_list = pd.read_html(str(facts_table))
print(facts_list)

[                      0                              1
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers]


# 5. MARS HEMISPHERES

In [25]:
#target hemisphere url
hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [26]:
#request the hemi HTML and use bs to parse it
hemi_request = requests.get(hemi_url)
hemi_soup = BeautifulSoup(hemi_request.text, 'html.parser')

In [27]:
#find each link leading to the four hemisphere pages
hemi_links = hemi_soup.find_all('a', class_='product-item')

In [28]:
#holder for scrape of anchor text containing hemisphere names
hemi_names = []

#loop through anchors for text
for x in range(4):
    hemi_split = hemi_links[x].text.split()
    hemi_holder = []
    
    #separate "enhanced" from hemisphere names
    for y in range(len(hemi_split)):
        if hemi_split[y] != "Enhanced":
            hemi_holder.append(hemi_split[y])
            
    hemi_names.append(" ".join(hemi_holder))

print(hemi_names)

['Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere', 'Valles Marineris Hemisphere']


In [29]:
#holder for image urls
hemi_images = []

#loop through individual pages and save url names
for j in range(4):
    d_url = "https://astrogeology.usgs.gov" + hemi_links[j]['href']
    d_request = requests.get(d_url)
    d_soup = BeautifulSoup(d_request.text, 'html.parser')
    d_link = d_soup.find('a', text="Original")
    hemi_images.append(d_link['href'])
    #open 
    browser.visit(d_link['href'])
    print(d_link['href'])

http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif


In [30]:
#combine hemi names with image urls
hemi_dict = []

for z in range(4):
    hemi_dict.append({'title': hemi_names[z], 'img_url': hemi_images[z]})
    print({'title': hemi_names[z], 'img_url': hemi_images[z]})

{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'}
{'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'}
{'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'}
{'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}
