In [63]:
# dependencies and setup
from bs4 import BeautifulSoup as bs
from splinter import Browser
import time
import requests
import pandas as pd


In [53]:
# showing the computer where to find the chromedriver
executablePath = {"executablePath": "~/usr/local/bin/chromedriver.exec"}
browser = Browser("chrome", executable_path, headless=False)

In [54]:
# Visit the NASA website to find the top mars news article
marsUrl = "https://mars.nasa.gov/news/"
browser.visit(marsUrl)
time.sleep(1)
htmlMarsSite = browser.html
time.sleep(1)
# Scrape page into Soup
soup = bs(htmlMarsSite,"html.parser")
time.sleep(1)

In [55]:
# Find the the latest news title and headline text in soup
newsTitle = soup.find("div",class_="content_title").text
time.sleep(1)
newsP = soup.find("div",class_="article_teaser_body").text
time.sleep(1)

In [56]:
# print the variables to check that we're pulling the right things
print(f"The latest title is: {newsTitle}")
print(f"With the description: {newsP}")

The latest title is: Mars Now
With the description: The six-wheeled scientist is heading south to explore Jezero Crater’s lakebed in search of signs of ancient microbial life.


## Image

In [57]:
# open the url and scrape the featured image
url = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space'
browser.visit(url + '/index.html')
html = browser.html
soup = bs(html, 'html.parser')

# Scrape the featured image location
header = soup.find_all('div', class_='header')
for item in header: 
    featuredImg = item.find('img',class_='headerimage fade-in')['src']
    
# Get the full image url 
featuredImgUrl = url + '/' + featuredImg

# Quit 
browser.quit()

print(featuredImgUrl)

https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars1.jpg


## Facts

In [59]:
# Visit the Space Facts website to find Mars facts
marsFactsUrl = "https://space-facts.com/mars/"
marsFacts = pd.read_html(marsFactsUrl)
facts_df = marsFacts[0]
# Create a dataframe and add columns
facts_df.columns = ['Description','Value']
facts_df.to_html(header=False, index=False)
facts_df


Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


## Hemispheres

In [78]:
# Scrape pictures of each hemisphere 
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
response = requests.get(url)
soup = bs(response.text, 'html.parser')

In [79]:
# Navigate to item section 
links = soup.find_all('a', class_='itemLink product-item')

# Create empty list of links 
link_list = []

# Loop through link section and get image info links 
for link in links:
    img_link = link['href']
    
    # Add image info to list 
    link_list.append(img_link)

In [80]:
# Generate full links to image info pages 
base_url = 'https://astrogeology.usgs.gov'

# Empty list for full links 
full_links = []

# Loop through link list and generate full link
for link in link_list:
    full_link = base_url + link
    full_links.append(full_link)

In [81]:
# Empty lists for full-size image links and titles
img_links = []
titles = []

# Loop through each link and scrape title and links to the full size image
for link in full_links:
    response = requests.get(link)
    soup = bs(response.text, 'html.parser')
    
    # Navigate to title 
    title = soup.find('h2', class_='title').text
    titles.append(title)
    
    # Navigate to image link location
    img_link = soup.find('img', class_='wide-image')
    img_link = img_link['src']
    img_links.append(img_link)

In [82]:
# Generate full url link
full_img_links = []
for link in img_links:
    full_link = base_url + link
    full_img_links.append(full_link)

In [83]:
# Create list of dictionaries for each image
keys = ['title', 'img_url']
zip_list= list(zip(titles, full_img_links))
hemispheres = [{k:v for k,v in zip(keys, z)} for z in zip_list]

# Print 
for hemisphere in hemispheres:
    print(hemisphere)

{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}
{'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}
{'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}
{'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}


In [70]:
browser.quit()