In [1]:
# import dependencies
import pandas as pd
import requests
import time
from bs4 import BeautifulSoup as bs
from splinter import Browser
from selenium import webdriver


In [18]:
# Create executable path
executable_path = {'executable_path': 'C:/bin/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Step 1: Web-Scraping

## NASA Mars News
* Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text
* Assign the text to variables that you can reference later

In [21]:
# URL of page to be scraped
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)
time.sleep(5)
news_html = browser.html

# Parse site with beautifulsoup
news_soup = bs(news_html, 'html.parser')

# Find the latests News Title and paragraph text and save as variable
article = news_soup.find("div", class_='list_text')
news_title = article.find("div", class_="content_title").text
news_p = article.find("div", class_="article_teaser_body").text
print(news_title)
print('---------')
print(news_p)


Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover
---------
NASA chose a seventh-grader from Virginia as winner of the agency's "Name the Rover" essay contest. Alexander Mather's entry for "Perseverance" was voted tops among 28,000 entries. 


## JPL Mars Space Images
* From the JPL Featured Space Image site
* Use splinter to navigate the site and find the image url for the current Featured Mars Image
* Assign the url string to a variable called featured_image_url.

In [8]:
jpl_mars_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_mars_url)

time.sleep(1)
# Click 'full image' button to go to image
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(1)
# Click 'more info' button to go to full size image
browser.click_link_by_partial_text('more info')

# Parse html via Beautiful Soup
image_html = browser.html
image_soup = bs(image_html, 'html.parser')


In [9]:
#Saving image source
image_path = image_soup.find_all('img', class_="main_image")[0]["src"]
featured_image_url = f'https://www.jpl.nasa.gov{image_path}'

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23436_hires.jpg


## Mars Weather
* Vist the Mars Weather twitter account and scrape the latest Mars weather tweet from the page
* Save the tweet text for the weather reprot as a variable called mars_weather

In [10]:
weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)

time.sleep(8)
weather_html = browser.html

# Parse html with BS
weather_soup = bs(weather_html, 'html.parser')

# Extract tweet
tweet_container = weather_soup.find(attrs={"data-testid": "tweet"})
tweets = tweet_container.text


In [11]:
# Saving tweet as variable
weather_tweets = tweets.split("InSight ")
mars_weather = weather_tweets[1]
print(mars_weather)

sol 453 (2020-03-05) low -95.1ºC (-139.1ºF) high -10.8ºC (12.6ºF)
winds from the SSW at 6.0 m/s (13.3 mph) gusting to 21.4 m/s (47.9 mph)
pressure at 6.30 hPa425


## Mars Facts
* Visit the Mars Facts webpage
* Use Pandas to scrape the table containing facts about the planet including Diameter, Mars, etc. 
* Use Pandas to convert the data to a HTML table string.

In [12]:
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)

facts_html = browser.html

# Parser html with BS
facts_soup = bs(facts_html, 'html.parser')

# Extract table
table = pd.read_html(facts_url)
mars_facts = table[0]

In [15]:
mars_facts.columns = ["Fact", "Value"]

# Convert to HTML table string
mars_facts.to_html('table.html', index=False)
mars_facts

Unnamed: 0,Fact,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


## Mars Hemispheres
* Visit the USGS Astrogeology site to obtain high resolution images for each of Mar's hemispheres. 
* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. 
* Use a Python dictionary to store the data using the keys img_url and title.
* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [16]:
mars_hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(mars_hemi_url)
mars_hemi_html = browser.html

# Parser HTML with BS
mars_hemispheres_soup = bs(mars_hemi_html, 'html.parser')

# Creating list to store data
hemisphere_image_urls = []

# Getting all elements 
results = mars_hemispheres_soup.find_all('div', class_="item")

for result in results:
    # saving image title
    title = result.find('h3').text
    title = title.replace("Enhanced", '')
    
    # navigating for full image and setting up HTML parser
    click_link = result.find('a')['href']
    image_link = "https://astrogeology.usgs.gov" + click_link
    browser.visit(image_link)
    hemi_html = browser.html
    hemi_soup = bs(hemi_html, 'html.parser')
    
    # saving image urls
    links = hemi_soup.find('div', class_="downloads")
    img_url = links.find('a')['href']
    
    # appending list
    hemisphere_image_urls.append({"title": title,
                                 "img_url": img_url})

print(hemisphere_image_urls)


[{'title': 'Cerberus Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
