# Web Scraping Homework - Mission to Mars

In [319]:
 # Set dependencies
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
import pandas as pd

# NASA Mars News

In [286]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

In [287]:
# Retrieve page with the requests module
response = requests.get(url)

In [288]:
 # Create BeautifulSoup object; parse with 'html.parser'
soup = bs(response.text, 'html.parser')

In [292]:
# Set HTML path to find the news title
news_title = soup.find('div', class_='content_title').text
news_title = news_title.replace('\n', '')
news_title

'NASA Prepares for Moon and Mars With New Addition to Its Deep Space Network'

In [294]:
# Set HTML path to find the first paragraph
paragraph = soup.find('div', class_="rollover_description_inner").text
paragraph = paragraph.replace('\n', '')
paragraph

'Robotic spacecraft will be able to communicate with the dish using radio waves and lasers.'

# JPL Mars Space Images - Featured Image

In [191]:
# Use Splinter to navigate to and scrape an image of Mars
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [192]:
# Establish base URL for scraping
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [193]:
# Click "Full Image" button on Mars photo site
full_image_button = browser.links.find_by_partial_text('FULL IMAGE').click()

In [194]:
# Click "more info" button on second page
more_info_button = browser.links.find_by_partial_text('more info').click()

In [195]:
# Click JPG link on third page
jpg_link = browser.links.find_by_partial_text('jpg').click()

In [196]:
# Establish third page as where to find full size image link
html = browser.html
soup = bs(html, 'html.parser')

In [197]:
# Search for full size image on third page

full_size_image = soup.find('img')['src']
full_size_image

'https://photojournal.jpl.nasa.gov/jpeg/PIA18273.jpg'

# Mars Weather from Twitter

In [199]:
# URL of page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'

In [201]:
# Retrieve page with the requests module
response = requests.get(url)

In [202]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = bs(response.text, 'html.parser')

In [217]:
# Set HTML path to scrape information inside tweet
mars_weather = soup.find('p', class_='tweet-text').text
mars_weather

'InSight sol 449 (2020-03-01) low -93.6ºC (-136.4ºF) high -11.3ºC (11.7ºF)\nwinds from the SSW at 6.8 m/s (15.1 mph) gusting to 23.9 m/s (53.4 mph)\npressure at 6.30 hPapic.twitter.com/ZcHyd02Y9r'

# Mars Facts

In [506]:
# URL of page to be scraped
url = 'https://space-facts.com/mars/'

In [507]:
mars_facts = pd.read_html(url)

In [301]:
mars_facts_df = mars_facts[0]
mars_facts_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [300]:
new_mars_facts_df = mars_facts_df.rename(columns={"0": "Description", "1": "Value"})
new_mars_facts_df = new_mars_facts_df.set_index('Description')
new_mars_facts_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [409]:
 new_mars_facts_df.to_html('mars_facts.html')

# Mars Hemispheres

In [508]:
# Use Splinter to navigate to, scrape and download images of the four hemispheres of Mars
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [509]:
# URL of page to be scraped
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [510]:
html = browser.html
soup = bs(html, 'html.parser')

In [512]:
# Click image thumbnail for each hemisphere
mars_hemis_link = browser.find_by_tag('h3').click()
title = soup.find('h2').text

In [518]:
img_url = soup.find('li').a['href']

AttributeError: 'NoneType' object has no attribute 'a'

In [None]:
browser.back()

In [503]:
hemisphere_image_urls  = dict(zip(title, img_url))
hemisphere_image_urls

{'C': 'h',
 'e': 'l',
 'r': '.',
 'b': 'p',
 'u': '/',
 's': 'e',
 ' ': 's',
 'H': 't',
 'm': 'o',
 'i': 'p',
 'p': 'd',
 'h': 'o',
 'E': 't',
 'n': 'e',
 'a': 'g',
 'c': 'o',
 'd': 'o'}