# Part One: Scrape NASA

Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables to reference later.

In [1]:
#Import dependencies
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests
import pymongo
from splinter import Browser
import time

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.mars_db
collection = db.news

In [4]:
#Confirm db connection
mars_news = db.news.find()
for i in mars_news:
    print(i)

{'_id': ObjectId('5f57bf9b21d5d334336b3609'), 'test': 'news'}


In [7]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = bs(response.text, 'lxml')

In [8]:
#Find first headline to collect latest news title and paragraph text.
headlines = soup.find_all('div', class_='content_title')
latest_news_title = headlines[0].text
latest_news_title = latest_news_title.strip()
latest_news_title

"NASA Readies Perseverance Mars Rover's Earthly Twin"

In [9]:
##Find paragraph text of first article
paragraph = soup.find_all('div', class_='image_and_description_container')
#paragraph_latest_news = 
first_article = paragraph[0]
first_article_description = first_article.find('div', class_='rollover_description_inner')
first_article_description = first_article_description.text.strip()
first_article_description

"Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape."

In [12]:
#Create dict of mars news? 
mars_news_dict = {
    "latest_news_title": latest_news_title,
    "latest_news_paragraph": first_article_description
}
#Insert into db.news
db.news.insert_one(mars_news_dict)

<pymongo.results.InsertOneResult at 0x29818eccbc8>

In [13]:
##Confirm insertion
news = db.news.find()
for i in news:
    print(i)

{'_id': ObjectId('5f57bf9b21d5d334336b3609'), 'test': 'news'}
{'_id': ObjectId('5f57c0bd48e3f4f05cbbd54b'), 'latest_news_title': "NASA Readies Perseverance Mars Rover's Earthly Twin", 'latest_news_paragraph': "Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape."}


# Part Two: Scrape JPL for featured image

Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars). Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.

In [18]:
##Set up splinter to visit the URL for JPL featured space image 
executable_path = {'executable_path': r'C:\Program Files\Chromedriver\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [19]:
##Set up URL to visit using chromedriver
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
#Click 'full image'
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(5)
#Go to 'more info'
browser.click_link_by_partial_text('more info')

In [20]:
##Find image URL of featured image 
html = browser.html
soup = bs(html, 'html.parser')
featured_image_rel_path = soup.find('figure', class_='lede').a['href']
featured_image_rel_path

'/spaceimages/images/largesize/PIA08813_hires.jpg'

In [21]:
##Append base URL for JPL to featured image
base_url = 'https://www.jpl.nasa.gov'
featured_image_url = base_url + featured_image_rel_path
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA08813_hires.jpg'

In [24]:
##Concatenate base_url and image url
full_url = base_url + featured_image_url
full_url

'https://www.jpl.nasa.govhttps://www.jpl.nasa.gov/spaceimages/images/largesize/PIA08813_hires.jpg'

In [26]:
#Put featured image into dictionary
featured_image_dict = {
    "featured_image_url": full_url
}
#Put into db w/its own collection? 
db.featured_image.insert_one(featured_image_dict)

<pymongo.results.InsertOneResult at 0x29819102f48>

# Part 3: Mars Facts

Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

In [48]:
##Set up URL to read with pandas
url = 'https://space-facts.com/mars/'

In [51]:
##Read tabular data from page w/pandas
tables = pd.read_html(url)
tables[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [54]:
mars_facts = tables[0]
mars_facts_transposed = mars_facts.transpose()
mars_facts_transposed

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Equatorial Diameter:,Polar Diameter:,Mass:,Moons:,Orbit Distance:,Orbit Period:,Surface Temperature:,First Record:,Recorded By:
1,"6,792 km","6,752 km",6.39 × 10^23 kg (0.11 Earths),2 (Phobos & Deimos),"227,943,824 km (1.38 AU)",687 days (1.9 years),-87 to -5 °C,2nd millennium BC,Egyptian astronomers


In [55]:
##Convert the data to an HTML table string
mars_facts_transposed_html = mars_facts_transposed.to_html()
mars_facts_transposed_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n      <th>3</th>\n      <th>4</th>\n      <th>5</th>\n      <th>6</th>\n      <th>7</th>\n      <th>8</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>Polar Diameter:</td>\n      <td>Mass:</td>\n      <td>Moons:</td>\n      <td>Orbit Distance:</td>\n      <td>Orbit Period:</td>\n      <td>Surface Temperature:</td>\n      <td>First Record:</td>\n      <td>Recorded By:</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>6,792 km</td>\n      <td>6,752 km</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n      <td>227,943,824 km (1.38 AU)</td>\n      <td>687 days (1.9 years)</td>\n      <td>-87 to -5 °C</td>\n      <td>2nd millennium BC</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

# Part 4: Mars Hemispheres

Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mars' hemispheres.

### Part 4A--Find Cerberus Image & Title

In [58]:
##Set up scraper for images of Mars' hemispheres, starting w/Cerberus
# URL of page to be scraped
url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = bs(response.text, 'lxml')

In [104]:
##Find title for cerberus hemisphere image
cerberus_title = soup.find('title').text
cerberus_title = cerberus_title.split('|')
cerberus_title = cerberus_title[0]
cerberus_title

'Cerberus Hemisphere Enhanced '

In [93]:
##Find URL for cerberus hemisphere image
cerberus_hemisphere = soup.find_all('div', class_= 'wide-image-wrapper')
cerberus_hemisphere = cerberus_hemisphere[0]
cerberus_hemisphere = cerberus_hemisphere.find_all('img', class_='wide-image')
cerberus_hemisphere = cerberus_hemisphere[0]
cerberus_hemisphere = cerberus_hemisphere['src']
cerberus_hemisphere

'/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'

In [96]:
##Save base URL to save all urls to images 
base_url = 'https://astrogeology.usgs.gov/'
cerberus_full_url = base_url + cerberus_hemisphere
cerberus_full_url

'https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'

In [105]:
#Initialize dictionary to save url image and title
hemisphere_image_urls = [
    {"title": cerberus_title, "img_url": cerberus_full_url}
]

### Part 4B--Find Valles Marineris Hemisphere Image & Title

In [107]:
##Set up scraper for images of Mars' hemispheres, now Marineris
# URL of page to be scraped
url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = bs(response.text, 'lxml')

In [108]:
##Find title for Valles Marineris hemisphere image
valles_title = soup.find('title').text
valles_title = valles_title.split('|')
valles_title = valles_title[0]
valles_title

'Valles Marineris Hemisphere Enhanced '

In [109]:
##Find URL for valles hemisphere image
valles_hemisphere = soup.find_all('div', class_= 'wide-image-wrapper')
valles_hemisphere = valles_hemisphere[0]
valles_hemisphere = valles_hemisphere.find_all('img', class_='wide-image')
valles_hemisphere = valles_hemisphere[0]
valles_hemisphere = valles_hemisphere['src']
valles_hemisphere

'/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'

In [110]:
##Construct full image URL
valles_full_url = base_url + valles_hemisphere
valles_full_url

'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'

In [112]:
##Add to dictionary
hemisphere_image_urls.append({"title": valles_title, "img_url": valles_full_url})
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

### Part 4C--Find Schiaparelli Hemisphere Image & Title

In [113]:
##Set up scraper for images of Mars' hemispheres, now Schiaparelli
# URL of page to be scraped
url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = bs(response.text, 'lxml')

In [114]:
##Find title for Schiaparelli Hemisphere image
schiaparelli_title = soup.find('title').text
schiaparelli_title = schiaparelli_title.split('|')
schiaparelli_title = schiaparelli_title[0]
schiaparelli_title

'Schiaparelli Hemisphere Enhanced '

In [115]:
##Find URL for schiaparelli hemisphere image
schiaparelli_hemisphere = soup.find_all('div', class_= 'wide-image-wrapper')
schiaparelli_hemisphere = schiaparelli_hemisphere[0]
schiaparelli_hemisphere = schiaparelli_hemisphere.find_all('img', class_='wide-image')
schiaparelli_hemisphere = schiaparelli_hemisphere[0]
schiaparelli_hemisphere = schiaparelli_hemisphere['src']
schiaparelli_hemisphere

'/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'

In [116]:
##Construct full image URL
schiaparelli_full_url = base_url + schiaparelli_hemisphere
schiaparelli_full_url

'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'

In [117]:
##Add to dictionary
hemisphere_image_urls.append({"title": schiaparelli_title, "img_url": schiaparelli_full_url})
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}]

### Part 4D--Find Syrtis Major Hemisphere Image & Title

In [118]:
##Set up scraper for images of Mars' hemispheres, now Syrtis Major
# URL of page to be scraped
url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = bs(response.text, 'lxml')

In [119]:
##Find title for Syrtis Major Hemisphere image
syrtis_title = soup.find('title').text
syrtis_title = syrtis_title.split('|')
syrtis_title = syrtis_title[0]
syrtis_title

'Syrtis Major Hemisphere Enhanced '

In [120]:
##Find URL for Syrtis Major hemisphere image
syrtis_hemisphere = soup.find_all('div', class_= 'wide-image-wrapper')
syrtis_hemisphere = syrtis_hemisphere[0]
syrtis_hemisphere = syrtis_hemisphere.find_all('img', class_='wide-image')
syrtis_hemisphere = syrtis_hemisphere[0]
syrtis_hemisphere = syrtis_hemisphere['src']
syrtis_hemisphere

'/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'

In [121]:
##Construct full image URL
syrtis_full_url = base_url + syrtis_hemisphere
syrtis_full_url

'https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'

In [122]:
##Add to dictionary
hemisphere_image_urls.append({"title": syrtis_title, "img_url": syrtis_full_url})
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced ',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}]