# NASA Mars News

Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables.

In [39]:
#Import all dependencies
import pandas as pd
import requests
from splinter import Browser
import os
from bs4 import BeautifulSoup as soup
import pymongo

In [40]:
# Connection string for the MongoDB server on this machine (default port)
connection = 'mongodb://localhost:27017'

# Client object that every later database handle is derived from
client = pymongo.MongoClient(host=connection)

In [41]:
# Handle to the 'mars_news_db' database on the connected client
db = client['mars_news_db']

# Handle to the 'items' collection inside that database
collection = db['items']

In [42]:
#Associate url to variable for ease of calling
nasa_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

#Get the results from the request, store in response variable.
#The timeout (seconds) keeps the cell from hanging forever on an unresponsive server.
nasa_response = requests.get(nasa_url, timeout=30)

#Stop here on an HTTP error (4xx/5xx) rather than parsing an error page downstream
nasa_response.raise_for_status()

In [43]:
# Parse the downloaded news page with the lxml parser
nasa_object = soup(nasa_response.text, features='lxml')

# Keep only the first <div class="slide"> found under <body>
nasa_result = nasa_object.body.find('div', attrs={'class': 'slide'})

In [44]:
# Pull the title and the teaser text out of the slide found above
# (no loop is needed: nasa_result is a single element)
article_title = nasa_result.find('div', attrs={'class': 'content_title'}).get_text()
article_header = nasa_result.find('div', attrs={'class': 'rollover_description_inner'}).get_text()

# JPL Mars Space Images

Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.

In [45]:
#Use splinter to navigate to the site:
# 1 -- establish path. Setting the CHROMEDRIVER_PATH environment variable
#      overrides the machine-specific location, which stays as the fallback.
executable_path = {
    'executable_path': os.environ.get(
        'CHROMEDRIVER_PATH', '/Users/fullguest/Downloads/chromedriver'
    )
}
#(establish browser)
browser = Browser('chrome', **executable_path, headless=False)

#2 -- establish url
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

#3 -- feed the browser the url and command to visit
browser.visit(jpl_url)

#4 -- get html data
jpl_html = browser.html

#5 -- convert using lxml in soup.
#     BeautifulSoup is imported under the alias `soup`; calling the bare name
#     `BeautifulSoup` here raised NameError on a fresh kernel.
jpl_soup = soup(jpl_html, 'lxml')

In [46]:
# Locate the featured image: the <div class="img"> nested inside the
# first <div class="image_and_description_container"> under <body>
featured_container = jpl_soup.body.find('div', attrs={'class': 'image_and_description_container'})
featured_img = featured_container.find('div', attrs={'class': 'img'})

# The src attribute of its <img> tag is the path ("footer") appended to the base url later
img_url_footer = featured_img.find('img')['src']

In [47]:
# Site root that the scraped image path is appended to
jpl_base_url = 'https://www.jpl.nasa.gov'

# Full image url = site root immediately followed by the scraped path
featured_image_url = f"{jpl_base_url}{img_url_footer}"

# Mars Weather

Scrape the latest Mars weather tweet from the page.

In [48]:
#Twitter base url
twitter_url = 'https://twitter.com/marswxreport?lang=en'

#Get the twitter text; the timeout (seconds) keeps the cell from hanging forever
twitter_response = requests.get(twitter_url, timeout=30)

#Stop here on an HTTP error (4xx/5xx) rather than parsing an error page
twitter_response.raise_for_status()

#Convert into lxml.
#BeautifulSoup is imported under the alias `soup`; calling the bare name
#`BeautifulSoup` here raised NameError on a fresh kernel.
twitter_soup = soup(twitter_response.text, 'lxml')

# Get the first 10 tweet results
twitter_results = twitter_soup.body.find_all('div',class_='js-tweet-text-container')[0:10]

print(twitter_results)

[<div class="js-tweet-text-container">
<p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">And finally - a stack of all the images - you can see so much detail here <a class="twitter-timeline-link" data-expanded-url="https://dougellison.smugmug.com/Machines/Space-X-SAOCOM-1A/i-zbJzq8L" dir="ltr" href="https://t.co/r6pHbPsqq5" rel="nofollow noopener" target="_blank" title="https://dougellison.smugmug.com/Machines/Space-X-SAOCOM-1A/i-zbJzq8L"><span class="tco-ellipsis"></span><span class="invisible">https://</span><span class="js-display-url">dougellison.smugmug.com/Machines/Space</span><span class="invisible">-X-SAOCOM-1A/i-zbJzq8L</span><span class="tco-ellipsis"><span class="invisible"> </span>…</span></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/9nLwAH3oZQ">pic.twitter.com/9nLwAH3oZQ</a></p>
</div>, <div class="js-tweet-text-container">
<p class="TweetTextSize TweetTextSize--n

In [49]:
# Default value so the cell still runs (and prints None) when none of the
# fetched tweets is a weather report, instead of raising NameError or
# silently reusing a value left over from an earlier run.
mars_weather = None

# Use for loop to avoid gathering any data that is not weather data:
# the first tweet whose text starts with "Sol" is taken as the weather report.
for result in twitter_results:
    tweet_tag = result.find('p', class_='js-tweet-text')
    if tweet_tag is None:
        # Container without a tweet paragraph: nothing to inspect, skip it
        continue
    tweet_text = tweet_tag.text
    if tweet_text.startswith("Sol"):
        mars_weather = tweet_text
        break
print(mars_weather)

Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59


# Mars Facts
Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

In [50]:
# Page that holds the Mars fact table
mars_fact_url = 'https://space-facts.com/mars/'

# read_html returns one DataFrame per table it parses; keep the first
mars_tables = pd.read_html(mars_fact_url)[0]

# Name the two columns, then promote the description column to the index
mars_tables.columns = ['Description', 'Value']
mars_tables = mars_tables.set_index('Description')

# Display the indexed table
mars_tables

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [51]:
#Convert new tables to html
html_table = mars_tables.to_html()
#Replace any '\n' with nothing
html_table = html_table.replace('\n', '')

#Save the table into the html file.
#The encoding is explicit because the table contains non-ASCII text (the
#degree sign in the temperature row); without it the write depends on the
#platform's default encoding.
with open("mars_fact.html", "w", encoding="utf-8") as file:
    file.write(html_table)

# Mars Hemispheres
1. Obtain high-resolution images for each of Mars's hemispheres from the USGS Astrogeology site.
2. Click each of the links to the hemispheres in order to find the image url to the full resolution image.
3. Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.
4. Append the dictionary with the image url string and the hemisphere title to a list using one dictionary for each hemisphere.

In [52]:
#Establish url
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

#Visit url via the chrome browser
browser.visit(usgs_url)

#Establish USGS html
usgs_html = browser.html

#Soup with 'lxml'.
#BeautifulSoup is imported under the alias `soup`; calling the bare name
#`BeautifulSoup` here raised NameError on a fresh kernel.
usgs_soup = soup(usgs_html, 'lxml')

#Assign variable for USGS base url
usgs_base_url = 'https://astrogeology.usgs.gov'


In [53]:
# Every <div class="item"> under <body> is one hemisphere search result
usgs_items = usgs_soup.body.find_all('div', attrs={'class': 'item'})

# Empty list reserved for results
item_links = list()

# USGS item-link function to return the href for each image formatted as a string to add to list of image urls
def GetDownloadLink(url):
    """Visit a USGS hemisphere detail page and return an image download href.

    Args:
        url: Absolute URL of the detail page; it is opened in the shared
            ``browser`` session, which is left on that page afterwards.

    Returns:
        The ``href`` of the second ``<a>`` inside the page's
        ``<div class="downloads">`` block, as a string.
    """
    browser.visit(url)
    html = browser.html
    # Parse with `soup`, the alias BeautifulSoup is imported under; the bare
    # name `BeautifulSoup` raised NameError on a fresh kernel. The parsed page
    # gets its own local name so it does not shadow that alias in here.
    page = soup(html, 'lxml')
    # Index 1 selects the second link of the downloads block
    href = page.body.find('div',class_='downloads').find_all('a')[1]['href']
    print(f"image link: {href}")
    return href

In [54]:
# Build one {'title', 'img_url'} record per hemisphere search result
hemis_image_urls = []

for item in usgs_items:
    # Hemisphere name comes from the result's <h3> heading
    title = item.find('h3').text
    # Relative link to the hemisphere's detail page
    href = item.find('a', class_='itemLink product-item')['href']
    # Follow the detail page to its download link and store the finished record
    hemis_image_urls.append({
        'title': title,
        'img_url': GetDownloadLink(usgs_base_url + href),
    })

hemis_image_urls

image link: http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif
image link: http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif
image link: http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif
image link: http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif


[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]