In [18]:
import pandas as pd
import numpy as np
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import pymongo
import os
import time

# Windows Users

In [19]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Mac Users

In [3]:
#/usr/local/bin/chromedriver
#executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
#browser = Browser('chrome', **executable_path, headless=False)

#                            *SET UP WEBSITES SCRAPING*

# NASA Mars News Site 

In [4]:
#NASA website via Splinter
url_nasa = 'https://mars.nasa.gov/news/'
browser.visit(url_nasa)

In [5]:
#HTML Object
html_nasa = browser.html
#Parsing HTML with Beautiful Soup
soup = BeautifulSoup(html_nasa, 'html.parser')

#Set up news_title scrape
news_title = soup.find('div',class_='content_title').find('a').text #goes two levels deep
news_p = soup.find('div', class_='article_teaser_body').text


#Display scrapped info
print(news_title)
print(news_p)

NASA's MRO Completes 60,000 Trips Around Mars
The orbiting spacecraft is also about to set a record for data relayed from the Martian surface.


# JPL Mars Space Images - Featured Image

In [20]:
#JPL NASA website for images via Splinter
url_jpl = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url_jpl)

In [21]:
#HTML Object for the image
html_jpl = browser.html
#Parsing HTML with Beautiful Soup
mars_image = BeautifulSoup(html_jpl, 'html.parser')

#Set up image scrape
image_url = mars_image.find('a', class_='button fancybox')['data-fancybox-href']
#print(image_url)

main_url_jpl = 'https://www.jpl.nasa.gov'

featured_image_url=main_url_jpl + image_url

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA22893_ip.jpg


# Mars Weather with Twitter

In [8]:
#Mars Weather Tweeter page for temperature via Splinter
url_mars_tweet = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url_mars_tweet)

In [9]:
#HTML Object for the image
html_mars_tweet = browser.html

#Parsing HTML with Beautiful Soup
soup = BeautifulSoup(html_mars_tweet, 'html.parser')

#Set up temperature scrape
tweet_url = soup.find('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text').text
print(tweet_url)

InSight sol 169 (2019-05-18) low -100.6ºC (-149.1ºF) high -17.6ºC (0.4ºF)
winds from the S at 4.6 m/s (10.2 mph) gusting to 15.5 m/s (34.7 mph)
pressure at 7.50 hPapic.twitter.com/QKbNMc35Ia


# Mars Facts

In [10]:
#Set up url
url_mars_facts = 'https://space-facts.com/mars/'

In [11]:
#Scrape tabular data from url above 
mars_facts = pd.read_html(url_mars_facts)
mars_facts

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [12]:
#Create DataFrame
mars_facts_df = mars_facts[0]

#Rename columns from '0',''1 to 'Description' and 'Info'
mars_facts_df.columns= ['Description', 'Info']

#Set Index to be 'Description'
mars_facts_df.set_index('Description', inplace=True)

mars_facts_df

Unnamed: 0_level_0,Info
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [13]:
#Generate HTML table from DataFrame
html_table = mars_facts_df.to_html()
#Clean HTML data for unwanted newlines
html_table.replace('\n', '')
#Save table to a file
mars_facts_df.to_html('mars_facts_table.html')

html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Info</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

# Mars Hemispheres

In [14]:
#Hemispheres website via Splinter
url_hemispheres = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url_hemispheres)

In [15]:
#HTML Object
html_hemispheres = browser.html
#Parsing HTML with Beautiful Soup
soup = BeautifulSoup(html_hemispheres, 'html.parser')

#Get items that have hemispheres info
hemispheres = soup.find_all('div', class_='item')

#Set up empty list for hemisphere url info
hemisphere_image_urls = []

#Main link
main_url_hemisphere = 'https://astrogeology.usgs.gov/'

#Set up a loop through all the hemispheres info
for hemisphere in hemispheres:
    
    #Scrap titles
    title = hemisphere.find('h3').text
    
    #Get url for each image
    image_link = hemisphere.find('a', class_="itemLink product-item")['href']
    
    #Combine image_url and main url
    image_url = main_url_hemisphere + image_link
    
    #Click into link for each image via Splinter function to get to jpg link
    browser.visit(image_url)
    
    #HTML Object
    html_full_image = browser.html
    
    #Parse a specific hemisphere information website 
    soup = BeautifulSoup(html_full_image, 'html.parser')
    
    #Get jpg link for each image
    image_jpg = soup.find('img', class_='wide-image')['src']
    
    #Combine main url with jpg link
    image_url_jpg = main_url_hemisphere + image_jpg
    
    #Append the empty list
    hemisphere_image_urls.append({'title': title,"image_url_png":image_url_jpg})
    
    

In [16]:
#Show contents of list
hemisphere_image_urls


[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url_png': 'https://astrogeology.usgs.gov//cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url_png': 'https://astrogeology.usgs.gov//cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url_png': 'https://astrogeology.usgs.gov//cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url_png': 'https://astrogeology.usgs.gov//cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]