In [1]:
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from time import sleep
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Step 1 - Scraping

## NASA Mars News

In [3]:
# A plain HTTP fetch was attempted first, but the HTML it returned was not the
# same as what the browser displays, so the page is loaded through splinter.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# URL of page to be scraped
mars_news_url = 'https://mars.nasa.gov/news/'

# initialize
nasa_mars_news = []

try:
    browser.visit(mars_news_url)
    html = browser.html
finally:
    # Always shut the browser down, even if the visit fails, so no Chrome
    # process is left behind.
    browser.quit()

soup_result = BeautifulSoup(html, 'html.parser')

# Only the first element with class 'slide' is used
result = soup_result.find(class_='slide')
if result is None:
    raise ValueError("No element with class 'slide' found at " + mars_news_url)

title_tag = result.find(class_='content_title')
teaser_tag = result.find(class_='article_teaser_body')
if title_tag is None or teaser_tag is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None.
    raise ValueError("News slide is missing its title or teaser element")

news_title = title_tag.text
news_p = teaser_tag.text
nmn = {
    "news_title": news_title,
    "news_p": news_p,
}
nasa_mars_news.append(nmn)
nasa_mars_news

[{'news_title': 'InSight Captures Sunrise and Sunset on Mars',
  'news_p': "InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day."}]

## JPL Mars Space Images - Featured Image

In [4]:
# Drive headless Chrome with splinter to reach the current Featured Mars Image
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=True, **executable_path)

# First page: JPL Featured Space Image
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

# Click the 'FULL IMAGE' link, then pause 5 seconds before the next cell runs
browser.click_link_by_partial_text('FULL IMAGE')
sleep(5)

In [5]:
# Second page: follow the 'more info' link, continuing from the page state left
# by the 'FULL IMAGE' click in the previous cell (same `browser` session).
browser.click_link_by_partial_text('more info')

In [6]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Download options are held in 'download_tiff' divs; keep the link of every
# div whose text mentions JPG.
image_urls = soup.find_all('div', class_='download_tiff')
jpg_hrefs = [
    image_url.find('a')['href']
    for image_url in image_urls
    if "JPG" in image_url.text
]

if not jpg_hrefs:
    # Without this check the name below would be undefined (or, worse, silently
    # reuse a stale value from an earlier run of the kernel).
    raise ValueError("No JPG download link found on the image detail page")

# The last matching link wins, as in the original loop. The href is prefixed
# with the scheme (the recorded output shows it lacks one).
featured_image_url = 'https:' + jpg_hrefs[-1]
print(featured_image_url)

https://photojournal.jpl.nasa.gov/jpeg/PIA17924.jpg


In [7]:
# Close the splinter browser opened for the JPL featured-image cells
browser.quit()

## Mars Weather

In [8]:
# URL of page to be scraped
# Mars Weather twitter account
mars_weather_twitter_url = 'https://twitter.com/marswxreport?lang=en'

# Retrieve page with the requests module; fail early on an HTTP error status
# rather than parsing an error page.
response = requests.get(mars_weather_twitter_url)
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

# Only the first tweet container on the page is used
result = soup.find(class_='js-tweet-text-container')
if result is None:
    raise ValueError("No tweet container found at " + mars_weather_twitter_url)

tweet_text = result.find(class_='TweetTextSize')
if tweet_text is None:
    raise ValueError("Tweet container has no 'TweetTextSize' element")
mars_weather = tweet_text.text

# The tweet text may end with an embedded link; drop the link and everything
# after it. A tweet without a link (or with empty link text) is kept whole,
# since str.split('') would raise ValueError.
link_tag = result.find(class_='twitter-timeline-link')
time_link = link_tag.text if link_tag is not None else ''
if time_link:
    mars_weather = mars_weather.split(time_link)[0]
print(mars_weather)

InSight sol 153 (2019-05-02) low -98.5ºC (-145.3ºF) high -17.5ºC (0.6ºF)
winds from the SW at 4.7 m/s (10.5 mph) gusting to 11.9 m/s (26.6 mph)
pressure at 7.40 hPa


## Mars Facts

In [2]:
# Mars Facts webpage
mars_facts_url = 'http://space-facts.com/mars/'

# pandas' `read_html` returns a list of DataFrames; only the first is used
all_tables = pd.read_html(mars_facts_url)
tables = all_tables[0]

# Give the two columns readable labels
tables.columns = ['Description', 'Facts']
tables

Unnamed: 0,Description,Facts
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [17]:
# Convert the facts table into a list of {'Description': ..., 'Facts': ...}
# dicts, one per row. `to_dict('records')` does this directly, replacing the
# row-by-row `iterrows` loop; selecting the two columns explicitly keeps the
# output limited to exactly these keys.
mars_facts = tables[['Description', 'Facts']].to_dict('records')
mars_facts

[{'Description': 'Equatorial Diameter:', 'Facts': '6,792 km'},
 {'Description': 'Polar Diameter:', 'Facts': '6,752 km'},
 {'Description': 'Mass:', 'Facts': '6.42 x 10^23 kg (10.7% Earth)'},
 {'Description': 'Moons:', 'Facts': '2 (Phobos & Deimos)'},
 {'Description': 'Orbit Distance:', 'Facts': '227,943,824 km (1.52 AU)'},
 {'Description': 'Orbit Period:', 'Facts': '687 days (1.9 years)'},
 {'Description': 'Surface Temperature:', 'Facts': '-153 to 20 °C'},
 {'Description': 'First Record:', 'Facts': '2nd millennium BC'},
 {'Description': 'Recorded By:', 'Facts': 'Egyptian astronomers'}]

## Mars Hemispheres

In [10]:
# USGS Astrogeology site
astrogeology_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# initialize
hemisphere_image_urls = []

# Retrieve the search-results page with the requests module to collect the
# hemisphere entries. This is done before launching the browser so a failed
# request does not leave a Chrome process behind.
response = requests.get(astrogeology_url)
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

# Retrieve the link elements for all hemisphere entries
hemisphere_results = soup.find_all(class_='itemLink')

# Use splinter to click through to each hemisphere's page and read its
# download link.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(astrogeology_url)

    # Loop through results
    for result in hemisphere_results:
        title = result.find(class_='description').text
        browser.click_link_by_partial_text(title)
        html = browser.html
        soup_result = BeautifulSoup(html, 'html.parser')
        # Second <dd> of the first <dl> inside the 'content' > 'block' element
        image_urls = soup_result.find(class_='content').find(class_='block').find('dl').find('dd').find_next('dd')
        # Iterating a tag yields its children, which include plain text nodes
        for image_url in image_urls:
            try:
                img_url = image_url['href']
            except (TypeError, KeyError):
                # TypeError: child is a text node (string indexing by 'href');
                # KeyError: child is a tag without an href attribute.
                continue
            img_url_jpg = img_url + '/full.jpg'
            hiu = {
                "title": title,
                "img_url": img_url_jpg,
            }
            hemisphere_image_urls.append(hiu)
        # always go back to the initial url window
        browser.back()
finally:
    # close browser, even when a click or lookup above fails
    browser.quit()

In [11]:
# Display the list of {'title', 'img_url'} dicts built in the previous cell
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

# Step 2 - MongoDB and Flask Application