In [1]:
# Dependencies
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import pandas as pd
import selenium.webdriver as webdriver
import selenium.webdriver.support.ui as ui
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep 
from flask import Flask, jsonify, render_template

## Scraping News

In [2]:
# Start a visible (non-headless) Chrome session through the chromedriver
# binary expected next to this notebook.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# URL of page to be scraped
url_news = "https://mars.nasa.gov/news/"
browser.visit(url_news)

# Best-effort wait (up to 10 s) for the teaser element that the next cell
# parses, so the HTML snapshot is not taken before the article list exists.
# NOTE(review): presumably the list is rendered client-side, which is why a
# real browser is used here instead of requests -- confirm against the site.
# The return value is intentionally ignored: on timeout we still snapshot the
# page, exactly as before, and the parsing cell reports the missing element.
browser.is_element_present_by_css('div.article_teaser_body', wait_time=10)

html_news = browser.html
# Create BeautifulSoup object; parse with 'html.parser'
soup_news = BeautifulSoup(html_news, 'html.parser')

In [5]:
# The first matching elements in the parsed page are taken as the latest
# article: the headline is the link text inside the 'content_title' div and
# the summary is the text of the 'article_teaser_body' div.
title_div = soup_news.find('div', class_='content_title')
news_title = title_div.find('a').text
teaser_div = soup_news.find('div', class_='article_teaser_body')
news_p = teaser_div.text

# The page has been parsed, so the browser session is no longer needed.
browser.quit()

mars_news = dict(title=news_title, p=news_p)

# Display scraped data
print(news_title, news_p, sep="\n")

NASA's MAVEN Explores Mars to Understand Radio Interference at Earth
NASA’s MAVEN spacecraft has discovered “layers” and “rifts” in the electrically charged part of the upper atmosphere of Mars.


## Scraping Images

In [6]:
# A fresh visible Chrome session is started here because the previous one was
# quit at the end of the news section.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [7]:
# JPL space-images gallery; the query string selects the Mars category.
url_img = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Navigate the open browser session to the gallery.
browser.visit(url_img)

In [8]:
# Scrape the featured image's title and link from the carousel
html_img = browser.html
soup_img = BeautifulSoup(html_img, 'html.parser')

# The first carousel article carries the title in its <h1> and the image path
# in the 'data-fancybox-href' attribute of its first link.
img_grid = soup_img.find('article', class_='carousel_item')
img_alt = img_grid.h1.text.strip()

# The attribute holds a site-relative path, so prefix the host to make it absolute.
feat_img = "https://www.jpl.nasa.gov" + img_grid.a['data-fancybox-href']

featured_img = dict(img_alt=img_alt, url=feat_img)

# Everything needed has been extracted; release the browser.
browser.quit()

In [9]:
# Display the scraped featured-image record (keys: 'img_alt', 'url').
featured_img

{'img_alt': 'A Splendor Seldom Seen',
 'url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14934_ip.jpg'}

## Scraping Weather

In [10]:
# URL of page to be scraped
url_weather = 'https://twitter.com/marswxreport?lang=en'

# Retrieve page with the requests module.
# requests has no default timeout, so without one a stalled connection would
# hang this cell indefinitely; 30 s is a generous upper bound for one page.
response_weather = requests.get(url_weather, timeout=30)

# Fail here with a clear HTTPError on a 4xx/5xx response instead of parsing an
# error page and hitting an opaque AttributeError in the extraction cell.
response_weather.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup_weather = BeautifulSoup(response_weather.text, 'lxml')


In [11]:
# Class string identifying the tweet-text paragraph; the first match is used.
tweet_text_classes = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'
first_tweet = soup_weather.find('p', class_=tweet_text_classes)

# Trim surrounding whitespace, cut everything from the trailing " hPapic"
# marker onward, and flatten the remaining line breaks into comma separators.
weather_p = (
    first_tweet.text.strip()
    .split(" hPapic")[0]
    .replace("\n", ", ")
)

In [12]:
# Display the cleaned weather report string.
weather_p

'InSight sol 424 (2020-02-04) low -91.9ºC (-133.4ºF) high -12.7ºC (9.2ºF), winds from the SSE at 5.7 m/s (12.7 mph) gusting to 22.9 m/s (51.1 mph), pressure at 6.30'

## Scraping Facts

In [13]:
# Page holding the Mars facts table
url_facts = 'https://space-facts.com/mars/'

# pandas fetches the page itself and returns the tables it can parse from it.
tables_facts = pd.read_html(io=url_facts)

In [14]:
# Take the first table parsed from the facts page and give its columns
# readable labels.
# NOTE(review): assumes tables_facts[0] has exactly two columns -- assigning a
# two-item label list to a frame of any other width raises; confirm if the
# source page changes.
table_mars = pd.DataFrame(tables_facts[0])
table_mars.columns = ["Data", "Value"]


In [15]:
# Render the facts table as an HTML string, omitting the index column.
# A preceding `table_mars.style.hide_index()` assignment was dropped: its
# result was overwritten on the very next line, and Styler.hide_index no longer
# exists in pandas 2.x, so the call only served to break the cell there.
# `to_html(index=False)` alone produces the intended output.
mars_html_table = table_mars.to_html(index=False)

In [16]:
# Display the raw HTML string produced for the facts table.
mars_html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Data</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

## Scraping Mars Hemispheres

In [17]:
# Scrape each Mars hemisphere's name and image download link from the USGS
# Astrogeology search results, producing `hemisphere_image_urls`: a list of
# {"title": ..., "img_url": ...} dictionaries.

browser = webdriver.Chrome()

# Parallel lists; an entry is added to both only when the image link was found,
# so a failed tab can never shift titles against the wrong image.
hemis_title = []
hemis_img = []

try:
    browser.get('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')

    # Poll for up to 15 s until at least one result card (class "item") is
    # present; an empty list is falsy, so the wait keeps polling until then.
    # The By-based locator API is used because it works on both Selenium 3 and
    # Selenium 4 (the find_element(s)_by_* helpers were removed in 4.x).
    all_elements = ui.WebDriverWait(browser, 15).until(
        lambda driver: driver.find_elements(By.CLASS_NAME, 'item')
    )
    main_window = browser.current_window_handle

    for element in all_elements:
        hemis_name = element.find_element(By.TAG_NAME, "h3").text
        # Keep only the part of the heading before " Enhanced"
        hemis_name = hemis_name.split(" Enhanced")[0]

        first_link = element.find_element(By.TAG_NAME, 'a')

        # Open the link in a new tab by sending key strokes on the element
        first_link.send_keys(Keys.CONTROL + Keys.SHIFT + Keys.RETURN)

        # NOTE(review): assumes the newly opened tab is the last handle in the
        # list -- confirm for the driver in use.
        handle = browser.window_handles[-1]

        try:
            browser.switch_to.window(handle)
            link = browser.find_element(By.CLASS_NAME, 'downloads')
            hemis_href = link.find_element(By.TAG_NAME, 'a').get_attribute('href')
        except Exception as exc:
            # Best-effort: report the failing tab and its cause, then carry on
            # with the remaining hemispheres.
            print(f"Error on tab {handle}: {exc!r}")
        else:
            hemis_title.append(hemis_name)
            hemis_img.append(hemis_href)
        finally:
            # Close the detail tab so tabs do not pile up, but never close the
            # results tab (we are still on it if the switch itself failed).
            if browser.current_window_handle != main_window:
                browser.close()
            browser.switch_to.window(main_window)
finally:
    # Always release the browser, even if the scrape aborts part-way.
    browser.quit()

# create a list of dictionaries
hemisphere_image_urls = [
    {"title": title, "img_url": img_url}
    for title, img_url in zip(hemis_title, hemis_img)
]

In [18]:
## Create variable with all the data.

In [21]:
# Bundle the results of every scraping section into a single dictionary.
mars = dict(
    mars_news=mars_news,
    featured_img=featured_img,
    mars_weather=weather_p,
    mars_facts=mars_html_table,
    hemisphere=hemisphere_image_urls,
)
mars

{'mars_news': {'title': "NASA's MAVEN Explores Mars to Understand Radio Interference at Earth",
  'p': 'NASA’s MAVEN spacecraft has discovered “layers” and “rifts” in the electrically charged part of the upper atmosphere of Mars.'},
 'featured_img': {'img_alt': 'A Splendor Seldom Seen',
  'url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14934_ip.jpg'},
 'mars_weather': 'InSight sol 424 (2020-02-04) low -91.9ºC (-133.4ºF) high -12.7ºC (9.2ºF), winds from the SSE at 5.7 m/s (12.7 mph) gusting to 22.9 m/s (51.1 mph), pressure at 6.30',
 'mars_facts': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Data</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n     