# Step 1 - Scraping

In [1]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import time

### NASA Mars News

In [2]:
# URL of page to be scraped
mars_news_url = 'https://mars.nasa.gov/news/'

In [3]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
browser.visit(mars_news_url)
if browser.is_element_present_by_css('li.slide', wait_time=10):  
    html = browser.html
    soup = bs(html, 'html.parser')
    slides = soup.find_all('li', class_='slide')
    news_title = slides[0].find('div',class_='content_title').text
    news_p = slides[0].find('div',class_='article_teaser_body').text
    print('News Headline: ', news_title)
    print('-------------')
    print(news_p,'\n')

News Headline:  Perseverance Scientists Train for Mars in Nevada Desert
-------------
Team members searched for signs of ancient microscopic life there, just as NASA's latest rover will on the Red Planet next year. 



### JPL Mars Space Images - Featured Image

In [5]:
mars_jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [6]:
browser.visit(mars_jpl_url)

if browser.is_element_present_by_css('section.primary_media_feature', wait_time=10):
    browser.click_link_by_partial_text('FULL IMAGE')
    browser.click_link_by_partial_text('more info')
    html_jpl = browser.html
    soup_jpl = bs(html_jpl, 'html.parser')
    browser.find_by_css('a img.main_image').click()
    time.sleep(1)
    featured_image_url = browser.url # read current page url
    #option 2    
    #featured_image_url = 'https://www.jpl.nasa.gov' + soup_jpl.find('img',class_='main_image').get('src')
    #option 3
    #images = soup_jpl.find_all('div', class_='download_tiff')
    #featured_image_url = images[1].a.get('href')
    print('Featured Image URL: ', featured_image_url)
    print('Page URL: ', mars_jpl_url)



Featured Image URL:  https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19108_hires.jpg
Page URL:  https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars


### Mars Weather

In [7]:
mars_weather_url = 'https://twitter.com/marswxreport?lang=en'

In [9]:
browser.visit(mars_weather_url)
if browser.is_element_present_by_css('section article', wait_time=10):
    html_weather = browser.html
    soup_weather = bs(html_weather, 'html.parser')
    latest_tweet = browser.find_by_css('[data-testid="tweet"]')[0]
    mars_weather = latest_tweet.find_by_css('div.r-jwli3a.r-16dba41.r-bnwqim').text
    print('Latest Tweet')
    print('------------')
    print(mars_weather)

In [10]:
html_weather = requests.get(mars_weather_url)
soup_weather = bs(html_weather.text, 'html.parser')

tweet_timestamp_container = soup_weather.find('div', class_="stream-item-header")
tweet_timestamp = tweet_timestamp_container.find('a', class_="tweet-timestamp")['title']
tweet_container = soup_weather.find('div', class_="js-tweet-text-container")
latest_tweet = tweet_container.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
mars_weather = latest_tweet.find_all(text=True)[0]
mars_weather

'InSight sol 512 (2020-05-05) low -92.9ºC (-135.2ºF) high -3.1ºC (26.4ºF)\nwinds from the SW at 4.8 m/s (10.7 mph) gusting to 19.2 m/s (42.9 mph)\npressure at 6.80 hPa'

### Mars Facts

In [11]:
mars_facts_url = 'https://space-facts.com/mars/'

In [14]:
tables = pd.read_html(mars_facts_url)
df = tables[0]
df.columns = ['Fact', 'Value']
df['Fact'] = df['Fact'].str.replace(':', '') # remove ':' from Fact column
df.set_index('Fact', inplace=True)
# df = df.rename_axis(None) # Remove Column Header "Fact"
html_table = df.to_html(classes="table table-bordered table-sm", border=0, index=False, justify='initial')
html_table

'<table border="0" class="dataframe table table-bordered table-sm">\n  <thead>\n    <tr style="text-align: initial;">\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

### Mars Hemispheres

In [15]:
mars_hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

In [16]:
browser.visit(mars_hemi_url)
html_hemi = browser.html
soup_hemi = bs(html_hemi, 'html.parser')
results = soup_hemi.find_all('div',class_='item')

hemisphere_image_urls = []
for item in results:
    title = item.find('h3').text
    print(f'Title: {title}')
    browser.click_link_by_partial_text(title)
#     time.sleep(1)
    html_subpage = browser.html
    soup_subpage = bs(html_subpage, 'html.parser')
    page_url = browser.url
    img_url = soup_subpage.find('div', 'downloads').ul.li.a['href']
    print(f'Image URL: {img_url}')
    browser.back()
    hemisphere_image_urls.append({"title": title, "img_url": img_url, "page_url": page_url})
    
hemisphere_image_urls

Title: Cerberus Hemisphere Enhanced
Image URL: http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
Title: Schiaparelli Hemisphere Enhanced
Image URL: http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
Title: Syrtis Major Hemisphere Enhanced
Image URL: http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
Title: Valles Marineris Hemisphere Enhanced
Image URL: http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'page_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'page_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'page_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'page_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enha