In [2]:
import requests
import pymongo
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs

In [3]:
# Initiate
executable_path = {'executable_path': 'C:\\localref\\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

## NASA News Article

In [4]:
# Scrape
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)
html = browser.html
soup = bs(html, 'html.parser')

In [5]:
# Pull all articles
results = soup.find_all('li', class_='slide')

In [6]:
# Get title of first article and teaser paragraph
title = results[0].find('h3').text
teaser = results[0].find('div', class_='article_teaser_body').text

In [7]:
title

"Mars InSight's Mole Has Partially Backed Out of Its Hole"

In [8]:
teaser

"After making progress over the past several weeks digging into the surface of Mars, InSight's mole has backed about halfway out of its hole this past weekend."

## JPL Image

In [9]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
html = browser.html
soup = bs(html, 'html.parser')

In [10]:
results = soup.find('div', class_='carousel_items')

In [11]:
# Super ugly method for getting url that's buried in the tag...
image_1 = 'https:/www.jpl.nasa.gov'
image_2 = results.article["style"].split(" ")[1].replace('(', ',').replace(')', ',').replace("'", ",").split(',')[2]

featured_image_url = image_1 + image_2

In [12]:
featured_image_url

'https:/www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18907-1920x1200.jpg'

## Mars Weather

In [13]:
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
html = browser.html
soup = bs(html, 'html.parser')

In [14]:
results = soup.find_all('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')

In [15]:
mars_weather = results[0].text.rsplit(' ', 1)[0]+" hpa"

In [16]:
print(mars_weather)

InSight sol 334 (2019-11-04) low -100.0ºC (-148.1ºF) high -23.8ºC (-10.8ºF)
winds from the SSW at 5.1 m/s (11.4 mph) gusting to 19.9 m/s (44.4 mph)
pressure at 7.00 hpa


## Mars Facts

In [17]:
url = 'https://space-facts.com/mars/'

In [18]:
mars_facts_tables = pd.read_html(url)

In [19]:
table_df = mars_facts_tables[1]

In [20]:
table_df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-153 to 20 °C,-88 to 58°C


In [21]:
html_table = table_df.to_html(classes="table table-striped")

In [22]:
print(html_table)

<table border="1" class="dataframe table table-striped">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars - Earth Comparison</th>
      <th>Mars</th>
      <th>Earth</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Diameter:</td>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Moons:</td>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Distance from Sun:</td>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Length of Year:</td>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Temperature:</td>
      <td>-153 to 20 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody>
</table>


## Mars Hemispheres

In [23]:
hemispheres = []

urls = ['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
        'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
        'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
        'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

for url in urls:
    hemi_dict = {}
    
    browser.visit(url)
    html = browser.html
    soup = bs(html, 'html.parser')
    
    # Find image title
    title_results = soup.find('h2', class_='title')
    title = title_results.text
    
    # Find image link
    for url in soup.find_all('img'):
        if 'wide-image' in str(url):
            img_url = 'https://astrogeology.usgs.gov' + url['src']
            print(img_url)
    
    hemi_dict['title'] = title
    hemi_dict['img_url'] = img_url
    hemispheres.append(hemi_dict)

https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


In [24]:
hemi_dict

{'title': 'Valles Marineris Hemisphere Enhanced',
 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}