In [2]:
# Dependencies
import shutil
import time

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

In [3]:
# Initialize Splinter with the Chrome driver.
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
#
# Look chromedriver up on PATH (the pure-Python equivalent of `which chromedriver`)
# so the cell is not tied to one machine's directory layout. If PATH has no
# entry, fall back to the location this notebook previously hard-coded.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
print(chromedriver_path)

executable_path = {'executable_path': chromedriver_path}
browser = Browser('chrome', **executable_path, headless=False)

/usr/local/bin/chromedriver


In [4]:
# This is to initialize Splinter for Windows - comment out the Mac lines above and uncomment these lines.
# NOTE: point executable_path at wherever chromedriver.exe lives on your machine.

#executable_path = {"executable_path": "chromedriver.exe"}
#browser = Browser("chrome", **executable_path, headless=False)

# NASA Mars News

In [5]:
# Visit the NASA Mars news listing
url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
browser.visit(url)

# The next cell scrapes the first li.slide inside ul.item_list, so wait for
# exactly that element instead of sleeping a fixed second: Splinter polls for
# up to 10 s and returns as soon as it appears. Fail here with a clear message
# rather than with an AttributeError on None further down.
if not browser.is_element_present_by_css('ul.item_list li.slide', wait_time=10):
    raise RuntimeError("NASA Mars news list did not load within 10 seconds")

# Create BeautifulSoup object; parse with 'html.parser'
nasa_html = browser.html
nasa_soup = BeautifulSoup(nasa_html, 'html.parser')


In [6]:
# Scrape the headline and teaser paragraph of the first article on the page
article_list = nasa_soup.find('ul', class_='item_list')
first_article = article_list.find('li', class_='slide')

# Locate the two text containers, then read their text content
headline_div = first_article.find('div', class_='content_title')
news_headline = headline_div.get_text()
teaser_div = first_article.find('div', class_='article_teaser_body')
news_teaser = teaser_div.get_text()

print(news_headline, news_teaser, sep='\n')

Mars Scientists Investigate Ancient Life in Australia
Teams with NASA's Mars 2020 and ESA's ExoMars practiced hunting for fossilized microbial life in the Australian Outback in preparation for their Red Planet missions. 


# JPL Mars Space Images - Featured Image

In [7]:
# Visit the JPL space-images page (Mars category) that will be scraped
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [8]:
# Click through to the full-size picture.
# Each one-second pause precedes the next page interaction, presumably to let
# the page / overlay finish updating (TODO confirm the timing is sufficient).
time.sleep(1)
browser.click_link_by_partial_text('FULL IMAGE')

time.sleep(1)
# find_by_css returns the matching elements; click() acts on the first match
browser.find_by_css('a.fancybox-expand').click()

time.sleep(1)

In [9]:
# Scrape the featured image from the page in its current (expanded) state
jpl_html = browser.html
jpl_soup = BeautifulSoup(jpl_html, 'html.parser')

# The image tag carries a site-relative src; prefix it with the JPL host
featured_img_tag = jpl_soup.find('img', class_='fancybox-image')
img_relative = featured_img_tag['src']
featured_image_url = f'https://www.jpl.nasa.gov{img_relative}'
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16842_ip.jpg


# Mars Weather

In [10]:
# Url for the twitter page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

# The next cell reads the first p.TweetTextSize element, so wait for it
# explicitly (polling for up to 10 s) instead of sleeping a fixed second, and
# fail with a clear message if no tweet text ever shows up.
if not browser.is_element_present_by_css('p.TweetTextSize', wait_time=10):
    raise RuntimeError("No tweet text found on the Mars weather page within 10 seconds")

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
tweet_soup = BeautifulSoup(html, 'html.parser')

In [11]:
# Scrape the latest tweet: take the text of the first p.TweetTextSize element
latest_tweet = tweet_soup.find('p', class_='TweetTextSize')
mars_weather = latest_tweet.get_text()
mars_weather

'InSight sol 343 (2019-11-13) low -100.8ºC (-149.4ºF) high -23.1ºC (-9.5ºF)\nwinds from the SSE at 5.2 m/s (11.7 mph) gusting to 20.8 m/s (46.5 mph)\npressure at 6.90 hPa'

# Mars Facts

In [12]:
# URL for the mars facts table
url = 'https://space-facts.com/mars/'

# pd.read_html returns a list of DataFrames parsed from the page's tables;
# the first one is the facts table used here.
tables = pd.read_html(url)
df = tables[0]
df.columns = ["Description", "Value"]

# Index by description. Assigning the frame returned by set_index replaces
# inplace=True, which gives no benefit here and prevents chaining; the
# "Description" column is still dropped from the data, as that is the default.
df = df.set_index("Description")
df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [13]:
# Convert the pandas table to an HTML table string
# (df is the facts frame indexed by "Description" built in the cell above)
mars_facts_html = df.to_html()

In [14]:
# Clean up HTML: drop every newline so the table markup is a single line
mars_facts_html = "".join(mars_facts_html.split("\n"))
mars_facts_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

# Mars Hemispheres

In [15]:
# Site root; also used further down to turn relative links into absolute URLs
base_hemisphere_url = "https://astrogeology.usgs.gov"

# Search-results page for the enhanced Mars hemisphere images
hemisphere_url = base_hemisphere_url + "/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemisphere_url)

In [16]:
# Parse the search-results page currently open in the browser
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# One {"title", "img_url"} record per hemisphere, in page order
hemisphere_image_urls = []

# Each result is a div.item holding the title (h3) and a link to its detail page
for item in soup.find_all("div", class_="item"):
    title = item.find("h3").text
    detail_path = item.find("div", class_="description").a["href"]

    # The href is site-relative, so prefix the site root before visiting
    browser.visit(base_hemisphere_url + detail_path)
    pic_soup = BeautifulSoup(browser.html, 'html.parser')

    # Keep the relative image path in its own name: assigning it to `url`
    # would overwrite the page-URL variable used by the earlier cells.
    img_path = pic_soup.find("img", class_="wide-image")["src"]

    img_dict = {"title": title, "img_url": base_hemisphere_url + img_path}
    print(img_dict["img_url"])

    hemisphere_image_urls.append(img_dict)

https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


In [17]:
# Display the collected hemisphere records (each has a 'title' and an 'img_url')
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]