In [1]:
# imports for Splinter and Beautiful Soup
from splinter import Browser
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd

In [2]:
# Start the Chrome browser that every scraping cell below drives.
# NOTE: point 'executable_path' at the chromedriver binary on your machine.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser(
    'chrome',
    headless=False,  # keep the window visible while scraping
    **executable_path,
)

In [3]:
# Get NASA news listings
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Poll the rendered page: the news slides may not be in the DOM right after
# visit() returns, so re-parse once per second for a bounded number of tries.
max_attempts = 5
attempts = 0
haveData = False
slides = []
while attempts < max_attempts and not haveData:
    attempts += 1  # count the try so the loop gives up instead of spinning forever
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")

    slides = soup.find_all("li", class_="slide")
    if slides:
        haveData = True

if not haveData:
    raise Exception("Failed to scrape nasa news site")

# One dict per slide: headline text plus teaser paragraph text
listings = []
for slide in slides:
    news_title = slide.find("div", class_="content_title").get_text()
    news_p = slide.find("div", class_="article_teaser_body").get_text()

    listings.append({"title": news_title, "paragraph": news_p})

In [5]:
# Show the first entry of `listings` (title and teaser paragraph of the
# first slide found on the NASA news page)
print(listings[0])

{'title': "The Detective Aboard NASA's Perseverance Rover", 'paragraph': 'An instrument called SHERLOC will, with the help of its partner WATSON, hunt for signs of ancient life by detecting organic molecules and minerals.'}


In [6]:
# Get the NASA featured image
base_url = "https://www.jpl.nasa.gov"
url = f"{base_url}/spaceimages?search=&category=Mars"
browser.visit(url)

# Poll the rendered page for the featured <article>; its inline style is
# expected to carry the image path as url('...').
max_attempts = 5
attempts = 0
haveData = False
image = None  # reset so a value left over from an earlier run is never reused
while attempts < max_attempts and not haveData:
    attempts += 1  # count the try so the loop gives up instead of spinning forever
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")

    # find() returns None (rather than raising) while the section is not rendered yet
    section = soup.find("section", class_="main_feature")
    article = section.find("article") if section else None
    style = article.get("style", "") if article else ""

    # Take exactly the text between url(' and the closing ')
    prefix = "url('"
    start = style.find(prefix)
    if start != -1:
        start += len(prefix)
        end = style.find("')", start)
        if end != -1:
            image = style[start:end]

    if image:
        haveData = True

if not haveData:
    raise Exception("Failed to scrape nasa featured image")

featured_image = f"{base_url}{image}"

In [8]:
# Show the featured image URL (base_url joined with the path scraped from
# the article's inline style)
print(featured_image)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA14579-1920x1200.jpg


In [15]:
# Get Mars weather
url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)

# Find the <article role="article"> elements; poll because they may not be
# in the DOM immediately after visit() returns.
max_attempts = 5
attempts = 0
haveData = False
matches = []
while attempts < max_attempts and not haveData:
    attempts += 1  # count the try so the loop gives up instead of spinning forever
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    matches = soup.find_all('article', attrs={'role': 'article'})
    if matches:
        haveData = True

if not haveData:
    raise Exception("Failed to scrape mars weather tweets")

# The report may not be in the first match, so scan every span of every
# article and keep the first text that begins with "InSight".
# `weather` stays None when no such span exists.
weather = None
for match in matches:
    spans = match.find_all('span')
    for span in spans:
        text = span.get_text()
        if text.startswith("InSight"):
            weather = text
            break
    if weather is not None:
        break


In [16]:
# Show the scraped weather text (None when no span starting with "InSight"
# was found)
print(weather)

InSight sol 532 (2020-05-26) low -93.1ºC (-135.7ºF) high -1.1ºC (30.0ºF)
winds from the SW at 4.9 m/s (10.9 mph) gusting to 17.7 m/s (39.6 mph)
pressure at 7.10 hPa


In [17]:
# Get Mars facts using pandas
url = "https://space-facts.com/mars/"
tables = pd.read_html(url)  # one DataFrame per <table> found on the page
facts = tables[0]

# Label the two columns, then index the rows by the fact description
facts_df = pd.DataFrame(facts)
facts_df.columns = ["description", "value"]
facts_df = facts_df.set_index("description")

# to_html() writes the index and the header row by default
table_html = facts_df.to_html()

In [18]:
# Show the HTML markup generated from the Mars facts DataFrame
print(table_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>value</th>
    </tr>
    <tr>
      <th>description</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


In [20]:
# Define a function to get a hemisphere image
def get_hemispher_image(browser, url, link):
    """Visit a hemisphere detail page and return its wide image's ``src``.

    Args:
        browser: Browser object exposing ``visit(url)`` and ``html``.
        url: Base URL of the site, e.g. "https://astrogeology.usgs.gov".
        link: Path of the detail page relative to ``url``; a leading "/"
            is accepted.

    Returns:
        The ``src`` attribute of the first ``<img class="wide-image">`` on
        the page, exactly as written in the markup.

    Raises:
        ValueError: If the page has no such image or it has no ``src``.
    """
    # Join on exactly one slash so an absolute link ("/search/...") does not
    # produce "//" in the visited URL.
    linkurl = f"{url.rstrip('/')}/{link.lstrip('/')}"
    browser.visit(linkurl)
    soup = BeautifulSoup(browser.html, "html.parser")

    image = soup.find("img", class_="wide-image")
    if image is None or not image.get("src"):
        # Fail with the offending URL instead of an opaque TypeError on None["src"]
        raise ValueError(f"No wide-image found at {linkurl}")
    return image["src"]

In [21]:
# Get the hemispheres
base_url = "https://astrogeology.usgs.gov"
url = f"{base_url}/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

html = browser.html
soup = BeautifulSoup(html, "html.parser")

# Gather every result's detail link first; the browser navigates away from
# the results page once the detail pages are visited below.
items = soup.find_all("div", class_="item")
links = []
for item in items:
    description = item.find("div", class_="description")
    links.append(description.find("a", class_="product-item"))

# Visit each detail page and record its title with the absolute image URL
hemispheres = []
for link in links:
    title = link.find("h3").get_text()
    image_url = base_url + get_hemispher_image(browser, base_url, link["href"])
    entry = {"title": title, "img_url": image_url}
    hemispheres.append(entry)

In [23]:
# Show the Mars hemispheres: a list of dicts with "title" and "img_url" keys
print(hemispheres)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]
