# Dependencies and Setup

In [31]:
# Dependencies
import time
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [32]:
# Keyword arguments pointing the browser at the ChromeDriver binary
executable_path = dict(executable_path='chromedriver.exe')

# Container that collects every scraped result
mars_dict = dict()

# NASA Mars News

URL: https://mars.nasa.gov/news/

In [33]:
# Store the URL for the NASA Mars News website
news_url = "https://mars.nasa.gov/news/"

# Instantiate Browser
browser = Browser('chrome', **executable_path, headless=False)

# Attempt to scrape the NASA Mars News website
try:
    # Visit the url
    browser.visit(news_url)
    # Wait for the website to fully load
    time.sleep(5)
    # Scrape the html from the site
    html = browser.html

    # Create Beautiful Soup object from scraped html
    soup = bs(html, "html.parser")

    # Extract and store the latest news title and paragraph description.
    # If the container is missing, `find` returns None and the attribute
    # access below raises, which is handled by the except branch.
    latest_news = soup.find('div', class_="image_and_description_container")
    news_title = latest_news.find('div', class_='content_title').text
    news_p = latest_news.find('div', class_='article_teaser_body').text

    # Add the news title and paragraph description to the scraping results dictionary: 'mars_dict'
    mars_dict['latest_news'] = {'title': news_title, 'paragraph': news_p}

# Handle errors
except Exception as e:
    # Print exception
    print(e)
    # The placeholder must use the same keys as the success path so that
    # later cells reading 'title' / 'paragraph' keep working after a failure.
    mars_dict['latest_news'] = {'title': 'Scraping Failed', 'paragraph': 'Scraping Failed'}

# Close the browser exactly once, whether or not scraping succeeded
finally:
    browser.quit()


In [48]:
# Show the scraped headline and teaser, separated by a horizontal rule
news_entry = mars_dict['latest_news']
divider = "-"*100

print(f"Title:\n{news_entry['title']}")
print(divider)
print(f"Paragraph:\n{news_entry['paragraph']}")

Title:
The Extraordinary Sample-Gathering System of NASA's Perseverance Mars Rover
----------------------------------------------------------------------------------------------------
Paragraph:
Two astronauts collected Moon rocks on Apollo 11. It will take three robotic systems working together to gather up the first Mars rock samples for return to Earth.


# Mars Featured Image
URL: https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars

In [34]:
# Base URL
feat_img_base_url = "https://www.jpl.nasa.gov/"
# URL to scrape
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Instantiate Browser
browser = Browser('chrome', **executable_path, headless=False)

# Attempt to scrape the JPL Mars website's featured image
try:
    # Visit the url
    browser.visit(jpl_url)
    # Scrape the html from the site
    html = browser.html

    # Create Beautiful Soup object from scraped html
    soup = bs(html, "html.parser")

    # Extract the URL for the featured image page
    feat_image = soup.find('div', class_="carousel_items")
    feat_img_url = feat_image.a['data-link']

    # Navigate to the article page for the featured image.
    # urljoin resolves the scraped path against the base URL, so a path that
    # starts with '/' no longer yields a doubled slash after the host name.
    browser.visit(urljoin(feat_img_base_url, feat_img_url))

    # Scrape the html from the featured image page
    feat_img_html = browser.html
    # Create Beautiful Soup object from scraped html
    feat_soup = bs(feat_img_html, "html.parser")

    # Extract the URL for the full size featured image
    img_fig = feat_soup.find('figure', class_='lede')
    img_fig_url = img_fig.a['href']

    # Add the full size featured image url to the scraping results dictionary: 'mars_dict'
    mars_dict['feat_image_url'] = urljoin(feat_img_base_url, img_fig_url)

# Handle errors
except Exception as e:
    # Print exception
    print(e)
    # Record a placeholder (same convention as the news scrape) so cells that
    # read 'feat_image_url' do not raise KeyError after a failed scrape.
    mars_dict['feat_image_url'] = 'Scraping Failed'

# Close the browser exactly once, whether or not scraping succeeded
finally:
    browser.quit()

In [49]:
# Show the full-size featured image URL collected by the scrape
featured_image_url = mars_dict['feat_image_url']
print(f"Featured Image URL:\n{featured_image_url}")

Featured Image URL:
https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA17767_hires.jpg


# Mars Weather
URL: https://twitter.com/marswxreport?lang=en

In [35]:
# URL to scrape
url = 'https://twitter.com/marswxreport?lang=en'
# Instantiate Browser
browser = Browser('chrome', **executable_path, headless=False)

# Attempt to scrape the Mars weather twitter account
try:
    browser.visit(url)
    # Wait for the website to load
    time.sleep(5)
    # Create Beautiful Soup object from scraped html
    html = browser.html
    soup = bs(html, "html.parser")

    # Take the first div carrying this exact class list. If no element
    # matches, `find` returns None and the `.text` access below raises,
    # which is handled by the except branch.
    weather_data = soup.find("div", class_="css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0")
    # Replace line breaks with spaces
    replaced_weather_data = weather_data.text.replace("\n", " ")
    # Add the weather report to the scraping results dictionary: 'mars_dict'
    mars_dict['weather'] = replaced_weather_data

# Handle errors
except Exception as e:
    # Print exception
    print(e)
    # Record a placeholder (same convention as the news scrape) so cells that
    # read 'weather' do not raise KeyError after a failed scrape.
    mars_dict['weather'] = 'Scraping Failed'

# Close the browser exactly once, whether or not scraping succeeded
finally:
    browser.quit()


In [50]:
# Show the most recent Mars weather report collected by the scrape
latest_weather = mars_dict['weather']
print(f"Current Weather on Mars:\n{latest_weather}")

Current Weather on Mars:
InSight sol 543 (2020-06-06) low -92.9ºC (-135.3ºF) high -6.6ºC (20.2ºF) winds from the WNW at 7.2 m/s (16.0 mph) gusting to 21.0 m/s (47.0 mph) pressure at 7.40 hPa


# Mars Facts
URL: https://space-facts.com/mars/

In [37]:
# Store the URL to scrape
url = 'https://space-facts.com/mars'

# Attempt to scrape the Mars facts website
try:
    # Scrape tabular data from the website using pandas
    tables = pd.read_html(url)

    # Work on a copy of the first table so the parsed source in `tables`
    # is left untouched by the clean-up below.
    fact_table = tables[0].copy()

    # Clean up table dataframe
    # Rename columns (raises if the table does not have exactly two columns,
    # which is handled by the except branch)
    fact_table.columns = ['Attribute', 'Value']
    # Set 'Attribute' column as the index; returns a new frame instead of
    # mutating in place
    fact_table = fact_table.set_index('Attribute')

    # Convert the fact table to an html string
    html_table = fact_table.to_html()

    # Add the Mars fact table html string to the scraping results dictionary: 'mars_dict'
    mars_dict['fact_table'] = html_table

# Handle errors
except Exception as e:
    # Print exception
    print(e)
    # Record a placeholder (same convention as the news scrape) so cells that
    # read 'fact_table' do not raise KeyError after a failed scrape.
    mars_dict['fact_table'] = 'Scraping Failed'

In [57]:
# Show the html markup generated for the Mars fact table
fact_table_html = mars_dict['fact_table']
print(fact_table_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th>Attribute</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


# Mars Hemispheres
URL: https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars



In [62]:
# Base URL
hemi_base_url = 'https://astrogeology.usgs.gov'
# URL to scrape
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# List to store full size hemisphere image urls
hemisphere_image_urls = []

# Instantiate Browser
browser = Browser('chrome', **executable_path, headless=False)

# Everything that uses the browser runs inside try/finally so the browser is
# closed even when the initial visit or the parsing of the results page fails.
try:
    # Go to the mars hemispheres website
    browser.visit(url)
    # Scrape the html
    html = browser.html
    # Create Beautiful Soup object from scraped html
    soup = bs(html, 'lxml')

    # Extract the div containing the link to each hemisphere page
    image_links = soup.find_all('div', class_='description')

    # Attempt to scrape each hemisphere page to extract the full size image URL
    for link in image_links:
        # A failure on one hemisphere is printed and skipped so the
        # remaining hemispheres are still attempted.
        try:
            print(f'Visiting: {link.h3.text}')
            # Wait for website to load
            time.sleep(1)
            print('Visiting...')
            # Visit hemisphere page by clicking the link whose text matches the heading
            browser.click_link_by_partial_text(link.h3.text)
            # Wait for website to load
            time.sleep(1)

            # Scrape html
            html = browser.html
            # Create Beautiful Soup object from scraped html
            soup = bs(html, 'lxml')

            # Store the image title
            title = soup.find('h2', class_='title')
            # Store the image URL
            image = soup.find('img', class_='wide-image')

            # Add the image title and url to the list of hemisphere image urls
            hemisphere_image_urls.append({'title': title.text, 'img_url': f"{hemi_base_url}{image['src']}"})
            print('Data scraped')

            # Return to the previous page
            browser.visit(url)
        except Exception as e:
            print(e)
            # Go back to the results page so the next link can be clicked
            browser.visit(url)

# Close the browser
finally:
    browser.quit()

# Add the Mars hemisphere image URLs to the scraping results dictionary: 'mars_dict'
mars_dict['hemisphere_image_urls'] = hemisphere_image_urls

Visiting: Cerberus Hemisphere Enhanced
Visiting...




Data scraped
Visiting: Schiaparelli Hemisphere Enhanced
Visiting...
Data scraped
Visiting: Syrtis Major Hemisphere Enhanced
Visiting...
Data scraped
Visiting: Valles Marineris Hemisphere Enhanced
Visiting...
Data scraped


In [67]:
# Print out the Mars hemisphere image urls.
# The loop variable is deliberately not called `url`: each item is a dict with
# 'title' and 'img_url' keys, and reusing `url` would overwrite the
# notebook-level URL string with a dict.
for hemisphere in hemisphere_image_urls:
    print(f"{hemisphere['title']}:\n{hemisphere['img_url']}")

Cerberus Hemisphere Enhanced:
https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
Schiaparelli Hemisphere Enhanced:
https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
Syrtis Major Hemisphere Enhanced:
https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
Valles Marineris Hemisphere Enhanced:
https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg
