In [None]:
# Dependencies
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

# Scraping NASA Mars News

In [None]:
# Start a visible (non-headless) Chrome session through splinter, handing it
# the driver location produced by webdriver_manager's install()
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
# URL of page to be scraped (the Mars news site).
# `url` is a notebook-wide name that later sections reassign to their own site.
url = 'https://redplanetscience.com/'
# Load the page in the splinter-controlled browser
browser.visit(url)

In [None]:
# Collect the latest News Title and Paragraph Text.
# Wait (up to 10 seconds) until at least one headline element exists before
# snapshotting the page: if the site fills in its article list after the
# initial load, reading browser.html straight away would give empty result
# lists and an IndexError when the first item is accessed in a later cell.
# The call returns as soon as the element is present, so an already-loaded
# page is not delayed.
browser.is_element_present_by_css('div.content_title', wait_time=10)

# Parse the rendered HTML and gather every headline and teaser container
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
news_titles = soup.find_all('div', class_='content_title')
paragraphs = soup.find_all('div', class_='article_teaser_body')

In [None]:
# Close the browser; the parsed results (`news_titles`, `paragraphs`) are
# already held in memory, so the next cell does not need a live session
browser.quit()

In [None]:
# Keep the first scraped headline and the first teaser paragraph in
# variables for later use, echoing each so the result can be checked by eye
latest_title = news_titles[0].text
print(
    latest_title,
    '====================================================================',
    sep='\n',
)

latest_paragraph = paragraphs[0].text
print(latest_paragraph)

# Scraping JPL Mars Space Images

In [None]:
# Start a fresh visible (non-headless) Chrome session through splinter (the
# previous one was quit), handing it the driver location from webdriver_manager
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
# URL of page to be scraped (the space-images site).
# `url` is also used further down as the prefix for the image's src path.
url = 'https://spaceimages-mars.com/'
# Load the page in the splinter-controlled browser
browser.visit(url)

In [None]:
# Collect every <img> tag on the page; the featured image is picked out of
# this list by position in a later cell
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
image_data = soup.find_all('img')

In [None]:
# Close the browser; `image_data` already holds the parsed <img> tags, so
# the next cell does not need a live session
browser.quit()

In [None]:
# Store url for latest featured image (index 1, after the NASA logo) into a variable for later
featured_src = image_data[1].get("src")

# Fail with a clear message instead of silently producing a link to the site
# root when the tag carries no usable src attribute
if not featured_src:
    raise ValueError("The featured <img> tag has no src attribute")

# urljoin resolves the src against the site URL, so a relative path, a
# root-relative path, or an already-absolute URL all yield a valid link
# (plain concatenation is only correct for the first of those)
featured_image_url = urljoin(url, featured_src)
print(featured_image_url)

# Scraping Mars Facts

In [None]:
# URL of page to be scraped (the Mars facts site); no browser session is
# opened in this section because the URL is handed straight to pandas
url = 'https://galaxyfacts-mars.com/'

In [None]:
# Use pandas to scrape tables: read_html is given the page URL and its
# result is indexed by position in the next cell
tables = pd.read_html(url)

In [None]:
# Find the first table with desired data
mars_table = tables[0]

# Write this table out as HTML markup to the file 'table.html'
# (a path is passed to to_html, so the markup goes to disk rather than
# being kept in a variable)
mars_table.to_html('table.html')

# Scraping images for Mars Hemispheres

In [None]:
# Start a fresh visible (non-headless) Chrome session through splinter (the
# previous one was quit), handing it the driver location from webdriver_manager
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
# URL of page to be scraped (the hemispheres index page).
# `url` is reused below as the prefix for each hemisphere link and image path.
url = 'https://marshemispheres.com/'
# Load the page in the splinter-controlled browser
browser.visit(url)

In [None]:
# Parse the index page and gather every anchor tagged with class 'itemLink'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
all_links = soup.find_all('a', class_='itemLink')

# Keep the href of the anchors at positions 1, 3, 5 and 7: four links in all.
# NOTE(review): presumably each hemisphere is represented by a pair of
# 'itemLink' anchors pointing at the same page - confirm against the markup.
hemi_links = [all_links[position].get('href') for position in (1, 3, 5, 7)]

In [None]:
# Create an empty list to store one {'title', 'img_url'} dictionary per hemisphere
hemisphere_image_urls = []

# Visit each hemisphere page in turn and pull out its title and enhanced image
for hemi in hemi_links:

    # Resolve the scraped link against the site URL and load that page.
    # urljoin copes with relative, root-relative and absolute hrefs alike.
    hemi_url = urljoin(url, hemi)
    browser.visit(hemi_url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    # Locate the page title (<h2 class="title">) and the enhanced image
    # (<img class="wide-image">); stop with a descriptive error if either is
    # missing rather than failing on a None/empty lookup
    title_tag = soup.find('h2', class_='title')
    image_tag = soup.find('img', class_='wide-image')
    if title_tag is None or image_tag is None:
        raise ValueError(
            "Title or wide-image element not found on {}".format(hemi_url)
        )

    # Store data for the title and enhanced image
    title = title_tag.text
    img_url = urljoin(url, image_tag.get('src'))

    # Create a dictionary of this data and append to list
    hemi_dict = {'title': title, "img_url": img_url}
    hemisphere_image_urls.append(hemi_dict)

    # Every visit above uses an absolute URL, so there is no need to navigate
    # back to the index page between iterations.

print(hemisphere_image_urls)

In [None]:
# Close the browser now that all hemisphere pages have been visited and
# their data stored in `hemisphere_image_urls`
browser.quit()