In [1]:
from splinter import Browser
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
# Print the path to chromedriver
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Execute path to chromedriver
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}

## NASA Mars News

In [None]:
# Open a Chrome browser
browser_news = Browser("chrome", **executable_path, headless=False)

In [None]:
# URL of page to be scraped
url_news = "https://mars.nasa.gov/news/"

# Visit URL in browser
browser_news.visit(url_news)

In [None]:
# Create BeautifulSoup object; parse with 'lxml'
html_news = browser_news.html
soup_news = BeautifulSoup(html_news, "lxml")

In [None]:
# Save the latest article's title
news_title = soup_news.find("div", class_ = "content_title").text
print(news_title)

# Save the latest article's description
news_body = soup_news.find("div", class_ = "article_teaser_body").text
print(news_body)

## JPL Mars Space Images - Featured Image

In [57]:
# Open a Chrome browser
browser_image = Browser("chrome", **executable_path, headless=False)

In [58]:
# URL of page to be scraped
url_image = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Visit URL in browser
browser_image.visit(url_image)

In [59]:
# Create BeautifulSoup object; parse with 'lxml'
html_image = browser_image.html
soup_image = BeautifulSoup(html_image, "lxml")

In [60]:
# Get the image name to include in full version of featured image
name_image = soup_image.find("article", class_ = "carousel_item")["alt"] 

In [61]:
# Name the base url
url_base = "https://www.jpl.nasa.gov"

# Add the specifics for the featured image
url_full_image = soup_image.find(attrs={"data-title":name_image})["data-fancybox-href"] 

# Combine the two for the final url
featured_image_url = url_base + url_full_image

# Print complete url
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17652_ip.jpg


## Mars Weather

In [None]:
# Open a Chrome browser
browser_weather = Browser("chrome", **executable_path, headless=False)

In [None]:
# URL of page to be scraped
url_weather = "https://twitter.com/marswxreport?lang=en"

# Visit URL in browser
browser_weather.visit(url_weather)

In [None]:
# Create BeautifulSoup object; parse with 'lxml'
html_weather = browser_weather.html
soup_weather = BeautifulSoup(html_weather, "lxml")

In [None]:
# Save the latest tweet with weather readings from Mars
mars_weather = soup_weather.find("p", class_ = "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
print(mars_weather)

## Mars Facts

In [None]:
# URL for the table containing facts about Mars
url_facts = "https://space-facts.com/mars/"

In [None]:
# Scrape the website for tables
tables_facts = pd.read_html(url_facts)
tables_facts

In [None]:
# Create a dataframe for the table of info about Mars
mars_facts = tables_facts[0]
mars_facts.columns = ["Category", "Fact"]

mars_facts

In [None]:
html_facts = mars_facts.to_html(header=None,index=False)
html_facts = html_facts.replace('\n', '')
html_facts

## Mars Hemispheres

In [26]:
# Open a Chrome browser
browser_hemisphere = Browser("chrome", **executable_path, headless=False)

In [27]:
# URL of page to be scraped
url_hemisphere = "https://web.archive.org/web/20181114171728/https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Visit URL in browser
browser_hemisphere.visit(url_hemisphere)

In [29]:
# Create BeautifulSoup object; parse with 'lxml'
html_hemisphere = browser_hemisphere.html
soup_hemisphere = BeautifulSoup(html_hemisphere, "lxml")

In [30]:
#container = soup.find("div", id = "product-section")
location_hemisphere = soup_hemisphere.find("div", id = "product-section")
items_hemisphere = location_hemisphere.find_all("div", class_ = "item")

In [49]:
# I couldn't figure out how to get the links for the images because it was accessing an archive.
# Create a dataframe with the urls for the Mars hemispheres
links_hemisphere = {"img_url":["https://web.archive.org/web/20181114182327/https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg",
                            "https://web.archive.org/web/20181114182347/https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg",
                            "https://web.archive.org/web/20181114182245/https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced",
                            "https://web.archive.org/web/20181114182431/https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg"]}
url_df = pd.DataFrame(links_hemisphere)

Unnamed: 0,img_url
0,https://web.archive.org/web/20181114182327/htt...
1,https://web.archive.org/web/20181114182347/htt...
2,https://web.archive.org/web/20181114182245/htt...
3,https://web.archive.org/web/20181114182431/htt...


In [41]:
# Create a dictionary with the titles of the hemisphere images
image_titles_hemisphere = []

for item in items_hemisphere:
    title = item.find("h3").text
    image_titles_hemisphere.append({"title": title})
    
image_titles_hemisphere

[{'title': 'Cerberus Hemisphere Enhanced'},
 {'title': 'Schiaparelli Hemisphere Enhanced'},
 {'title': 'Syrtis Major Hemisphere Enhanced'},
 {'title': 'Valles Marineris Hemisphere Enhanced'}]

In [34]:
# Change the dictionary to a dataframe
title_df = pd.DataFrame.from_dict(image_titles_hemisphere)

# Concatenate the two dataframes (title and url)
hemisphere_df = pd.concat([title_df, url_df], axis=1)

Unnamed: 0,title
0,Cerberus Hemisphere Enhanced
1,Schiaparelli Hemisphere Enhanced
2,Syrtis Major Hemisphere Enhanced
3,Valles Marineris Hemisphere Enhanced


In [56]:
# Create a dictionary from the combined dataframe
hemisphere_dict = hemisphere_df.to_dict(orient = "index")
hemisphere_dict

{0: {'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://web.archive.org/web/20181114182327/https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 1: {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://web.archive.org/web/20181114182347/https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 2: {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://web.archive.org/web/20181114182245/https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'},
 3: {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://web.archive.org/web/20181114182431/https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}}