In [1]:
# import dependencies
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup

# import splinter
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

### NASA Mars News

In [3]:
# Mars news: open the site so the first article's title and teaser can be read

# launch a visible chrome window through splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', headless=False, **executable_path)

# load the mars news landing page
url = "https://www.redplanetscience.com"
browser.visit(url)

# take the rendered markup and hand it to beautiful soup
html = browser.html
soup = BeautifulSoup(html, features="html.parser")

In [4]:
# headline of the first article on the page
header = soup.find("div", class_="content_title")
news_title = header.get_text()

# teaser text of the first article on the page
paragraph = soup.find("div", class_="article_teaser_body")
news_p = paragraph.get_text()

# everything needed has been read from the parsed page -- close chrome
browser.quit()

### JPL Mars Space Images—Featured Image

In [6]:
# JPL space images: open the site so the featured image url can be read

# launch a visible chrome window through splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', headless=False, **executable_path)

# load the jpl mars space images page
url = "https://spaceimages-mars.com/"
browser.visit(url)

# take the rendered markup and hand it to beautiful soup
html = browser.html
soup = BeautifulSoup(html, features="html.parser")

In [8]:
# the page is already parsed into `soup`, so quit the browser first; that way
# the chrome window is closed even if one of the lookups below fails
browser.quit()

# find the tag for the featured image
image = soup.find("img", class_="headerimage fade-in")
if image is None:
    raise ValueError("featured image tag not found on " + url)

# get the link to the image file as it is written in the page
relative_link = image['src']

# resolve the link against the page url; unlike plain string concatenation,
# urljoin copes with a missing or doubled slash and leaves an
# already-absolute link untouched
featured_image_url = urljoin(url, relative_link)

### Mars Facts

In [9]:
# Mars facts: read the fact tables straight from the page with pandas

# address of the mars facts page
url = "https://galaxyfacts-mars.com/"

# pandas returns a list with one dataframe per table it finds
table = pd.read_html(io=url)

# show everything pandas found -- the table we want is the second entry
print(table)

# keep the second table as the facts dataframe and show it
df = table[1]
print(df)

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [11]:
# render the facts dataframe as an html table string and strip out
# every \n in a single chained expression
html_table = df.to_html().replace('\n', '')

### Mars Hemispheres

In [12]:
# Mars hemispheres: open the astrogeology site that lists the four hemispheres

# launch a visible chrome window through splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', headless=False, **executable_path)

# load the hemispheres index page
url = "https://marshemispheres.com/"
browser.visit(url)

# take the rendered markup and hand it to beautiful soup
html = browser.html
soup = BeautifulSoup(html, features="html.parser")

In [15]:
def extract_img(url):
    """Return the href of the first download link on a hemisphere page.

    Opens a fresh Chrome window, loads ``url`` and reads the anchor inside the
    first ``<li>`` of the ``<div class="downloads">`` element.

    Parameters
    ----------
    url : str
        Address of the page to visit.

    Returns
    -------
    str or None
        The ``href`` attribute exactly as written in the page (it is not
        resolved against ``url``); None if the anchor has no href.

    Raises
    ------
    AttributeError
        If the page has no downloads div, no list item inside it, or no
        anchor inside that list item.
    """
    executable_path = {'executable_path': ChromeDriverManager().install()}
    browser = Browser('chrome', **executable_path, headless=False)

    try:
        browser.visit(url)
        html = browser.html
    finally:
        # always close the window, even when the visit fails, so a failed
        # scrape does not leave a stray chrome process behind
        browser.quit()

    # parsing works on the captured markup, so the browser is not needed here
    soup = BeautifulSoup(html, "html.parser")
    link = soup.find("div", class_="downloads").find("li")
    return link.a.get("href")

In [16]:
# find all the tags with the class itemLink product-item
hemi_links = soup.find_all("a", class_="itemLink product-item")

# quit the browser -- the index page is already parsed into soup
browser.quit()

# one {"title", "img_url"} dictionary per hemisphere goes in here
hemisphere_image_urls = []

for hemi in hemi_links:

    # skip anchors without an <h3> title and the one titled "Back"
    if not hemi.h3 or hemi.h3.text == "Back":
        continue

    title = hemi.h3.text.replace(' Enhanced', '')
    print(title)

    # nothing to follow if the anchor has no target
    href = hemi.get("href")
    if not href:
        continue

    # resolve the detail-page link against the site url; urljoin handles
    # slashes and absolute links, which plain concatenation does not
    new_url = urljoin(url, href)

    # open the detail page and read the link to the full-size image
    new_href = extract_img(new_url)
    if not new_href:
        # urljoin would silently return the bare site url for an empty link,
        # so leave this hemisphere out rather than store a wrong address
        continue

    hemisphere_image_urls.append({"title": title,
                                  "img_url": urljoin(url, new_href)})


Cerberus Hemisphere
Schiaparelli Hemisphere
Syrtis Major Hemisphere
Valles Marineris Hemisphere


In [18]:
# show the collected title / img_url dictionaries
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere', 'img_url': 'https://marshemispheres.com/images/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]
