# Initial steps

## Import dependencies

In [10]:
from pprint import pprint
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup as soup
from html5print import HTMLBeautifier
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

## Initialize Splinter

In [11]:
# Resolve a chromedriver binary through webdriver_manager, then start the
# headless Chrome session that the scraping cells below drive.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=True, **executable_path)



Current google-chrome version is 98.0.4758
Get LATEST chromedriver version for 98.0.4758 google-chrome
Driver [/Users/colinbrineman/.wdm/drivers/chromedriver/mac64/98.0.4758.80/chromedriver] found in cache


# Web scraping

## News article

In [12]:
# Load the Mars news site and parse the rendered page.
url = 'https://redplanetscience.com'
browser.visit(url)

# Give the article list up to one second to appear. The call returns a
# boolean, so keep it for the error message instead of discarding it.
list_loaded = browser.is_element_present_by_css('div.list_text', wait_time=1)

html = browser.html
news_soup = soup(html, 'html.parser')

# First 'div.list_text' on the page; the Title and Paragraph cells read from it.
slide_elem = news_soup.select_one('div.list_text')
if slide_elem is None:
    # Fail here with a clear message rather than with an AttributeError on
    # `None.find(...)` in the next cell.
    raise RuntimeError(
        f"No 'div.list_text' element found at {url} "
        f"(present after wait: {list_loaded})"
    )

### Title

In [13]:
# Headline: text of the 'content_title' div inside the selected article.
title_div = slide_elem.find('div', class_='content_title')
news_title = title_div.get_text()
print(news_title)

MAVEN Maps Electric Currents around Mars that are Fundamental to Atmospheric Loss


### Paragraph

In [14]:
# Teaser: text of the 'article_teaser_body' div inside the selected article.
teaser_div = slide_elem.find('div', class_='article_teaser_body')
news_p = teaser_div.get_text()
print(news_p)

Five years after NASA’s MAVEN spacecraft entered into orbit around Mars, data from the mission has led to the creation of a map of electric current systems in the Martian atmosphere.


## Surface image

In [15]:
# Open the featured-image lightbox and read the full-size image URL from it.
url = 'https://spaceimages-mars.com/'
browser.visit(url)

# NOTE(review): index 1 is assumed to be the "full image" button; this
# positional lookup breaks if the page's button order changes.
full_image_elem = browser.find_by_tag('button')[1]
full_image_elem.click()

# The lightbox <img> is presumably inserted by the click, so wait for it
# before snapshotting the HTML (same wait pattern as the news cell).
browser.is_element_present_by_css('img.fancybox-image', wait_time=1)

html = browser.html
img_soup = soup(html, 'html.parser')
full_image = img_soup.find('img', class_='fancybox-image')
if full_image is None:
    # Explicit failure instead of an AttributeError on `None.get('src')`.
    raise RuntimeError(
        f"No 'img.fancybox-image' element found at {url} after clicking"
    )
img_url_rel = full_image.get('src')

# urljoin resolves relative, root-relative and absolute src values alike.
img_url = urljoin(url, img_url_rel)
print(img_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


## Facts table

In [16]:
# Read the first table on the Mars facts page, label its three columns,
# and index the result by the description column.
facts = pd.read_html('https://galaxyfacts-mars.com')[0]
facts.columns = ['description', 'Mars', 'Earth']
df = facts.set_index('description')

# Pretty-print the table as indented HTML.
facts_html = df.to_html()
print(HTMLBeautifier.beautify(facts_html))

<html>
  <head>
  </head>
  <body>
    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;">
          <th>
          </th>
          <th>
            Mars
          </th>
          <th>
            Earth
          </th>
        </tr>
        <tr>
          <th>
            description
          </th>
          <th>
          </th>
          <th>
          </th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th>
            Mars - Earth Comparison
          </th>
          <td>
            Mars
          </td>
          <td>
            Earth
          </td>
        </tr>
        <tr>
          <th>
            Diameter:
          </th>
          <td>
            6,779 km
          </td>
          <td>
            12,742 km
          </td>
        </tr>
        <tr>
          <th>
            Mass:
          </th>
          <td>
            6.39 &times; 10^23 kg
          </td>
          <td>
            5.97 &times; 10^24 kg
   

## Hemisphere images

In [17]:
# Collect the image URL and title shown on each Mars hemisphere page.
url = 'https://marshemispheres.com/'
browser.visit(url)
html = browser.html
hemisphere_soup = soup(html, 'html.parser')

# Each 'div.item' on the index page carries a link that is visited below.
hemisphere_urls = [
    urljoin(url, item.a['href'])
    for item in hemisphere_soup.find_all('div', class_='item')
]

hemispheres = []
for hemisphere_url in hemisphere_urls:
    browser.visit(hemisphere_url)
    detail_soup = soup(browser.html, 'html.parser')
    img_rel = detail_soup.find('img', class_='wide-image').get('src')
    title = detail_soup.find('h2', class_='title').get_text()
    # Keys match the recorded cell output ('img_url' / 'title'). The URL is
    # built inline so the featured-image `img_url` from the Surface image
    # cell is not overwritten by this loop.
    hemispheres.append({'img_url': urljoin(url, img_rel), 'title': title})

pprint(hemispheres)

[{'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]


# Quit browser

In [18]:
# Close the headless Chrome session started in the Initialize Splinter cell.
browser.quit()