In [1]:
# Dependencies

from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

import config

In [2]:
# Start a Splinter-driven Chrome session. The driver path is whatever
# webdriver_manager's install() returns; headless mode is switched off.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Driver [C:\Users\Ben Galde\.wdm\drivers\chromedriver\win32\88.0.4324.96\chromedriver.exe] found in cache




In [3]:
# Load the news page (URL kept in config) in the browser, then parse the
# page source the browser reports so the article title and teaser can be
# pulled from it.
browser.visit(config.news_url)
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [4]:
# Collect every <div class="content_title"> on the news page.
latest_news = soup.find_all('div', class_='content_title')
# The first match is a title rather than an article, so the latest
# headline is read from the second match.
news_title = latest_news[1].text
print(news_title)

Testing Proves Its Worth With Successful Mars Parachute Deployment


In [5]:
# Collect every <div class="article_teaser_body"> and keep the text of
# the first one as the teaser paragraph.
latest_teaser = soup.find_all('div', class_='article_teaser_body')
news_p = latest_teaser[0].text
print(news_p)

The giant canopy that helped land Perseverance on Mars was tested here on Earth at NASA’s Wallops Flight Facility in Virginia.


In [6]:
# Load the featured-image page (URL kept in config) and parse the page
# source the browser reports.
browser.visit(config.image_url)
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [8]:
# Find every <a class="showimg"> link; the href of the first one is the
# path of the featured image.
image_soup = soup.find_all('a', class_='showimg')
picture = image_soup[0]['href']
# Resolve the href against the page URL. urljoin replaces the last path
# segment of the base (the page's file name) when the href is relative and
# also copes with root-relative or absolute hrefs, so the result does not
# depend on the page URL containing the literal text 'index.html'.
featured_image_url = urljoin(config.image_url, picture)
print(featured_image_url)

https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars1.jpg


In [9]:
# Let pandas parse every HTML table found at the facts URL (kept in
# config); the first table in the returned list holds the Mars facts.
tables = pd.read_html(io=config.fact_url)
fact_df = tables[0]
fact_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [10]:
# Give the two unlabeled columns (0 and 1) descriptive headers. rename
# returns a new frame, so fact_df itself is left untouched.
mars_fact_df = fact_df.rename({0: 'Aspect', 1: 'Measurement'}, axis='columns')
mars_fact_df

Unnamed: 0,Aspect,Measurement
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [11]:
# Render the facts table as an HTML string without the index column.
# justify='left' asks pandas for left-aligned column headers directly, so
# the result does not rely on patching the default 'text-align: right;'
# style text after the fact.
mars_table_string = mars_fact_df.to_html(index=False, justify='left')
# Earlier name kept as an alias in case other cells still refer to it.
mars_table_str = mars_table_string
mars_table_string

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: left;">\n      <th>Aspect</th>\n      <th>Measurement</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [12]:
# Load the hemisphere index page (URL kept in config) and parse the page
# source the browser reports.
browser.visit(config.hemi_url)
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [13]:
# Each <div class="description"> carries one hemisphere's text block.
hemi_soup = soup.find_all('div', class_='description')

# Build the title list in one pass: keep the text before the first '/',
# then strip the trailing ' Enhancedimage' fragment that is left over.
hemi_titles = [
    description.text.split('/')[0].replace(' Enhancedimage', '')
    for description in hemi_soup
]

print(hemi_titles)

['Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere', 'Valles Marineris Hemisphere']


In [16]:
# Site root that each image's src path is appended to, and the list that
# collects the resulting absolute image URLs.
hemi_base_url = 'https://astrogeology.usgs.gov'
hemi_images = []

# Visit every hemisphere detail page listed in config and record the URL
# of its wide image.
for url in config.hemi_url_list:
    browser.visit(url)

    # Loop-local names are used on purpose: the `soup` and `hemi_soup`
    # objects built by earlier cells stay intact, so those cells can be
    # re-run without revisiting the index page.
    page_soup = BeautifulSoup(browser.html, 'html.parser')
    wide_image = page_soup.find('img', {'class' : 'wide-image'})

    # find() returns None when nothing matches; fail with a message that
    # names the page instead of an opaque subscript error on None.
    if wide_image is None:
        raise ValueError('No <img class="wide-image"> found on {}'.format(url))

    # Prefix the src path with the base URL defined at the top of this cell.
    hemi_images.append(hemi_base_url + wide_image['src'])

print(hemi_images)

['https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']


In [17]:
# Pair each hemisphere title with its image URL, one dict per hemisphere.
# The two lists are matched by position, so a length mismatch means a
# scrape went wrong; stop instead of silently dropping or mispairing items.
if len(hemi_titles) != len(hemi_images):
    raise ValueError(
        'Expected one image per title, got {} titles and {} images'.format(
            len(hemi_titles), len(hemi_images)
        )
    )

hemisphere_image_urls = [
    {'title': hemi_title, 'img_url': hemi_image}
    for hemi_title, hemi_image in zip(hemi_titles, hemi_images)
]

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]