In [1]:
# MISSION TO MARS SCRAPING PROJECT #

# Project dependencies
import os
import re
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Point to the chromedriver for splinter.
# The CHROMEDRIVER_PATH environment variable overrides the default location so
# the notebook can run on machines other than the one it was written on; when
# the variable is unset the original path is used unchanged.
chromedriver_path = os.environ.get(
    "CHROMEDRIVER_PATH",
    "C:\\Users\\dwint\\web-scraping-challenge\\Mission_to_Mars\\chromedriver",
)
executable_path = {'executable_path': chromedriver_path}

# headless=False opens a visible Chrome window while the scrape runs
browser = Browser("chrome", **executable_path, headless=False)

In [12]:
# SCRAPE MARS NEWS ARTICLE #

# Visit NASA Mars News url using splinter
news_url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
browser.visit(news_url)

# The article list may still be rendering when visit() returns, so wait (up to
# 10 seconds, returning as soon as it appears) for the first title element
# before snapshotting the page. Without this the parse cell below can find
# nothing and fail on `.text`.
browser.is_element_present_by_css('div.content_title', wait_time=10)

# Create html object and parse with BeautifulSoup
html_article = browser.html

soup = BeautifulSoup(html_article, 'html.parser')

In [13]:
# Pull the first headline and its teaser paragraph out of the parsed news page.
# Each tag is looked up and read in turn, title first, then teaser.
title_tag = soup.find('div', attrs={'class': 'content_title'})
news_title = title_tag.text

teaser_tag = soup.find('div', attrs={'class': 'article_teaser_body'})
news_p = teaser_tag.text

# Show both values, one per line
print(news_title, news_p, sep='\n')

NASA's Mars Reconnaissance Orbiter Undergoes Memory Update
Other orbiters will continue relaying data from Mars surface missions for a two-week period.


In [14]:
# SCRAPE MARS IMAGES #

# Open the NASA JPL space-images page (Mars category) in the splinter browser
mars_image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(mars_image_url)

# Capture the page source from the browser and hand it to BeautifulSoup
html_image = browser.html
soup = BeautifulSoup(markup=html_image, features='html.parser')

In [15]:
# Scrape image url from the inline style attribute of the first <article> tag
article_style = soup.find('article')['style']

# Extract whatever sits inside `url(...)`, with or without surrounding quotes.
# A regex tolerates variations (no trailing ';', extra whitespace, missing
# quotes) that the previous replace-and-slice approach would silently mangle.
url_match = re.search(r"url\(\s*['\"]?(.*?)['\"]?\s*\)", article_style)
if url_match is None:
    raise ValueError("No url(...) found in article style: " + repr(article_style))

# NOTE: this rebinds mars_image_url (the page URL set in the previous cell) to
# the image path, exactly as the original cell did.
mars_image_url = url_match.group(1)

# Main website url
main_url = "https://www.jpl.nasa.gov"

# Combine main website url with scraped image url
featured_image_url = urljoin(main_url, mars_image_url)

# Print the combined url link to the featured_image
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17044-1920x1200.jpg


In [3]:
# SCRAPE MARS WEATHER #

# Load the Mars weather report Twitter feed in the splinter browser
weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)

# Capture the page source from the browser and hand it to BeautifulSoup
html_weather = browser.html
soup = BeautifulSoup(markup=html_weather, features='html.parser')

In [6]:
# Look up the first <p> tag carrying the tweet-text class string and print it
tweet_class = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'
mars_weather = soup.find('p', attrs={'class': tweet_class})

# NOTE(review): the recorded output of this cell is None, i.e. no tag matched
# in the captured page - confirm the selector against the current page markup.
print(mars_weather)

None


In [105]:
# SCRAPE MARS FACTS #

# Let pandas fetch https://space-facts.com/mars/ and parse its tables
mars_tables = pd.read_html('https://space-facts.com/mars/')

# The facts table is the entry at position 2 of the returned list
mars_facts_df = mars_tables[2]

# Label the two columns; the default integer index is left in place
mars_facts_df.columns = ["Mars Feature", "Fact"]

print(mars_facts_df)

           Mars Feature                           Fact
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers


In [70]:
# Convert DataFrame to html
# The result is stored back under the same name, so after the first run
# mars_facts_df holds an HTML string rather than a DataFrame. The type guard
# makes the cell safe to re-run: a second execution is a no-op instead of
# failing with "'str' object has no attribute 'to_html'".
if isinstance(mars_facts_df, pd.DataFrame):
    mars_facts_df = mars_facts_df.to_html()

print(mars_facts_df)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars Feature</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
   

In [None]:
# SCRAPE MARS HEMISPHERE IMAGES #

In [106]:
# Open the US Astrogeology search results for enhanced Mars hemispheres in the splinter browser
mars_hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(mars_hemispheres_url)

# Capture the page source from the browser and hand it to BeautifulSoup
html_hemispheres = browser.html
soup = BeautifulSoup(markup=html_hemispheres, features='html.parser')

In [107]:
# Create a Hemisphere Image list holding one dictionary per hemisphere with its title and image url string.
# Hemisphere Image list code fashioned after coding by vabigdatamover @ https://github.com/vabigdatamover/web-scraping-challenge/blob/master/Missions_to_Mars/mission_to_mars.ipynb
hemisphere_image_list = []

# Titles are read from the search-results page parsed in the previous cell
hemisphere_titles = soup.find_all('h3')

for i, title_tag in enumerate(hemisphere_titles):
    hemisphere_title = title_tag.text
    print(hemisphere_title)

    # Look the <h3> elements up again on every pass and click the i-th one;
    # presumably handles obtained before a click/back navigation cannot be reused.
    hemisphere_images = browser.find_by_tag('h3')
    hemisphere_images[i].click()

    # Parse the detail page under its own names so that `soup` and
    # `html_hemispheres` keep referring to the search-results page. Rebinding
    # them here made this cell read its titles from a stale detail page when
    # it was executed a second time.
    detail_html = browser.html
    detail_soup = BeautifulSoup(detail_html, 'html.parser')

    # The full-size image is the <img class="wide-image">; its src is site-relative
    hemi_img_url = detail_soup.find('img', class_='wide-image')['src']
    hemi_img_url = "https://astrogeology.usgs.gov" + hemi_img_url
    print(hemi_img_url)

    hemisphere_dict = {"title": hemisphere_title, "img_url": hemi_img_url}
    hemisphere_image_list.append(hemisphere_dict)

    # Return to the search results before handling the next hemisphere
    browser.back()

Cerberus Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
Schiaparelli Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
Syrtis Major Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
Valles Marineris Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


In [108]:
# Print the collected list of {"title", "img_url"} dictionaries, one per hemisphere
print(hemisphere_image_list)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]
