In [1]:
# Dependencies
import pymongo
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome',**executable_path, headless=False)

In [3]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
html = browser.html

# Create BeautifulSoup object; parse with 'html'
soup = BeautifulSoup(html, 'html.parser')
# soup

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('li', class_='slide')
# Loop through results to retrieve article title and paragraph
news_title = []
news_p = []
for result in results:
    div = result.find('div',class_='content_title')
    link = div.find('a')
    news_title.append(link.text.strip())
    paragraph = result.find('div',class_='article_teaser_body')
    news_p.append(paragraph.text.strip())
    
print(f'News Title:{news_title}')
print("====================")
print(f'News Article:{news_p}')



News Title:['Rover Team Beaming New Commands to Opportunity on Mars', "NASA's Opportunity Rover Logs 15 Years on Mars", "NASA's InSight Places First Instrument on Mars", 'InSight Engineers Have Made a Martian Rock Garden', 'Mars InSight Lander Seen in First Images from Space', "NASA's InSight Takes Its First Selfie", "NASA InSight Lander 'Hears' Martian Winds", "NASA's Mars InSight Flexes Its Arm", "Mars New Home 'a Large Sandbox'", 'NASA Hears MarCO CubeSats Loud and Clear from Mars', 'InSight Is Catching Rays on Mars', 'NASA InSight Lander Arrives on Martian Surface', 'Landing Day for InSight', 'NASA InSight Landing on Mars: Milestones', 'NASA InSight Team on Course for Mars Touchdown', 'What Two Planetary Siblings Can Teach Us About Life', 'NASA Announces Landing Site for Mars 2020 Rover', 'NASA to Host Media Call on Next Mars Landing Site', 'How NASA Will Know When InSight Touches Down', 'NASA Brings Mars Landing to Viewers Everywhere', 'Curiosity on the Move Again', 'The Mars InSi

In [7]:
# JPL Mars Space Images - Featured Image
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url)

In [8]:
image_html = browser.html
soup = BeautifulSoup(image_html,'html.parser')
ul = soup.find('div',class_='carousel_container')
images = ul.find_all('div',class_='carousel_items')

In [9]:
image = images[0]
image_a = image.find('article',class_='carousel_item')
image_href = image_a.get('style')
clean_href = image_href.split("'")
# print(clean_href[1])
url_original = 'https://www.jpl.nasa.gov'
featured_image_url = url_original + clean_href[1]
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18284-1920x1200.jpg


In [10]:
#Mars weather scraping
mars_twitter_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_twitter_url)

In [11]:
html = browser.html
soup = BeautifulSoup(html,'html.parser')
parent_ol = soup.find('ol',id='stream-items-id')
weather_li = parent_ol.find_all('li')
latest_weather = weather_li[0]
latest_weather_p = latest_weather.find('p')
mars_weather = latest_weather_p.text.strip()
print(mars_weather)

Sol 2299 (2019-01-24), high -5C/23F, low -74C/-101F, pressure at 8.18 hPa, daylight 06:46-18:55pic.twitter.com/UNiNdu3U8t


In [12]:
#Mars facts scraping
mars_facts_url = 'http://space-facts.com/mars/'
facts_table = pd.read_html(mars_facts_url)
facts_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [13]:
#convert table to DF, give column header and print
mars_facts_df = facts_table[0]
mars_facts_df.columns = ['Facts', 'Values']
mars_facts_df

Unnamed: 0,Facts,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
#Mars Hemisphere
hemisphere_url = 'https://www.lpi.usra.edu/education/explore/mars/background/'
base_url = "https://www.lpi.usra.edu/education/explore/mars/background/"
browser.visit(hemisphere_url)

In [15]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [16]:
parent_div = soup.find('div', id = 'main')
contents = parent_div.find_all('p', class_="center")
contents
images = dict.fromkeys(['Description', 'img_url'])
images_desc = []
images_url = []
for content in contents:
    img = content.find('img').get('src')
    desc = content.find('img').get('alt')
    img_url = base_url + img
    images_desc.append(desc)
    images_url.append(img_url)    
#     print(img_url)
for elements in images:
    images['Description'] = images_desc
    for items in images_url:
        images['img_url'] = images_url
            
print(images)


{'Description': ['Image of dust storm on Mars.', 'Image of Earth, Moon and Mars inner and outer cores.', 'Image of Mars Orbiter Laser Altimeter (MOLA) map showing elevations of the Martian Surface.', 'Image of Mars Orbiter Lase Altimeter (MOLA) maps shows a distinction between lowlands and highlands.', 'Image of Mars surface showing the prominent Valles Marineris.', 'Image of Olympus Mons and volcanoson the Tharsis bulge.'], 'img_url': ['https://www.lpi.usra.edu/education/explore/mars/background/../images/dust_storm.jpg', 'https://www.lpi.usra.edu/education/explore/mars/background/../images/planets_core.jpg', 'https://www.lpi.usra.edu/education/explore/mars/background/../images/martina_surface_elevations.jpg', 'https://www.lpi.usra.edu/education/explore/mars/background/../images/views_mars.jpg', 'https://www.lpi.usra.edu/education/explore/mars/background/../images/mars_valles_marineris.jpg', 'https://www.lpi.usra.edu/education/explore/mars/background/../images/olympus_moons_volcanos.jp