In [1]:
from bs4 import BeautifulSoup
import pymongo
from splinter import Browser
import requests
import time
import pandas as pd

# Scraping

In [2]:
#set up splinter browser
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless = False)

#visit url
url = "https://mars.nasa.gov/news/"
browser.visit(url)
time.sleep(2)

#pull html text and parse
html_code = browser.html
soup = BeautifulSoup(html_code, "html.parser")
#soup

# NASA Mars News

In [3]:
news_title = soup.find('div', class_="content_title").text
news_title

"NASA's Next Mars Lander Spreads its Solar Wings"

In [4]:
news_para = soup.find('div', class_="article_teaser_body").text
news_para

"NASA's next mission to Mars passed a key test Tuesday, extending the solar arrays that will power the InSight spacecraft once it lands on the Red Planet this November."

# JPL Mars Space Images

In [5]:
# Featured Image URL & visit
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)

#navigate to link
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(5)


In [6]:
browser.click_link_by_partial_text('more info')

#get html code once at page
image_html = browser.html

#parse
soup = BeautifulSoup(image_html, "html.parser")

#find path and make full path
image_path = soup.find('figure', class_='lede').a['href']
featured_image_url = "https://www.jpl.nasa.gov/" + image_path

# Mars Weather

In [7]:
marsweather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(marsweather_url)

weather_html = browser.html

soup = BeautifulSoup(weather_html, 'html.parser')

mars_weather = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
mars_weather

'Sol 1946 (Jan 26, 2018), Sunny, high -20C/-4F, low -79C/-110F, pressure at 7.53 hPa, daylight 05:43-17:28'

# Mars Facts

In [8]:
#mars facts url and splinter visit
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)

#get html
facts_html = browser.html

soup = BeautifulSoup(facts_html, 'html.parser')
#soup

In [9]:
#get the entire table
table_data = soup.find('table', class_="tablepress tablepress-id-mars")

In [10]:
#find all instances of table row
table_all = table_data.find_all('tr')

#set up lists to hold td elements which alternate between label and value
labels = []
values = []

#for each tr element append the first td element to labels and the second to values
for tr in table_all:
    td_elements = tr.find_all('td')
    labels.append(td_elements[0].text)
    values.append(td_elements[1].text)
        

In [11]:
#make a data frame and view
mars_facts_df = pd.DataFrame({
    "Label": labels,
    "Values": values
})

In [12]:
mars_facts_df

Unnamed: 0,Label,Values
0,Equatorial Diameter:,"6,792 km\n"
1,Polar Diameter:,"6,752 km\n"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)\n
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# get html code for DataFrame
fact_table = mars_facts_df.to_html(header = False, index = False)
fact_table

'<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km\\n</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km\\n</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)\\n</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [14]:
# new url
usgs_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

browser.visit(usgs_url)

usgs_html = browser.html

soup = BeautifulSoup(usgs_html, "html.parser")
#soup

In [15]:
#class holding hemisphere picture
returns = soup.find('div', class_="collapsible results")
hemispheres = returns.find_all('a')

#setup list to hold dictionaries
hemisphere_image_urls =[]

for a in hemispheres:
    #get title and link from main page
    title = soup.find('h3').text
    link = "https://astrogeology.usgs.gov" + a['href']
    
    #follow link from each page
    browser.visit(link)
    time.sleep(5)
    
  

In [16]:
 #get image links
image_page = browser.html
results = BeautifulSoup(image_page, 'html.parser')
img_link = results.find('div', class_='downloads').find('li').a['href']
    
# create image dictionary for each image and title
image_dict = {}
image_dict['title'] = title
image_dict['img_url'] = img_link
    
hemisphere_image_urls.append(image_dict)
    
print(hemisphere_image_urls)
    


[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
