In [2]:
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd
from selenium import webdriver

### Scrape Mars News

In [2]:
# Mars news URL
news_url = "https://mars.nasa.gov/news/"

# Retrieve page
news_html = requests.get(news_url)

# Create & parse BeautifulSoup object
soup = BeautifulSoup(news_html.text, 'html.parser')

In [3]:
# Headline of first news article
news_title = soup.find('div', class_ = "content_title").text.strip("\n")
news_title

'Tiny Crystal Shapes Get Close Look From Mars Rover'

In [4]:
# Paragraph text of first news article
news_p = soup.find('div', class_ = "rollover_description_inner").text.strip("\n")
news_p

"Star-shaped, tiny, dark bumps in the fine-layered bright bedrock of a Martian ridge are drawing close inspection by NASA's Curiosity Mars rover."

### Scrape JPL Featured Mars Image

In [5]:
# JPL URL
pic_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Navigate to site
browser = Browser('chrome', headless=False)
browser.visit(pic_url)

# Click to see full-size image
browser.click_link_by_partial_text('FULL IMAGE')

In [6]:
# Retrieve page
pic_html = browser.html

# Create & parse BeautifulSoup object
soup = BeautifulSoup(pic_html, 'html.parser')

In [7]:
# Image URL
img = soup.find('a', class_ = 'button fancybox')['data-fancybox-href']
img_url = "https://www.jpl.nasa.gov" + img
img_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA20057_ip.jpg'

### Mars Weather

In [2]:
# Mars news URL
weath_url = "https://twitter.com/marswxreport?lang=en"

# Retrieve page
weath_html = requests.get(weath_url)

# Create & parse BeautifulSoup object
soup = BeautifulSoup(weath_html.text, 'html.parser')

In [9]:
# Grab recent tweets
tweets = soup.find_all('p')[0:10]

In [10]:
# Grab only weather tweets
weather_tweets = []

for tweet in tweets:
    split_tweet = tweet.text.split()
    if split_tweet[0] == 'Sol':
        weather_tweet = ' '.join(split_tweet)
        weather_tweets.append(weather_tweet)
        
weather_tweets

['Sol 1955 (Feb 04, 2018), Sunny, high -21C/-5F, low -77C/-106F, pressure at 7.45 hPa, daylight 05:41-17:27',
 'Sol 1954 (Feb 03, 2018), Sunny, high -17C/1F, low -77C/-106F, pressure at 7.45 hPa, daylight 05:42-17:27',
 'Sol 1951 (Jan 31, 2018), Sunny, high -23C/-9F, low -77C/-106F, pressure at 7.49 hPa, daylight 05:42-17:28',
 'Sol 1949 (Jan 29, 2018), Sunny, high -20C/-4F, low -78C/-108F, pressure at 7.51 hPa, daylight 05:42-17:28',
 'Sol 1946 (Jan 26, 2018), Sunny, high -20C/-4F, low -79C/-110F, pressure at 7.53 hPa, daylight 05:43-17:28']

In [11]:
# Most recent weather tweet
mars_weather = weather_tweets[0]
mars_weather

'Sol 1955 (Feb 04, 2018), Sunny, high -21C/-5F, low -77C/-106F, pressure at 7.45 hPa, daylight 05:41-17:27'

### Mars Facts

In [23]:
# URL
fact_url = "https://space-facts.com/mars/"

# Get table
table = pd.read_html(fact_url)
table = pd.DataFrame(table[0]).set_index(0)
table

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [24]:
# Convert table to HTML string
table_html = table.to_html(header = False) \
.replace('\n  ','').replace('<table border="1" class="dataframe">','') \
.replace('</table','').replace('\n>','')
table_html

'<tbody>  <tr>    <th>Equatorial Diameter:</th>    <td>6,792 km</td>  </tr>  <tr>    <th>Polar Diameter:</th>    <td>6,752 km</td>  </tr>  <tr>    <th>Mass:</th>    <td>6.42 x 10^23 kg (10.7% Earth)</td>  </tr>  <tr>    <th>Moons:</th>    <td>2 (Phobos &amp; Deimos)</td>  </tr>  <tr>    <th>Orbit Distance:</th>    <td>227,943,824 km (1.52 AU)</td>  </tr>  <tr>    <th>Orbit Period:</th>    <td>687 days (1.9 years)</td>  </tr>  <tr>    <th>Surface Temperature:</th>    <td>-153 to 20 °C</td>  </tr>  <tr>    <th>First Record:</th>    <td>2nd millennium BC</td>  </tr>  <tr>    <th>Recorded By:</th>    <td>Egyptian astronomers</td>  </tr></tbody>'

### Mars Hemispheres

In [14]:
# USGS Astrogeology
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Navigate to site
browser = Browser('chrome', headless=False)
browser.visit(hemi_url)

In [15]:
# Empty list of hemisphere dictionaries
hemisphere_image_urls = []

# Go through all 4 images
hemi = 1
while hemi < 5:
    # Empty dictionary
    hemi_dict = {}
    
    # Click to enhanced image
    browser.click_link_by_partial_text('Hemisphere Enhanced')
    
    # Link of enhanced image page -> soup
    soup = BeautifulSoup(browser.html, 'html.parser')
    
    # Title
    title = soup.find('div', class_ = 'content')
    hemi_dict['title'] = title.find('h2', class_ = 'title').text
    
    # Image
    download_box = soup.find('div', class_ = 'downloads')
    hemi_dict['img_url'] = download_box.find('a')['href']
    
    # Append title and image to dictionary
    hemisphere_image_urls.append(hemi_dict)
    
    browser.back
    
    # Increment hemisphere count
    hemi += 1

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
