# the red planet
### setting up

In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
import time

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.mars_db
collection = db.articles

### Scrape NASA Mars News Site

In [4]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [5]:
# Retrieve Title and Article
results = soup.find_all('div', class_='slide')

for result in results:
    news_title = result.find('div', class_='content_title').text
    news_p = result.a.div.text

# Save to a Dictionary and post to DB
    post = {
    'news_title': news_title, 
    'news_detail': news_p
    }

    collection.insert_one(post)

### Scrape JPL Featured Space Image

In [6]:
from splinter import Browser

In [7]:
# Set up Path & Browser
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [8]:
# URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [9]:
# Design an XPATH selector to direct Splinter to click the FULL IMAGE button
xpath = '/html/body/div[1]/div/div[3]/section[1]/div/div/article/div[1]/footer/a'

In [10]:
# Click FULL IMAGE button to get full-size image
results = browser.find_by_xpath(xpath)
img = results[0]
img.click()

In [11]:
# Grab the image link
time.sleep(3)

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

time.sleep(1)

img_url = soup.find('img', class_='fancybox-image')['src']
print(img_url)

/spaceimages/images/mediumsize/PIA13911_ip.jpg


In [12]:
base_url = "https://www.jpl.nasa.gov"
featured_image_url = base_url + img_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA13911_ip.jpg'

### Scrape Mars Weather Twitter

In [13]:
# URL of page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [14]:
# Retrieve Title and Article
mars_weather = soup.find('div', class_='js-tweet-text-container').text.strip()
mars_weather


'InSight sol 193 (2019-06-12) low -103.7ºC (-154.7ºF) high -23.0ºC (-9.4ºF)\nwinds from the SSE at 4.4 m/s (9.8 mph) gusting to 15.7 m/s (35.1 mph)\npressure at 7.60 hPapic.twitter.com/EhemsIhPLv'

### Scrape Mars Facts

In [15]:
import pandas as pd

In [16]:
url = 'https://space-facts.com/mars/'

In [17]:
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [18]:
facts_df = tables[0]
facts_df.columns = ['Description', 'Value']
facts_df.head()

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"


In [19]:
facts_df.set_index('Description', inplace=True)
facts_df.head()

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"


In [20]:
facts_df.to_html('Resources/table.html')

### Scrape USGS Astrogeology

In [21]:
# URL
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [22]:
# Design an XPATH selector to direct Splinter to click the Hemisphere Link
xpath1 = '/html/body/div/div[1]/div/section/div/div[2]/div['
xpath2 = ']/div/a'

In [23]:
hemisphere_image_urls = []

for x in range(1,5):
    # Clicking the Hemisphere Links to get to next page
    xpath = xpath1 + str(x) + xpath2
    results = browser.find_by_xpath(xpath)
    img = results[0]
    img.click()
    
    # Soup
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    # Grab the link
    iresults = soup.find_all('div', class_='downloads')
    for result in iresults:
        img_url = result.find('a', href=True)['href']
        #print(img_url)
    
    # Soup
    #html = browser.html
    #soup = BeautifulSoup(html, 'html.parser')
    
    # Grab the link
    tresults = soup.find_all('div', class_='content')
    for result in tresults:
        title = result.find('h2', class_='title').text
        title = title.replace(' Enhanced', '')
        #print(title)
    
    # Save to a Dictionary and post to DB
    hemisphere_dict = {
    'title': title, 
    'img_url': img_url
    }
    hemisphere_image_urls.append(hemisphere_dict)   
    
    # Go back to Home Page in browser
    browser.back()

In [24]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]