In [1]:
import pandas as pd
import os
from bs4 import BeautifulSoup as bs
import requests

In [2]:
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

In [3]:
import json
import tweepy
from config import consumer_key, consumer_secret, access_token, access_token_secret

### NASA Mars News

In [4]:
# Locate the chromedriver binary on PATH; the path printed here is the one the browser setup uses.
!which chromedriver

/usr/local/bin/chromedriver


In [5]:
# Point splinter at the chromedriver binary and open a visible (non-headless) Chrome window.
executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

In [6]:
# Load the NASA Mars news listing in the automated browser.
news_url = "https://mars.nasa.gov/news/"
browser.visit(news_url)

In [7]:
# Snapshot the page as currently rendered by the browser and parse it with
# Python's built-in HTML parser.
news_html = browser.html
news_soup = bs(markup=news_html, features='html.parser')

In [8]:
# Report the first headline/teaser pair found inside each <ul class="item_list">.
# After the loop, news_title and news_p hold the values from the last list processed.
items = news_soup.find_all('ul', class_='item_list')
for item in items:
    title_tag = item.find('div', class_='content_title')
    teaser_tag = item.find('div', class_='article_teaser_body')
    if title_tag is None or teaser_tag is None:
        # find() returns None when the element is absent (e.g. the list was not
        # populated yet); skip instead of failing on None.text.
        continue

    news_title = title_tag.text
    news_p = teaser_tag.text
    print("----------------")
    print("Latest News")
    print(f"Title: {news_title}")
    print(news_p)


----------------
Latest News
Title: NASA's InSight Will Study Mars While Standing Still
The lander's unique science can teach us how planets are born.


### JPL Mars Space Images

In [9]:
# Re-check where chromedriver lives before opening the browser for this section.
!which chromedriver

/usr/local/bin/chromedriver


In [10]:
# Shut down any browser left open by an earlier section before starting a new one;
# rebinding `browser` without quitting leaves the old Chrome/chromedriver running.
try:
    browser.quit()
except NameError:
    # This section is being run on its own: no earlier browser exists in this kernel.
    pass

executable_path = {'executable_path':'/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [11]:
# Load the JPL space-images listing filtered to the Mars category.
# Note: img_url holds the page URL here; later cells reassign the same name.
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(img_url)

In [12]:
# Snapshot the rendered JPL page and parse it with Python's built-in HTML parser.
img_html = browser.html
img_soup = bs(markup=img_html, features='html.parser')

In [13]:
# The featured image is read from the inline `style` attribute of the first <article>
# tag, which wraps the path as: background-image: url('/<path>');
feat = img_soup.find('article')['style']

css_prefix = "background-image: url('/"
css_suffix = "');"

# Peel the CSS wrapper off in two steps: img_url has only the prefix removed,
# img is the bare site-relative path.
img_url = feat.replace(css_prefix, "")
img = img_url.replace(css_suffix, "")

In [14]:
# Show the extracted site-relative image path.
img

'spaceimages/images/wallpaper/PIA15253-1920x1200.jpg'

In [15]:
# Prefix the JPL site root to the site-relative image path held in `img`.
featured_image_url = f'https://www.jpl.nasa.gov/{img}'

In [16]:
# Show the absolute URL of the featured image.
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA15253-1920x1200.jpg'

### Mars Weather

In [17]:
# Authenticate against the Twitter API using the four credentials imported from config.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Ask for parsed JSON so results can be indexed like plain dicts/lists.
json_parser = tweepy.parsers.JSONParser()
api = tweepy.API(auth, parser=json_parser)

In [18]:
# Twitter account whose timeline is read for the Mars weather report.
target_user = "MarsWxReport"

In [19]:
# Fetch the first page of the target account's timeline.
# The account is named explicitly via screen_name rather than positionally, and only
# options that belong to the user-timeline endpoint are sent (result_type is a search
# option, so it is not passed here).
tweets = api.user_timeline(screen_name=target_user, page=1)

In [20]:
# Use the text of the first tweet in the returned timeline as the weather report.
# NOTE(review): the displayed value ends in "…" followed by a t.co link, which suggests the
# API returned a truncated tweet; presumably tweet_mode="extended" plus the 'full_text' key
# is needed to get the whole report — confirm against the tweepy/Twitter docs.
mars_weather = tweets[0]['text']
mars_weather

'Wondering where the Mars weather reports have been?  Curiosity is back online after a memory anomaly on Sol 2172. R… https://t.co/XrzPt8AKBa'

### Mars Facts

In [21]:
#!which chromedriver

In [22]:
#executable_path = {'executable_path':'/usr/local/bin/chromedriver'}
#browser = Browser('chrome', **executable_path, headless=False)

In [23]:
# Page holding the Mars facts table. It is passed straight to pd.read_html in the next
# cell, so the splinter/BeautifulSoup route below is left disabled.
facts_url = "https://space-facts.com/mars/"
#browser.visit(facts_url)
#facts_html = browser.html
#facts_soup = bs(facts_html, 'html.parser')
#print(facts_soup.prettify())

In [24]:
# pandas downloads the page itself; `table` is indexed below to take its first element.
table = pd.read_html(facts_url)
facts_raw = table[0]

# Label the two positional columns, then index the frame by the fact description.
columns_renamed = facts_raw.rename({0: "Description", 1: "Value"}, axis="columns")
mars_facts = columns_renamed.set_index(keys='Description')

# Write the table out as an HTML fragment in the working directory.
mars_facts.to_html(buf='mars_facts.html')


### Mars Hemispheres

In [25]:
# Re-check where chromedriver lives before opening the browser for this section.
!which chromedriver

/usr/local/bin/chromedriver


In [26]:
# Shut down any browser left open by an earlier section before starting a new one;
# rebinding `browser` without quitting leaves the old Chrome/chromedriver running.
try:
    browser.quit()
except NameError:
    # This section is being run on its own: no earlier browser exists in this kernel.
    pass

executable_path = {'executable_path':'/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [27]:
# Load the USGS Astrogeology search results for "hemisphere enhanced" with target Mars.
hem_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hem_url)

In [28]:
# Collect the detail-page link of every search result on the hemisphere listing page.
hem_html = browser.html
hem_soup = bs(hem_html, 'html.parser')
results = hem_soup.find_all('div', class_='item')

link_list = []
for result in results:
    # find('a') yields the first anchor in the result block, or None if there is none.
    anchor = result.find('a')
    if anchor is None or not anchor.get('href'):
        continue
    link = anchor['href']
    link_list.append(link)

# Drop any leading slash from the href so that joining it to the site root cannot
# produce a doubled slash ("...usgs.gov//search/...").
url_list = ['https://astrogeology.usgs.gov/' + link.lstrip('/') for link in link_list]

In [29]:
# Visit each hemisphere detail page and record its title and full-size image URL.
# The browser that is already open is reused for every page, instead of launching
# (and never closing) a fresh Chrome instance per URL.
img_url_list = []
title_list = []
try:
    for page_url in url_list:
        browser.visit(page_url)
        page_html = browser.html
        page_soup = bs(page_html, 'html.parser')

        title_tag = page_soup.find('h2', class_='title')
        image = page_soup.find('img', class_='wide-image')
        if title_tag is None or image is None:
            # Append nothing for an incomplete page, so title_list and img_url_list
            # stay index-aligned for the zip() that pairs them afterwards.
            print(f"Skipping {page_url}: title or image not found")
            continue

        title = title_tag.text
        title_list.append(title)

        # The image src is site-relative with a leading slash, so no separator is added.
        img_url = "https://astrogeology.usgs.gov" + image["src"]
        img_url_list.append(img_url)
except ElementDoesNotExist:
    # NOTE(review): nothing in the loop body appears to use splinter's element finders,
    # so this handler is presumably never reached — confirm before relying on it.
    print("Scraping complete!")


In [30]:
# Map each hemisphere title to its image URL; zip pairs the two lists positionally
# and stops at the shorter one.
hem_dict = {name: url for name, url in zip(title_list, img_url_list)}
hem_dict

{'Cerberus Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
 'Schiaparelli Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
 'Syrtis Major Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
 'Valles Marineris Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}

In [31]:
# TODO(review): unfinished draft, left disabled. As originally sketched, every dict would
# have held the whole title_list / img_url_list rather than one title/URL pair. If a list
# of per-hemisphere dicts is wanted, it could be built like this:
#hemisphere_image_urls = [
    #{"title": title, "img_url": img_url}
    #for title, img_url in zip(title_list, img_url_list)]