# Scraping using BeautifulSoup, Pandas, and Requests/Splinter

In [1]:
## Dependencies

# Make IPython echo every bare expression in a cell, not only the last one.
# Later cells that list several variable names rely on this to show all of them.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'


import requests
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time

# Star-import of the module where all URLs and paths are saved.
# NOTE(review): presumably supplies browser_choice, executable_path,
# nasa_mars_news and nasa_jpl used by the scrapers below -- confirm, and
# consider importing those names explicitly so their origin is visible.
from urls_list import * #where all urls and paths are saved

## NASA Mars News

* **Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that we can reference later.**

In [8]:
def scrape_latest_news():
    """Scrape the latest headline and teaser paragraph from the NASA Mars News page.

    Opens a headless browser, visits ``nasa_mars_news``, and parses the
    rendered HTML with BeautifulSoup (lxml parser).

    Returns:
        tuple: ``(news_title, news_p)``. Both are ``None`` when scraping
        fails; the error is printed instead of being raised.

    Note:
        ``browser_choice``, ``executable_path`` and ``nasa_mars_news`` are
        not defined in this cell; presumably they come from the
        ``urls_list`` star-import -- verify.
    """
    news_title, news_p = None, None

    # Start the browser outside the try block: if it cannot be created there
    # is nothing to close, and the error should reach the caller.
    browser = Browser(browser_choice, executable_path=executable_path, headless=True)
    try:
        # Visit url
        browser.visit(nasa_mars_news)
        # bs object with lxml parser
        soup = bs(browser.html, 'lxml')
        # First <li class="slide"> on the page, then every <div> nested in
        # its "list_text" container.
        news = soup.find('li', class_='slide').div.find(class_='list_text').find_all('div')
        # Second and third divs hold the title and the teaser text
        # (index 0 is presumably the date -- confirm against the page markup).
        news_title, news_p = news[1].text, news[2].text

    except Exception as e:
        # Best effort: report the problem and fall through with None values.
        print(e)

    finally:
        # Always close the browser -- also on KeyboardInterrupt (kernel
        # interrupt), which `except Exception` does not catch -- so no
        # headless driver process is left behind.
        browser.quit()

    return (news_title, news_p)

In [10]:
# Run the scraper once and keep both results for later cells.
# Either value is None if the scrape failed (the error is printed by the function).
news_title, news_p = scrape_latest_news()

In [11]:
# Both values are echoed because the first cell sets
# InteractiveShell.ast_node_interactivity = 'all'.
news_title
news_p

"NASA's MAVEN Observes Martian Night Sky Pulsing in Ultraviolet Light"

'Vast areas of the Martian night sky pulse in ultraviolet light, according to images from NASA’s MAVEN spacecraft. The results are being used to illuminate complex circulation patterns in the Martian atmosphere.'

## JPL Mars Space Images - Featured Image

* **Find the image URL for the current Featured Mars Image and assign the URL string to a variable called `featured_image_url`.**

In [116]:
def scrape_featured_image_url():
    """Scrape the URL of the featured image from the NASA JPL site.

    Opens a headless browser, visits ``nasa_jpl``, clicks the "FULL IMAGE"
    button, follows the second link in the lightbox button row (the
    "more info" button), and reads the ``href`` of the link inside the
    ``figure.lede`` element on the resulting page.

    Returns:
        str or None: the image link, or ``None`` when any step fails; the
        error is printed instead of being raised.

    Note:
        ``browser_choice``, ``executable_path`` and ``nasa_jpl`` are not
        defined in this cell; presumably they come from the ``urls_list``
        star-import -- verify.
    """
    featured_image_url = None

    # Start the browser outside the try block: if it cannot be created there
    # is nothing to close, and the error should reach the caller.
    browser = Browser(browser_choice, executable_path=executable_path, headless=True)

    try:
        # Visit url
        browser.visit(nasa_jpl)

        # Click a button "FULL IMAGE" (wait up to 1 second for it to appear)
        browser.find_by_id('full_image', wait_time=1).click()

        # Click more info button: second link in the lightbox's button row
        browser.find_by_css('[id="fancybox-lock"]')[0].find_by_css('div[class="buttons"] a:nth-child(2)')[0].click()
        # Fixed pause to let the detail page load before querying it.
        time.sleep(1)

        # Take the image link (largesize)
        featured_image_url = browser.find_by_css('figure[class="lede"] a')['href']

    except Exception as e:
        # Best effort: report the problem and fall through with None.
        print(e)

    finally:
        # Always close the browser -- also on KeyboardInterrupt (kernel
        # interrupt), which `except Exception` does not catch -- so no
        # headless driver process is left behind.
        browser.quit()

    return featured_image_url

In [117]:

# Run the scraper once; the result is None if any step of the scrape failed.
featured_image_url = scrape_featured_image_url()