## Step 1 - Scraping

Complete your initial scraping using Jupyter Notebook, BeautifulSoup, Pandas, and Requests/Splinter.

* Create a Jupyter Notebook file called `mission_to_mars.ipynb` and use it to complete all of your scraping and analysis tasks. The sections below outline what you need to scrape.

In [1]:
# Import the dependencies needed to scrape HTML:
#   - BeautifulSoup (HTML parsing)
#   - splinter (browser automation)

In [2]:
import time
from splinter import Browser
from bs4 import BeautifulSoup as bs

In [3]:
#Function to init browser to Chrome

def init_browser(executable_path="/Users/rck/chrome_driver/chromedriver", headless=False):
    """Start a Chrome browser session through splinter.

    Args:
        executable_path: Filesystem path to the chromedriver binary. The
            default is the path this notebook was originally written against.
        headless: Passed through to splinter; the default ``False`` matches
            the original call.

    Returns:
        The splinter ``Browser`` instance configured for Chrome.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)

In [None]:
#Function to scrape the Latest News Article from NASA
#get only the first(latest) article

def get_news():
    """Scrape the first (latest) article from the NASA Mars news listing.

    Loads the listing page in the module-level ``browser``, parses the
    rendered HTML and reads the title and teaser text that follow the first
    ``div.list_text`` element.

    Returns:
        dict: ``{"news_title": ..., "news_p": ...}``. A value is ``None`` when
        the page could not be loaded or the matching element was not found.
    """
    url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

    # Bind the results up front so a failed scrape still returns a
    # well-formed dict instead of raising UnboundLocalError at the return.
    title = None
    description = None
    try:
        browser.visit(url)
        soup = bs(browser.html, 'html.parser')

        div = soup.find('div', attrs={'class': 'list_text'})
        title = div.find_next('div', {'class': 'content_title'}).text
        description = div.find_next('div', {'class': 'article_teaser_body'}).text
    except Exception as exc:
        # Best-effort scrape: report the problem and return what we have.
        print("get_news: scrape failed: {!r}".format(exc))
    return {"news_title": title, "news_p": description}

In [None]:
def get_featured_image():
    """Scrape the URL of the featured Mars image from the JPL space images page.

    Visits the page in the module-level ``browser``, clicks the element with
    id ``full_image``, then looks for an ``<img>`` inside the first
    ``a.ready`` anchor of the resulting HTML.

    Returns:
        str or None: The image ``src`` prefixed with
        ``https://www.jpl.nasa.gov``, or ``None`` when the page could not be
        loaded or no such image was found.
    """
    url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

    # Bound before the try so every failure path has something to return.
    featured_image_url = None
    try:
        browser.visit(url)
        browser.find_by_id("full_image").click()
        # Pause before reading the HTML; presumably this lets the content
        # revealed by the click finish rendering.
        time.sleep(2)

        soup = bs(browser.html, 'html.parser')
        anchor = soup.find('a', 'ready')
        if anchor is not None and anchor.img is not None:
            featured_image_url = "https://www.jpl.nasa.gov" + anchor.img['src']
    except Exception as exc:
        # Best-effort scrape: report the problem and return None.
        print("get_featured_image: scrape failed: {!r}".format(exc))
    return featured_image_url

In [None]:
def get_latest_weather():
    """Scrape the text of the first tweet container on the Mars weather feed.

    Visits the ``marswxreport`` Twitter page in the module-level ``browser``
    and reads the stripped text of the first ``div.js-tweet-text-container``.

    Returns:
        str or None: The tweet text, or ``None`` when the page could not be
        loaded or the container was not found.
    """
    url = 'https://twitter.com/marswxreport?lang=en'

    # Bound before the try so a failed scrape returns None instead of
    # raising UnboundLocalError at the return statement.
    latest_weather = None
    try:
        browser.visit(url)
        soup = bs(browser.html, 'lxml')

        latest_weather = soup.find('div', 'js-tweet-text-container').text.strip()
    except Exception as exc:
        # Best-effort scrape: report the problem and return None.
        print("get_latest_weather: scrape failed: {!r}".format(exc))
    return latest_weather

In [None]:
def get_facts():
    """Scrape the Mars facts table from space-facts.com into a dict.

    Visits the page in the module-level ``browser``, finds the table with
    class ``tablepress tablepress-id-mars`` and maps the text of each row's
    first ``<td>`` to the text of its second ``<td>``.

    Returns:
        dict: Fact label -> fact value. Empty when the page could not be
        loaded or the table was not found.
    """
    url = 'https://space-facts.com/mars/'

    # Bound before the try so a failed scrape returns an empty dict instead
    # of raising UnboundLocalError at the return statement.
    facts = {}
    try:
        browser.visit(url)
        soup = bs(browser.html, 'lxml')

        table = soup.find('table', 'tablepress tablepress-id-mars')
        for row in table.find_all('tr'):
            columns = row.find_all('td')
            # Rows without a label/value pair of <td> cells carry no fact;
            # skip them rather than letting an IndexError discard the table.
            if len(columns) < 2:
                continue
            facts[columns[0].text] = columns[1].text
    except Exception as exc:
        # Best-effort scrape: report the problem and return what we have.
        print("get_facts: scrape failed: {!r}".format(exc))
    return facts

In [None]:
def get_hemispheres():
    """Scrape the title and image URL for each Mars hemisphere result.

    Visits the USGS Astrogeology search results in the module-level
    ``browser``. For every ``<h3>`` on the results page it takes the heading
    text as the title, follows the preceding ``<a>`` link, and reads the
    ``src`` of the ``img.wide-image`` on the linked page.

    Returns:
        list[dict]: One ``{"title": ..., "img_url": ...}`` entry per
        hemisphere. Entries collected before a failure are kept; the list is
        empty when nothing could be scraped.
    """
    base_url = 'https://astrogeology.usgs.gov'
    hemisphere_image_urls = []
    url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    try:
        browser.visit(url)
        soup = bs(browser.html, 'lxml')

        for header in soup.find_all("h3"):
            title = header.text
            link = header.find_previous("a")
            detail_url = base_url + link['href']
            browser.visit(detail_url)

            detail_soup = bs(browser.html, 'lxml')
            image = detail_soup.find('img', 'wide-image')
            img_url = base_url + str(image['src'])
            hemisphere_image_urls.append({"title": title, "img_url": img_url})
            browser.back()
    except Exception as exc:
        # Best-effort scrape: report the problem and keep earlier results.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        print("get_hemispheres: scrape failed: {!r}".format(exc))
    return hemisphere_image_urls

In [None]:
def scrape():
    """Run every scraper once and bundle the results into a single dict.

    The scrapers are called in this order: news, featured image, weather,
    facts, hemispheres.

    Returns:
        dict: Keys ``news``, ``featured_image_url``, ``weather``, ``facts``
        and ``hemisphere_image_urls``, each holding the corresponding
        scraper's return value.
    """
    return {
        "news": get_news(),
        "featured_image_url": get_featured_image(),
        "weather": get_latest_weather(),
        "facts": get_facts(),
        "hemisphere_image_urls": get_hemispheres(),
    }

In [None]:
# Start the Chrome session; the scraper functions read this module-level
# `browser` name directly.
browser = init_browser()
# Run all scrapers and keep the combined result for the cells below.
# NOTE(review): nothing visible here calls browser.quit(), so the session
# appears to stay open after scraping - confirm whether that is intended.
output = scrape()

In [None]:
import json
# Pretty-print the scraped result as indented JSON for inspection.
print(json.dumps(output,indent=4))

In [None]:
# Display the type of the scraped result.
type(output)

In [None]:
from pymongo import MongoClient

In [None]:
# Connect to the MongoDB server at localhost:27017 and select the
# `mission_to_mars` database.
client = MongoClient("mongodb://localhost:27017")
db = client.mission_to_mars

In [None]:
# Fetch a single document from the `general` collection and display it.
# NOTE(review): no visible cell inserts `output` into this collection -
# presumably that happens elsewhere; confirm.
new_data = db.general.find_one()
new_data

In [None]:
# Display the type of the fetched document.
type(new_data)

In [None]:
# Unpack the stored document into one variable per scraped field and print
# each of them. `find_one()` returns None when the collection has no
# document, and a stored document may lack a key; in both cases the affected
# variables are None instead of the cell failing with AttributeError/NameError.
document = new_data if new_data is not None else {}
news = document.get("news")
featured_image_url = document.get("featured_image_url")
weather = document.get("weather")
facts = document.get("facts")
hemisphere_image_urls = document.get("hemisphere_image_urls")

print(news)
print(featured_image_url)
print(weather)
print(facts)
print(hemisphere_image_urls)