# Using Web Scraping to Deliver the News About Mars
#### This notebook will provide the following information:
* NASA Mars News
* JPL Mars Space Images
* Mars Weather
* Mars Facts
* Mars Hemispheres

In [1]:
# Dependencies
import pymongo
import os
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser 

#### Setting up Mongo Database for Mars News

In [2]:
# Connection string for a MongoDB server on localhost, port 27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Handles to the 'marsDB' database and its 'articles' collection
# (dictionary-style access is equivalent to attribute-style access)
mars_db = client['marsDB']
collection = mars_db['articles']

#### URLs of Webpages to be Scraped

In [28]:
# Pages to scrape, one per section of the notebook.
# Long query strings are split across adjacent literals for readability;
# Python joins them into a single string at compile time.
news_url = (
    'https://mars.nasa.gov/news/'
    '?page=0&per_page=40'
    '&order=publish_date+desc%2Ccreated_at+desc'
    '&search=&category=19%2C165%2C184%2C204'
    '&blank_scope=Latest'
)
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
weather_url = 'https://twitter.com/marswxreport?lang=en'
facts_url = 'https://space-facts.com/mars/'
hemi_url = (
    'https://astrogeology.usgs.gov/search/results'
    '?q=hemisphere+enhanced&k1=target&v1=Mars'
)

In [29]:
# Retrieve pages with the requests module.
# requests has no default timeout, so without one a single unresponsive host
# would block this cell (and the kernel) indefinitely.
REQUEST_TIMEOUT = 30  # seconds

news_response = requests.get(news_url, timeout=REQUEST_TIMEOUT)
jpl_response = requests.get(jpl_url, timeout=REQUEST_TIMEOUT)
weather_response = requests.get(weather_url, timeout=REQUEST_TIMEOUT)
facts_response = requests.get(facts_url, timeout=REQUEST_TIMEOUT)
hemi_response = requests.get(hemi_url, timeout=REQUEST_TIMEOUT)

In [35]:
# Parse each downloaded page with Python's built-in 'html.parser'.
# The responses are listed in the same order as the names they are unpacked into.
news_soup, jpl_soup, weather_soup, facts_soup, hemi_soup = (
    bs(page.text, 'html.parser')
    for page in (
        news_response,
        jpl_response,
        weather_response,
        facts_response,
        hemi_response,
    )
)

bs4.BeautifulSoup

#### Examining the parsed pages to determine which elements contain the information we need

In [31]:
# Debugging aid: uncomment a line below to print the indented HTML of that
# page's soup and look for the elements worth scraping.

# print(news_soup.prettify())
# print(jpl_soup.prettify())
# print(weather_soup.prettify())
# print(facts_soup.prettify())
# print(hemi_soup.prettify())

## NASA Mars News
#### Latest news title and paragraph text from the NASA Mars News site.

In [32]:
# First headline on the page: the first <div class="content_title">.
news_var_1 = news_soup.find('div', class_='content_title')

# The teaser is looked up from the page soup, not from inside the headline div:
# a nested find() only sees descendants, and the headline div's text is just
# the headline, so searching inside it cannot reach the teaser element.
# NOTE(review): this is None when the downloaded HTML has no
# 'article_teaser_body' div - confirm against the page source.
news_var_2 = news_soup.find('div', class_='article_teaser_body')

# Display the raw headline text (surrounding newlines included)
news_var_1.text

'\n\nNASA Prepares for Moon and Mars With New Addition to Its Deep Space Network\n\n'

In [34]:
# Earlier selector experiments, kept for reference:
# news_1 = news_soup.select_one('#content_title')
# paragraph = news_soup.find('div', class_='article_teaser_body').get_text()

# Collect every <div> whose CSS class is 'list_date' and display the result
title = news_soup.find_all('div', attrs={'class': 'list_date'})
title

[]

In [68]:
# Headline elements to store: every <div class="content_title"> in the news
# page. Defined here so the cell does not depend on a variable left over from
# an earlier kernel session.
news_results = news_soup.find_all('div', class_='content_title')

# Loop through returned results
for result in news_results:

    # Keep the headline as plain, whitespace-stripped text. The element itself
    # must not go into the document: a BeautifulSoup Tag is not a type MongoDB
    # can encode.
    news_title = result.get_text(strip=True)

    # Nothing useful to store for an empty headline
    if not news_title:
        continue

    # Dictionary to be inserted into MongoDB
    post = {
        'Title': news_title,
    }

    # Insert dictionary into MongoDB as a document
    # NOTE(review): re-running this cell inserts the same headlines again.
    collection.insert_one(post)

## JPL Mars Space Images
#### Featured image from JPL's Mars programme.

In [72]:
# Start a visible (non-headless) Chrome session through Splinter, using the
# 'chromedriver.exe' driver binary, then load the JPL space-images page.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

browser.visit(jpl_url)

In [76]:
# Parse the page as loaded in the Splinter browser. Previously `html` was
# captured but never used, and the search ran on the soup built from the
# earlier requests download instead.
html = browser.html
jpl_browser_soup = bs(html, 'html.parser')

# Each featured-image slide is an <article class="carousel_item">
images = jpl_browser_soup.find_all('article', class_='carousel_item')

for image in images:
    print('Featured Image:', image)

Featured Image: <article alt="Ancient Light Deflected" class="carousel_item" style="background-image: url('/spaceimages/images/wallpaper/PIA17448-1920x1200.jpg');">
<div class="default floating_text_area ms-layer">
<h2 class="category_title">
</h2>
<h2 class="brand_title">
				  FEATURED IMAGE
				</h2>
<h1 class="media_feature_title">
				  Ancient Light Deflected				</h1>
<div class="description">
</div>
<footer>
<a class="button fancybox" data-description="This artist's impression shows how photons from the early universe are deflected by the gravitational lensing effect of massive cosmic structures as they travel across the universe." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA17448_ip.jpg" data-link="/spaceimages/details.php?id=PIA17448" data-title="Ancient Light Deflected" id="full_image">
					FULL IMAGE
				  </a>
</footer>
</div>
<div class="gradient_container_top"></div>
<div class="gradient_container_bottom"></div>
</article>


## Mars Weather
#### Mars weather report tweet from the Mars Weather Twitter account.