# Mission to Mars

### Import Dependencies

In [1]:
import pandas as pd
import pymongo as pmo
from bs4 import BeautifulSoup as bsp
from splinter import Browser
import time


### Set-up Web Browser Driver for Scraping

In [2]:
# specify the path of browser driver want to use
executable_path = {'executable_path': 'chromedriver.exe'}

# specify the name of browser want to use
browser_name = 'chrome'

# specify parser used
lib_used = 'html.parser'

# start browser
browser = Browser(browser_name, **executable_path, headless=False)

### NASA Mars News Web Scraping

In [3]:
# link to NASA Mars news
article_url = 'https://mars.nasa.gov/news/'

# access & get content 
browser.visit(article_url)
time.sleep(2)   
soup = bsp(browser.html, lib_used)

# return results
results = soup.find_all('div', class_='list_text')
results

[<div class="list_text"><div class="list_date">April 21, 2020</div><div class="content_title"><a href="/news/8654/how-nasas-perseverance-mars-team-adjusted-to-work-in-the-time-of-coronavirus/" target="_self">How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus </a></div><div class="article_teaser_body">Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.</div></div>,
 <div class="list_text"><div class="list_date">April 20, 2020</div><div class="content_title"><a href="/news/8649/nasas-perseverance-mars-rover-gets-balanced/" target="_self">NASA's Perseverance Mars Rover Gets Balanced</a></div><div class="article_teaser_body">The mission team performed a crucial weight-balancing test on the rover in preparation for this summer's history-making launch to the Red Planet.</div></div>,
 <div class="list_text"><div class="list_date">April 1

In [4]:
# pull the lastest news from the list with index = 0 :: indication of the top latest
latest_news = results[0]
latest_news

<div class="list_text"><div class="list_date">April 21, 2020</div><div class="content_title"><a href="/news/8654/how-nasas-perseverance-mars-team-adjusted-to-work-in-the-time-of-coronavirus/" target="_self">How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus </a></div><div class="article_teaser_body">Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.</div></div>

In [5]:
# workflow: 
    # find all the div, then use the unique class of each div  
    # to access the content of a specific div
for tag in latest_news.find_all('div'):
    if "content_title" in tag.attrs["class"]:
        las_news_title = tag.a.text
        las_news_link = f"https://mars.nasa.gov/{tag.a['href']}"
    elif "article_teaser_body" in tag.attrs["class"]:
        las_news_content = tag.text
     
        
# print out what found in the loop
print(f'>> Lastest news of Mars from NASA:\n\
    {las_news_title}\n\n\
>> News Content:\n\
    {las_news_content}\n\n\
>> News Link:\n\
    {las_news_link}')

>> Lastest news of Mars from NASA:
    How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus 

>> News Content:
    Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.

>> News Link:
    https://mars.nasa.gov//news/8654/how-nasas-perseverance-mars-team-adjusted-to-work-in-the-time-of-coronavirus/


### JPL Mars Space Images - Featured Image Web Scraping

In [8]:
# link to Mars Image
ft_img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# access website
browser.visit(ft_img_url) 


# click on couple of buttons to gain access to the full size image page
# condition with if to make sure the subsequent codes will still run incase of issue
if browser.links.find_by_partial_text('FULL IMAGE'):
    browser.links.find_by_partial_text('FULL IMAGE').click()
    
else:
    print(f'No "FULL IMAGE" Button found')
  
    
if browser.links.find_by_partial_text('more info'):
    browser.links.find_by_partial_text('more info').click()
    
    # delay time so browser can load before proceeding
    time.sleep(2)
    
else:
     print(f'No "more info" Button found')

In [10]:
# condition with if to make sure the subsequent codes will still run incase of issue
if browser.links.find_by_partial_href('largesize'):
    browser.links.find_by_partial_href('largesize').click()
    
else:
    print(f'No "Full size Image " Button found')
    
feature_image_url = browser.url
feature_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16682_hires.jpg'

### Mars Weather Web Scraping

In [34]:
# link to Mars weather
weather_url = 'https://twitter.com/marswxreport?lang=en'

# access & get content 
browser.visit(weather_url)
time.sleep(3)   
soup = bsp(browser.html, lib_used)


# return results
results = soup.find_all('span', class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0')
results

[<span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">Log in</span>,
 <span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">Sign up</span>,
 <span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">See new Tweets</span>,
 <span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">Follow</span>,
 <span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0"><span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">Mars Weather</span></span>,
 <span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">Mars Weather</span>,
 <span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">@MarsWxReport</span>,
 <span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">Updates as avail from the REMS weather instrument aboard </span>,
 <span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">.  Data credit: Centro deAstrobiologia, FMI, 

In [46]:
# loop thru the list and find partial match for the weather content
for ea_tag in results:
    if str(ea_tag.text)[0:7] == "InSight":
        temp_string = ea_tag.text
        break
print(temp_string)

InSight sol 499 (2020-04-22) low -94.4ºC (-137.9ºF) high -3.6ºC (25.5ºF)
winds from the SW at 5.1 m/s (11.3 mph) gusting to 16.2 m/s (36.1 mph)
pressure at 6.70 hPa


### Mars Facts

In [48]:
# link to Mars weather
facts_url = 'https://space-facts.com/mars/'

# access & get content 
browser.visit(facts_url)
time.sleep(3)   
soup = bsp(browser.html, lib_used)


# return results
# results = soup.find_all('span', class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0')
# results

