# Mission to Mars

### Import Dependencies

In [1]:
import pandas as pd
import pymongo as pmo
from bs4 import BeautifulSoup as bsp
from splinter import Browser
import time


### Set-up Web Browser Driver for Scraping

In [None]:
# parser name handed to BeautifulSoup whenever a soup object is built
lib_used = 'html.parser'

# name of the browser splinter should drive
browser_name = 'chrome'

# keyword argument telling splinter where the browser driver lives
executable_path = dict(executable_path='chromedriver.exe')

# start a visible (non-headless) browser session
browser = Browser(browser_name, headless=False, **executable_path)

### NASA Mars News Web Scraping

In [None]:
# link to NASA Mars news
article_url = 'https://mars.nasa.gov/news/'

# access & get content
browser.visit(article_url)

# longest time (in seconds) to wait for the news list to show up
t_out = 10

# poll for the element that is actually scraped (div.list_text) instead of
# sleeping blindly; splinter re-checks until it is present or t_out elapses
if not browser.is_element_present_by_css('div.list_text', wait_time=t_out):
    print(f'News list not found within {t_out} seconds')

soup = bsp(browser.html, lib_used)

# return results
results = soup.find_all('div', class_='list_text')
results

In [None]:
# pull the latest news from the list with index = 0 :: indication of the top latest
if not results:
    # same exception type as indexing an empty list, but with an explanation
    raise IndexError('No news items found: the page may not have finished loading or its layout changed')
latest_news = results[0]
latest_news

In [None]:
from urllib.parse import urljoin

# workflow:
#   find all the div, then use the unique class of each div
#   to access the content of a specific div

# start from None so the summary below still prints when a piece is missing
las_news_title = las_news_link = las_news_content = None

for tag in latest_news.find_all('div'):
    # a div without a class attribute has no "class" entry, so default to []
    tag_classes = tag.get("class", [])
    if "content_title" in tag_classes and tag.a is not None:
        las_news_title = tag.a.text
        # urljoin copes with an href that already starts with "/" (no double
        # slash) and with an href that is already an absolute URL
        las_news_link = urljoin("https://mars.nasa.gov/", tag.a.get("href", ""))
    elif "article_teaser_body" in tag_classes:
        las_news_content = tag.text


# print out what found in the loop
print(f'>> Lastest news of Mars from NASA:\n\
    {las_news_title}\n\n\
>> News Content:\n\
    {las_news_content}\n\n\
>> News Link:\n\
    {las_news_link}')

### JPL Mars Space Images - Featured Image Web Scraping

In [None]:
# link to Mars Image
ft_img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# access website
browser.visit(ft_img_url)

# longest time (in seconds) to wait for the first button to be rendered
t_out = 10

# poll until a link containing "FULL IMAGE" exists (or t_out elapses);
# the result is ignored because the lookup below reports a missing button
browser.is_element_present_by_xpath('//a[contains(., "FULL IMAGE")]', wait_time=t_out)

# click on couple of buttons to gain access to the full size image page
# each lookup is done once and guarded by an if, so the subsequent code
# still runs when a button is missing
full_image_links = browser.links.find_by_partial_text('FULL IMAGE')
if full_image_links:
    full_image_links.click()
else:
    print('No "FULL IMAGE" Button found')

more_info_links = browser.links.find_by_partial_text('more info')
if more_info_links:
    more_info_links.click()

    # delay time so browser can load before proceeding
    time.sleep(2)
else:
    print('No "more info" Button found')

large_size_links = browser.links.find_by_partial_href('largesize')
if large_size_links:
    large_size_links.click()
else:
    print('No "Full size Image " Button found')

# the browser is now expected to be on the full size image; keep its address
feature_image_url = browser.url
feature_image_url

### Mars Weather Web Scraping

In [None]:
# link to Mars weather
weather_url = 'https://twitter.com/marswxreport?lang=en'

# access & get content
browser.visit(weather_url)

# longest time (in seconds) to wait for the tweet text spans to be rendered
t_out = 10

# CSS form of the class list searched for below; polling for it waits until
# at least one candidate span exists (or t_out elapses)
tweet_span_css = 'span.css-901oao.css-16my406.r-1qd0xha.r-ad9z0x.r-bcqeeo.r-qvutc0'
if not browser.is_element_present_by_css(tweet_span_css, wait_time=t_out):
    print(f'Tweet text not found within {t_out} seconds')

# create soup object
soup = bsp(browser.html, lib_used)
print(soup)

# return results
results = soup.find_all('span', class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0')
results

In [None]:
# take the text of the first span that starts with "InSight" (the weather
# report); default to None so the cell does not end in a NameError when no
# span matches
mars_weather = next(
    (ea_tag.text for ea_tag in results if ea_tag.text.startswith("InSight")),
    None,
)
mars_weather

### Mars Facts

In [None]:
# link to Mars facts
facts_url = 'https://space-facts.com/mars/'

# open the facts page in the browser
browser.visit(facts_url)
# fixed 3 second pause after the visit
# NOTE(review): the table is read with pd.read_html(facts_url) from the URL,
# not from browser.html — confirm this visit and pause are still needed
time.sleep(3)   
# soup = bsp(browser.html, lib_used)



# return results
# results = soup.find_all('span', class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0')
# results



In [None]:
# parse every HTML table found at the facts URL and keep the first one
tables = pd.read_html(facts_url)
df = tables[0]

# label the columns of that table
df.columns = ["Description", "Value"]
df

In [None]:
# render the table as an HTML string without the index column;
# str.replace returns a new string, so its result has to be assigned for the
# newlines to actually be removed from mars_info_table
mars_info_table = df.to_html(index=False).replace("\n", "")
mars_info_table

In [None]:
# write the facts table to the file mars_info_table.html
# NOTE(review): index=False is not passed here, unlike for the in-memory
# mars_info_table string, so the file presumably includes the index column —
# confirm that is intended
df.to_html('mars_info_table.html')

### Mars Hemispheres

In [None]:
# create function to process webpage and retrieve hemisphere images of Mars

def get_hemi_img(brwr, hemi_url, hemi_name):
    """Follow one hemisphere link and return its title and sample image link.

    Visits ``hemi_url``, clicks the link whose text contains ``hemi_name``,
    then searches the first ``<ul>`` with an empty class on the resulting
    page for an anchor whose text is "sample" (case-insensitive).
    Progress messages are printed along the way.

    Args:
        brwr: splinter browser used for all navigation and page reads.
        hemi_url: URL of the page listing the hemisphere links.
        hemi_name: text contained in the hemisphere's link, e.g. 'Cerberus'.

    Returns:
        Tuple ``(img_title, img_link)``: ``img_title`` is
        ``hemi_name + ' Hemisphere'`` and ``img_link`` is the href of the
        last matching "sample" anchor.

    Raises:
        IndexError: if the page contains no ``<ul>`` with an empty class.
        LookupError: if that list contains no "sample" anchor with an href.
    """
    # longest time (in seconds) to poll for an element before moving on
    t_out = 10

    # open the listing page with the browser that was passed in
    brwr.visit(hemi_url)

    print(">> Progress = 10%")

    # poll until the page has at least one link to click
    if not brwr.is_element_present_by_tag('a', wait_time=t_out):
        print("Page takes too long to load!")

    print(">> Progress = 30%")

    # click through to the hemisphere page; the lookup is done once and
    # guarded so the rest of the function still runs if the link is missing
    hemi_links = brwr.links.find_by_partial_text(hemi_name)
    if hemi_links:
        hemi_links.click()
        print(">> Progress = 50%")
        # the image link is searched for inside a <ul>, so wait for one
        if not brwr.is_element_present_by_tag('ul', wait_time=t_out):
            print("Page takes too long to load!")
    else:
        print(f"Unable to find {hemi_name} Hemisphere Image")

    print(">> Progress = 60%")
    # create soup object from the page the browser is on now
    soup = bsp(brwr.html, lib_used)

    # candidate lists: <ul> elements with an empty class
    results = soup.find_all('ul', class_='')
    print(">> Progress = 70%")
    if not results:
        raise IndexError(f"No image list found on the {hemi_name} page")

    # loop to find the tag a and href of the full size image
    img_link = None
    for rsl in results[0].find_all('a'):
        # .get avoids a KeyError for anchors that have no href
        href = rsl.get("href")
        if href and rsl.text.lower() == "sample":
            print('Getting Image Link')
            img_link = href
            print('Getting Image Title')

    if img_link is None:
        raise LookupError(f'No "sample" image link found for {hemi_name}')

    img_title = hemi_name + ' Hemisphere'
    print(">> Progress = 100%")
    return (img_title, img_link)

In [None]:
# link to the search results page listing the Mars hemisphere pictures
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
# hemisphere names; each one is passed to get_hemi_img to find its link by partial text
hemi_name = ['Cerberus', 'Schiaparelli', 'Syrtis Major', 'Valles Marineris']

In [None]:
# build one {"title": ..., "img_url": ...} dictionary per hemisphere name
hemisphere_image_urls = []
for name in hemi_name:
    print(f'>> Processing data of {name} Hemisphere')
    print(f'>> Please wait ...')
    # get_hemi_img returns the pair (title, image link)
    title, img_url = get_hemi_img(browser, hemi_url, name)
    hemisphere_image_urls.append({"title": title, "img_url": img_url})
    print(f'>> Finish with {name} Hemisphere\n{("-")*25}')
hemisphere_image_urls

In [None]:
# pd.DataFrame(hemisphere_image_urls)

In [None]:
# # link to Mars hemisphere pics
# hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# # access & get content 
# browser.visit(hemi_url)

# t_wait = 0
# del_t = 0.25

# # condition to make sure the webpage is loaded
# if browser.is_element_present_by_tag('/html') == False:
#     time.sleep(t_wait)
#     t_wait += del_t
# else:
#     pass

# # click on couple of buttons to gain access to the full size image page
# # condition with if to make sure the subsequent codes will still run incase of issue
# if browser.links.find_by_partial_text('Valles Marineris'):
#     browser.links.find_by_partial_text('Valles Marineris').click()
#     # condition to make sure the webpage is loaded
#     if browser.is_element_present_by_tag('/html') == False:
#         time.sleep(t_wait)
#         t_wait += del_t
#     else:
#         print("Page takes too long to load!")
#         pass
# else:
#     print("Unable to find Valles Marineris Hemisphere Image")
#     pass

In [None]:
# create soup object from whatever page the browser currently shows
soup = bsp(browser.html, lib_used)

# collect the <ul> elements with an empty class
results = soup.find_all('ul', class_='')

# function to loop and get hemi image info
def hemi_img_loop(rsl):
    """Return the text and href of the "sample" anchor in the first list.

    Args:
        rsl: sequence of parsed elements; only the first one is searched,
            through its ``find_all('a')``.

    Returns:
        Tuple ``(img_title, img_link)`` taken from the last anchor whose
        text is "sample" (case-insensitive) and which has an href.

    Raises:
        IndexError: if ``rsl`` is empty.
        LookupError: if no matching anchor is found.
    """
    if not rsl:
        raise IndexError("No elements were supplied to search for the image link")

    img_title = img_link = None
    # search the argument that was passed in, not a module-level variable
    for anchor in rsl[0].find_all('a'):
        # .get avoids a KeyError for anchors that have no href
        href = anchor.get("href")
        if href and anchor.text.lower() == "sample":
            img_link = href
            img_title = anchor.text

    if img_link is None:
        raise LookupError('No "sample" image link found')
    return (img_title, img_link)

In [None]:
# run the helper on the <ul> results and display the (title, link) pair it returns
hemi_img_loop(results)

In [None]:
# results is a list of elements, so one element has to be picked before
# navigating to its first <a>; the first <ul> is used here
# NOTE(review): assumes the first anchor of the first list is the wanted image link — confirm
valles_img_link = results[0].a['href']
valles_img_link

In [None]:

Cerberus Hemisphere Enhanced thumbnail
Cerberus Hemisphere Enhanced
image/tiff 21 MB
Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…

Schiaparelli Hemisphere Enhanced thumbnail
Schiaparelli Hemisphere Enhanced
image/tiff 35 MB
Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern…

Syrtis Major Hemisphere Enhanced thumbnail
Syrtis Major Hemisphere Enhanced
image/tiff 25 MB
Mosaic of the Syrtis Major hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of about 100 red and violet…

Valles Marineris Hemisphere Enhanced thumbnail
Valles Marineris Hemisphere Enhanced
image/tiff 27 MB
Mosaic of the Valles Marineris hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The distance is 2500 kilometers from the surface of…


# # condition to make sure the webpage is loaded
# if browser.is_element_present_by_tag('/html') == False:
#     time.sleep(t_wait)
#     t_wait += del_t
# else:
#     pass


# hemisphere_image_urls = [
#     {"title": "Valles Marineris Hemisphere", "img_url": "..."},
#     {"title": "Cerberus Hemisphere", "img_url": "..."},
#     {"title": "Schiaparelli Hemisphere", "img_url": "..."},
#     {"title": "Syrtis Major Hemisphere", "img_url": "..."},
# ]
