In [119]:
# Dependencies

from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
from pprint import pprint
import time

### 1. Scraping 

#### 1.1. NASA Mars News

* Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

Open the URL in a browser, then parse the rendered page into a BeautifulSoup object.



In [120]:
# URL of the page to be scraped
url = 'https://mars.nasa.gov/news/'

# Start a visible (non-headless) Chrome session driven by chromedriver,
# then load the news page in it
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', headless=False, **executable_path)
browser.visit(url)

In [121]:
# Parse the page source currently held by the browser
html = browser.html
mars_soup = BeautifulSoup(html, 'html.parser')

# The first matching <div> of each class inside <body> is taken as the
# latest article's title and its teaser paragraph
page_body = mars_soup.body
news_title = page_body.find("div", class_="content_title").text
news_paragraph = page_body.find("div", class_="article_teaser_body").text

# Show both values, separated by one blank line
print(f"The title is: \n{news_title}", end="\n\n")
print(f"The descriptive paragraph is:  \n{news_paragraph}")

The title is: 
The MarCO Mission Comes to an End

The descriptive paragraph is:  
The pair of briefcase-sized satellites made history when they sailed past Mars in 2019.


### JPL Mars Space Images - Featured Image

In [122]:
# Load the JPL space-images page (query string selects the Mars category)
# in the already-open browser session
url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url2)

In [123]:
# Parse the page the browser is currently showing
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# First <a class="fancybox"> on the page; its data-fancybox-href attribute
# holds a site-relative image path
images = soup.find('a', class_='fancybox')
relative_image_path = images.attrs['data-fancybox-href']

# Prefix the JPL host to obtain an absolute URL, and display it
featured_image_url = 'https://www.jpl.nasa.gov' + relative_image_path
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA22911_ip.jpg'

### Mars Weather 



In [124]:
# Open the Mars weather Twitter account in the browser.
# The name passed to visit() must be the variable bound on the line above;
# an undefined name here raises NameError and leaves `soup` holding the
# previously parsed page.
weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)

# Parse the page source now held by the browser into a soup object
html_weather = browser.html
soup = BeautifulSoup(html_weather, 'html.parser')

In [125]:
# Collect every <div class="js-tweet-text-container"> from the parsed page
# and print the resulting list.
# NOTE(review): the recorded output is an empty list, i.e. nothing matched
# in the `soup` this cell ran against — confirm that `soup` holds the
# Twitter page and that this class name exists in its markup.
mars_weather = soup.find_all('div', class_='js-tweet-text-container')
print(mars_weather)

[]


### Mars Facts

In [126]:
# Page that holds the Mars facts table
facts_url = 'http://space-facts.com/mars/'

# pd.read_html returns a list of DataFrames, one per table it parses
mars_facts = pd.read_html(facts_url)

# Work on a copy of the first table so `mars_facts` is left untouched and
# this cell can be re-run without re-fetching the page
mars_df = mars_facts[0].copy()

# Label the two columns, then index the frame by Description
mars_df.columns = ['Description', 'Value']
mars_df = mars_df.set_index('Description')

# Keep the rendered HTML table; to_html() returns a string when no target
# buffer is given, so the result has to be bound to a name to be usable
mars_facts_html = mars_df.to_html()

# One dict per fact. orient='records' leaves out the index, so move
# Description back into a column first to keep it in each record
data = mars_df.reset_index().to_dict(orient='records')

# Display mars_df
mars_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


### Mars Hemispheres

In [127]:
# Load the USGS Astrogeology search-results page for enhanced Mars
# hemisphere images in the already-open browser session
hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheres_url)

In [128]:
# Parse the search-results page currently held by the browser
html_hemispheres = browser.html
soup = BeautifulSoup(html_hemispheres, 'html.parser')

# Every <div class="item"> is one hemisphere entry in the results
items = soup.find_all('div', class_='item')

# Site root, prepended to the relative href/src values found in the markup
hemispheres_main_url = 'https://astrogeology.usgs.gov'

# One {"title", "img_url"} record is appended per hemisphere
hemisphere_image_urls = []

for item in items:
    # Title comes from the entry's <h3>
    title = item.find('h3').text

    # Follow the entry's link to the page that carries the full image
    partial_img_url = item.find('a', class_='itemLink product-item')['href']
    browser.visit(hemispheres_main_url + partial_img_url)

    # The browser now shows the individual hemisphere page; re-parse it.
    # This rebinds `soup`, which is safe because `items` was collected above
    soup = BeautifulSoup(browser.html, 'html.parser')

    # Full image source is the src of <img class="wide-image">
    img_url = hemispheres_main_url + soup.find('img', class_='wide-image')['src']

    hemisphere_image_urls.append(dict(title=title, img_url=img_url))

# Display hemisphere_image_urls
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [129]:
# Scraping is finished: close the browser session opened at the start of the notebook
browser.quit()

## Step 2 - MongoDB and Flask Application
