# Step 1: Scraping

In [1]:
# Dependencies
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import pandas as pd
from IPython.display import Image
from IPython.core.display import HTML, Image, display
#from IPython.core.display import Image, display

In [2]:
# Create beautiful soup object from html, create a function
def scrape(url):
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)
    browser.visit(url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    return soup

### NASA Mars News

* Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [3]:
# Save URL, use scrape function created and save as variable name
url_news = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
soup = scrape(url_news)

# Collect latest news title & paragraph text
latest_news = soup.find("ul", class_="item_list").find("div",class_ = "content_title").a.text
latest_paragraph = soup.find("ul", class_="item_list").find("div",class_ = "article_teaser_body").text

print(f'''
The most recent article is: "{latest_news}"
The most recent paragraph is: "{latest_paragraph}"
''')


The most recent article is: "Things Are Stacking up for NASA's Mars 2020 Spacecraft"
The most recent paragraph is: "As the July 2020 launch date inches closer, the next spacecraft headed to the Red Planet is assembled for more testing."



### JPL Mars Space Images - Featured Image

* Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).

* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.

* Make sure to find the image url to the full size `.jpg` image.

* Make sure to save a complete url string for this image.

In [109]:
# Save urls as variables
url_base = "https://www.jpl.nasa.gov"
url_featured = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Use function created to scrape site and find targets of scraped object
image_soup = scrape(url_featured)
image_url = image_soup.find("div", class_="carousel_container").find("article", class_="carousel_item")\
.find('a')['data-fancybox-href']

# Image url output is only the path after "url", so must append to base url
# example: /spaceimages/images/mediumsize/PIA09113_ip.jpg
featured_image_url = f'{url_base}{image_url}'
print(featured_image_url)

# Display image
display(Image(url=featured_image_url))

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17832_ip.jpg


### Mars Weather

* Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called `mars_weather`.

In [35]:
# Save urls as variable
url_twitter = "https://twitter.com/marswxreport?lang=en"

# Use function created to scrape site and find targets of scraped object
twitter_soup = scrape(url_twitter)

# Display to preview soup object
#print(twitter_soup.prettify())

# Use soup object to locate the most recent tweet with the weather
mars_weather = (twitter_soup.find("div", class_="js-tweet-text-container")\
                .find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text)
print(f'''
The current weather on mars is:
{mars_weather}
''')


The current weather on mars is:
InSight sol 140 (2019-04-19) low -98.6ºC (-145.5ºF) high -18.0ºC (-0.4ºF)
winds from the W at 4.0 m/s (8.9 mph) gusting to 14.2 m/s (31.8 mph)
pressure at 7.40 hPapic.twitter.com/4YBCvCijXM



### Mars Facts

* Visit the Mars Facts webpage [here](http://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to a HTML table string.

In [83]:
# Save url as variable
fact_url = "https://space-facts.com/mars/"

# Use function created to scrape site and find targets of scraped object
fact_soup = scrape(fact_url)

# Preview soup object
#print(fact_soup.prettify())

# Find table and save as table to be iterated
table = fact_soup.find("table").find("tbody").find_all("tr")

for t in table:
    cells = t.find_all("td")
    name = cells[0].get_text()
    data = cells[1].get_text()
    print(f'{name}{data}')

Equatorial Diameter:6,792 km

Polar Diameter:6,752 km

Mass:6.42 x 10^23 kg (10.7% Earth)
Moons:2 (Phobos & Deimos)
Orbit Distance:227,943,824 km (1.52 AU)
Orbit Period:687 days (1.9 years)

Surface Temperature: -153 to 20 °C
First Record:2nd millennium BC
Recorded By:Egyptian astronomers


In [113]:
# Create lists from table
df_name = []
df_data = []

for t in table:
    cells = t.find_all("td")
    name = cells[0].get_text()
    data = cells[1].get_text()
    df_name.append(name)
    df_data.append(data)

# Turn lists into pandas dataframe
df_table = pd.DataFrame({
    "Mars Parameters": df_name, 
    "Data":df_data})
df_table.set_index(['Mars Parameters'])

Unnamed: 0_level_0,Data
Mars Parameters,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km\n"
Polar Diameter:,"6,752 km\n"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)\n
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


### Mars Hemispheres

* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mar's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.