# Mission to Mars

## Scraping

In [10]:
# Dependencies
from bs4 import BeautifulSoup as BS
from splinter import Browser
import pandas as pd
import requests
import re

In [11]:
#Choose the executable path to driver - Mac Users
executable_path = {'executable_path' : 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [12]:
# URL of pages to be scraped
news_url = "https://mars.nasa.gov/news/"
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
weather_url = "https://twitter.com/marswxreport?lang=en"
facts_url = "https://space-facts.com/mars/"
hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

#### Nasa Mars News

In [14]:
# Visit & Parse HTML with Beautiful Soup
browser.visit(news_url)
news_html = browser.html
news_soup = BS(news_html,"html.parser")

# Retrieve & Print the latest element that contains news title and news_paragraph
news_title = news_soup.find("div",class_="content_title").text
news_para = news_soup.find("div", class_="article_teaser_body").text
print(f"Title: {news_title}")
print(f"Para: {news_para}")

Title: Mars Now
Para: An instrument called SHERLOC will, with the help of its partner WATSON, hunt for signs of ancient life by detecting organic molecules and minerals.


#### JPL Mars Space Images - Featured Image

In [13]:
# Visit & Scrape featured image with Beautiful Soup
browser.visit(image_url)
image_html = browser.html
image_soup = BS(image_html, 'html.parser')

# Retrieve & Print
feature_img = image_soup.find('article',attrs={'class':'carousel_item'})
feature_img_url_string = feature_img['style']
featured_image_link = re.findall(r"'(.*?)'",feature_img_url_string)
image_path = 'https://www.jpl.nasa.gov'+ featured_image_link[0]
print(image_path)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18906-1920x1200.jpg


#### Mars Weather 

In [13]:
# Visit & Scrape weather data with Beautiful Soup
result = requests.get(weather_url)
weather_html = result.text
weather_soup = BS(weather_html,'html.parser')

# Retrieve & Print the weather from the newest tweet

mars_weather = weather_soup.find(class_='tweet-text').get_text()
mars_weather

'InSight sol 535 (2020-05-29) low -91.3ºC (-132.4ºF) high -2.7ºC (27.2ºF)\nwinds from the SW at 5.2 m/s (11.5 mph) gusting to 16.7 m/s (37.3 mph)\npressure at 7.20 hPapic.twitter.com/TlqNzxU9Zo'

#### Mars Facts

In [6]:
# Visit the Mars Facts Site Using Pandas to Read
mars_data = pd.read_html(facts_url)
mars_df = pd.DataFrame(mars_data[0])
mars_df.columns=["Description", "Value"]
mars_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [8]:
#convert the data to a HTML table string
html_table = mars_df.to_html(header = False, index = False)
html_table = html_table.replace('\n', '')

html_table

'<table border="1" class="dataframe">  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [17]:
# Visit & Scrape images of Mars Hemispheres with Beautiful Soup
browser.visit(hemi_url)
hemi_soup = BS(browser.html, "html.parser")

#get the 4 hemispheres (class of 'item')
hemispheres = hemi_soup.select('div.item')

# Create dictionary to store titles & links to images
hemisphere_image_urls = []

# Iterate through each image
for hemi in hemispheres:
    img_title = (hemi.find('h3').text).replace(' Enhanced', '') 
    browser.click_link_by_partial_text(img_title)
    soup = BS(browser.html, "html.parser")
    full = soup.find('a', text='Sample')
    img_url = full['href']
    hemisphere_image_urls.append({"title": img_title, "img_url": img_url})
    browser.back()

# Print image title and url
print(hemisphere_image_urls)



[{'title': 'Cerberus Hemisphere', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
