# Mission to Mars

In [145]:
# Dependencies
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser


In [146]:
# Launch a visible (non-headless) Chrome window driven by the chromedriver
# binary named below; the keyword arguments are kept in `executable_path`
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

## The Latest NASA Mars News

In [147]:
# URL of the NASA Mars news listing to be scraped
url = "https://mars.nasa.gov/news"
browser.visit(url)
# The next cell reads browser.html and looks up the first `li.slide`; the page
# may still be rendering when visit() returns, so wait (up to 10 s) for that
# element and fail here with a clear message instead of an AttributeError later.
assert browser.is_element_present_by_css("li.slide", wait_time=10), \
    "News list did not render: no 'li.slide' element found on " + url

In [148]:
# Parse the browser's current page source with the built-in 'html.parser'
soup = bs(browser.html, features='html.parser')

In [149]:
# Narrow the page down to the text block of the first news slide;
# it holds the title, the teaser paragraph and the date
News_result = soup.find('li', {'class': 'slide'}).find('div', {'class': 'list_text'})


In [150]:
# Pull the news title, teaser paragraph and update date out of the first news item
news_title = News_result.find('div', class_='content_title').text
news_para = News_result.find('div', {'class': 'article_teaser_body'}).text
News_date = News_result.find('div', {'class': 'list_date'}).text

In [151]:
# Show the scraped title, teaser and date between two rule lines
rule = "---------------------------------------"
report_lines = [
    rule,
    "The Latest Nasa Mars News:",
    news_title,
    news_para,
    "update time: " + News_date,
    rule,
]
print("\n".join(report_lines))

---------------------------------------
The Latest Nasa Mars News:
NASA Invites Public to Submit Names to Fly Aboard Next Mars Rover
From now till Sept. 30, the public can submit names to be stenciled on chips that will fly on the Mars 2020 rover and receive a souvenir boarding pass.
update time: May 21, 2019
---------------------------------------


## JPL Mars Space Images - Featured Image

In [126]:
# An earlier cell already bound `browser` to a Chrome session. Quit it before
# launching a new one: rebinding the name without quitting would orphan that
# window, because only the session currently bound to `browser` is quit at the
# end of the notebook.
browser.quit()
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [127]:
# Point the browser at the JPL space-images listing filtered to Mars
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [128]:
# Snapshot the page source held by the browser, then parse it with BeautifulSoup
html = browser.html
soup = bs(html, features='html.parser')

In [129]:
# Read the image source of the first slide in the listing.
# NOTE(review): the `thumb` image looks like a preview rendition (the recorded
# URL ends in -640x350.jpg) rather than the full-size file — confirm this is
# the asset that is wanted.
article = soup.find("li", class_="slide")
featured_image_url = article.find("img", class_='thumb')['src']
# Resolve the src against the page URL. urljoin replaces the former url[:24]
# slice, which silently depended on the exact length of the host prefix and
# would corrupt a src that is already absolute.
featured_image_url = urljoin(url, featured_image_url)
# Display the featured image URL
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA23222-640x350.jpg'

## Mars Weather

In [130]:
# Fetch the Mars Weather Twitter timeline with a plain HTTP request and parse it
url = "https://twitter.com/marswxreport?lang=en"
response = requests.get(url)
soup = bs(response.text, features='html.parser')


In [131]:
# Collect the tweet paragraphs scraped from the "Mars Weather" timeline
weather_list = soup.find_all("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")

# `mars_weather` stays None unless a complete report is found, so a later cell
# can never pick up the text of an unrelated tweet by accident.
mars_weather = None
for weather in weather_list:
    tweet_text = weather.text
    tweet_words = [word.lower() for word in tweet_text.split(" ")]
    # A complete report must mention the "low", the "high" and the "gusting" wind
    if not all(keyword in tweet_words for keyword in ("low", "high", "gusting")):
        continue
    # When the last word is "hPa" fused with a "pic." link, drop the final
    # 26 characters (presumably the trailing pic.twitter.com/... link — verify
    # the length if the link format changes).
    if tweet_words[-1].split(".")[0] == "hpapic":
        tweet_text = tweet_text[:-26]
    mars_weather = tweet_text
    print("The Latest Mars weather:")
    print(mars_weather)
    break
else:
    # Reached only when no tweet passed the completeness check (or none were scraped)
    print("No complete Mars weather report found in the scraped tweets.")
        

The Latest Mars weather:
InSight sol 170 (2019-05-20) low -100.9ºC (-149.6ºF) high -21.2ºC (-6.1ºF)
winds from the SW at 4.7 m/s (10.5 mph) gusting to 14.6 m/s (32.6 mph)
pressure at 7.50 hPa


## Mars Facts

In [132]:
# Let pandas extract every HTML table from the Mars facts page,
# then keep the first table as the facts frame
url = "https://space-facts.com/mars/"
tables = pd.read_html(url)
Mars_fact_df = tables[0]

Mars_fact_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [133]:

# Build the cleaned facts table from the raw scraped table instead of editing
# the frame in place: the cell can then be re-run safely (the former version
# raised KeyError on column 0 once the rename had happened) and `tables[0]`
# keeps the untouched source data.
Mars_fact_df = (
    tables[0]
    # Give the two positional columns descriptive names
    .rename(columns={0: "Description", 1: "Value"})
    # Strip the trailing ":" from every label in the first column
    .assign(Description=lambda facts: facts["Description"].str.rstrip(":"))
)

Mars_fact_df


Unnamed: 0,Feature,Value
0,Equatorial Diameter,"6,792 km"
1,Polar Diameter,"6,752 km"
2,Mass,6.42 x 10^23 kg (10.7% Earth)
3,Moons,2 (Phobos & Deimos)
4,Orbit Distance,"227,943,824 km (1.52 AU)"
5,Orbit Period,687 days (1.9 years)
6,Surface Temperature,-153 to 20 °C
7,First Record,2nd millennium BC
8,Recorded By,Egyptian astronomers


## Mars Hemisphere Images

In [110]:
# Request the USGS Astrogeology search results for the enhanced Mars hemispheres
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
response = requests.get(url)

In [111]:
# Parse the search-results HTML returned by the request
soup = bs(response.text, features="html.parser")

In [112]:
# Every "description" div in the results carries the hemisphere name in its <h3>
itemLink = soup.find_all("div", {"class": "description"})
# Replace the spaces in each name with underscores while collecting them
Hemisphere_name_list = [card.find("h3").text.replace(" ", "_") for card in itemLink]

# Display the four hemisphere names
Hemisphere_name_list

['Cerberus_Hemisphere_Enhanced',
 'Schiaparelli_Hemisphere_Enhanced',
 'Syrtis_Major_Hemisphere_Enhanced',
 'Valles_Marineris_Hemisphere_Enhanced']

In [113]:
# Collect the detail-page URL of every hemisphere link in the search results
url_list = []
itemLink_1 = soup.find_all("a", class_="itemLink product-item")
for item in itemLink_1:
    # Resolve each href against the search URL. urljoin replaces the former
    # url[:29] slice, which silently depended on the exact length of the
    # host prefix.
    hemis_url = urljoin(url, item['href'])
    url_list.append(hemis_url)

url_list

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [114]:
# Visit every hemisphere detail page and record the URL of its wide image.
# Loop-local names are used on purpose: the former version reused `url`,
# `response` and `soup`, overwriting the search-results state that the earlier
# hemisphere cells read, so re-running those cells afterwards scraped the
# wrong page.
image_url_list = []
for hemis_page_url in url_list:
    page_response = requests.get(hemis_page_url)
    page_soup = bs(page_response.text, "html.parser")
    image_src = page_soup.find("img", class_="wide-image")['src']
    # Resolve the image src against the page it was found on
    # (replaces the former [:29] host-prefix slice)
    image_url = urljoin(hemis_page_url, image_src)
    image_url_list.append(image_url)

In [115]:
# Display the image URLs collected from the hemisphere detail pages
image_url_list

['https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg']

In [116]:
# Pair each hemisphere name with its image URL, one {"title", "img_url"} dict
# per hemisphere. zip walks both lists together, replacing the index loop; the
# scratch dict that was overwritten on every pass and never used is dropped.
hemisphere_image_urls = [
    {"title": title, "img_url": img_url}
    for title, img_url in zip(Hemisphere_name_list, image_url_list)
]


In [117]:
# Display the list of {"title", "img_url"} dictionaries, one per hemisphere
hemisphere_image_urls

[{'title': 'Cerberus_Hemisphere_Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli_Hemisphere_Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis_Major_Hemisphere_Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles_Marineris_Hemisphere_Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [118]:

# Shut down the browser session currently bound to `browser`
browser.quit()