# Web Scraping - Mission to Mars

In [68]:
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import requests

## NASA Mars News

In [74]:
# Visit the NASA Mars news page and pull out the first headline and teaser.
url_news = 'https://mars.nasa.gov/news/'
browser = Browser('chrome', headless=False)
try:
    browser.visit(url_news)
    # Snapshot the rendered page while the browser session is still alive.
    html_news = browser.html
finally:
    # Always shut the Chrome session down. Later cells open their own
    # browser, so leaving this one running leaks a window and driver process.
    browser.quit()

# create a soup object from the html
soup_news = BeautifulSoup(html_news, 'html.parser')

# The headline is the link text inside the first 'content_title' div; the
# teaser is the text of the first 'article_teaser_body' div on the page.
div1 = soup_news.find('div', class_='content_title')
news_title = div1.find('a').text
news_p = soup_news.find('div', class_='article_teaser_body').text

print(news_title)
print(news_p)

Seven Ways Mars InSight is Different
NASA has a long and successful track record at Mars. Since 1965, it has flown by, orbited, landed and roved across the surface of the Red Planet. What can InSight -- planned for launch in May -- do that hasn’t been done before?


## JPL Mars Space Images - Featured Image

In [76]:
# Visit the JPL Mars space images page and resolve the featured image URL.
url_img = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser = Browser('chrome', headless=False)
try:
    browser.visit(url_img)
    browser.click_link_by_partial_text('FULL IMAGE')
    # Capture the page source after the click, before the session is closed.
    html_img = browser.html
finally:
    # Release the Chrome session even if the visit or the click fails;
    # otherwise every run of this cell leaves another browser open.
    browser.quit()

# create a soup object from the html
soup_img = BeautifulSoup(html_img, 'html.parser')

# The first carousel <article> carries the image path on its first link's
# 'data-fancybox-href' attribute; the path is site-relative, so prefix the host.
home = soup_img.find('article', class_="carousel_item")
link = home.a['data-fancybox-href']
featured_image_url = 'https://www.jpl.nasa.gov' + link

print('featured_image_url =', featured_image_url)

featured_image_url = https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19046_ip.jpg


## Mars Weather

In [77]:
# Fetch the Mars weather report account page and extract the latest report text.
url_weather = 'https://twitter.com/marswxreport?lang=en'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as the timeline.
# Note: despite its name, html_weather is the requests Response object.
html_weather = requests.get(url_weather, timeout=30)
html_weather.raise_for_status()
soup_weather = BeautifulSoup(html_weather.text, 'html.parser')

# Within the 'stream' container, locate the first text node that is exactly
# "Mars Weather" and take the text of the next <p> element after it.
tweet = soup_weather.find('div', class_='stream')
mars_weather = tweet.find(text="Mars Weather").find_next('p').text

print('mars_weather =', mars_weather)

mars_weather = Sol 1974 (Feb 24, 2018), Sunny, high -13C/8F, low -77C/-106F, pressure at 7.29 hPa, daylight 05:37-17:25


## Mars Facts

In [79]:
# Read every HTML table on the Mars facts page with pandas and keep the first.
url_facts = 'https://space-facts.com/mars/'
facts_table = pd.read_html(url_facts)
df = facts_table[0]

# Label the two columns, then promote the description column to the index.
# Both steps modify the frame in place, so facts_table[0] reflects them too.
df.columns = ['Description', 'Value']
df.set_index(keys='Description', inplace=True)

# Write the table out as an HTML file, then show the frame as the cell output.
df.to_html(buf='Mars_df.html')
df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [88]:
# Generate an HTML table from the dataframe with the newlines stripped.
# str.replace returns a new string, so the result has to be assigned back.
html_table = df.to_html().replace('\n', '')

# Parse the in-memory markup directly instead of re-reading it from disk.
# The earlier cell writes 'Mars_df.html', so opening 'mars_df.html' fails on
# case-sensitive filesystems, and the open() handle was never closed.
soup_table = BeautifulSoup(html_table, 'html.parser')

# Build the column names and a {fact label: value} dictionary from the cells.
# Non-empty <th> cells: the first two are the header names, which appear as
# Value then Description (hence the reverse); the rest are the row labels,
# with any leading or trailing ':' stripped.
ths = [cell.text.strip(':') for cell in soup_table.table('th') if cell.text != '']
column_list = ths[0:2]
column_list.reverse()
th = ths[2:]
td = [cell.text for cell in soup_table.table('td')]

# Pair each row label with its value; the dict is wrapped in a one-item list.
mars_facts = dict(zip(th, td))
mars_list = [mars_facts]

print(column_list)
print(mars_list)

['Description', 'Value']
[{'Equatorial Diameter': '6,792 km', 'Polar Diameter': '6,752 km', 'Mass': '6.42 x 10^23 kg (10.7% Earth)', 'Moons': '2 (Phobos & Deimos)', 'Orbit Distance': '227,943,824 km (1.52 AU)', 'Orbit Period': '687 days (1.9 years)', 'Surface Temperature': '-153 to 20 °C', 'First Record': '2nd millennium BC', 'Recorded By': 'Egyptian astronomers'}]


## Mars Hemispheres

In [100]:
# Visit the USGS Mars hemispheres search results and collect, for each
# result, its title and the URL of the full-size image on its detail page.
url_hemisperes = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
Mars_Hemisperes = []

browser = Browser('chrome', headless=False)
try:
    browser.visit(url_hemisperes)

    html_hemisperes = browser.html
    soup_hem = BeautifulSoup(html_hemisperes, 'html.parser')
    jpg_links = soup_hem.find_all('div', class_='description')

    # Click through to each result page to read the image URL, then go back.
    for link in jpg_links:
        # The <h3> text is both the record title and the link text to click.
        h3 = link.find('h3').text
        browser.click_link_by_partial_text(h3)
        html2 = browser.html
        soup2 = BeautifulSoup(html2, 'html.parser')
        # The 'src' of the 'wide-image' <img> is site-relative; prefix the host.
        url = soup2.find('img', class_='wide-image')['src']
        info = {
            'title': h3,
            'img_url': 'https://astrogeology.usgs.gov' + url,
        }
        Mars_Hemisperes.append(info)
        # Return to the results list so the next title link can be clicked.
        browser.click_link_by_partial_text('Back')
finally:
    # Close the Chrome session even when a click or lookup raises mid-loop,
    # so a failed run does not leave the browser and its driver running.
    browser.quit()

print('hemisphere_image_urls =', Mars_Hemisperes)

hemisphere_image_urls = [{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]
