In [19]:
# Import Libraries
import os
import pandas as pd
import requests as req
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time

## Getting news from https://mars.nasa.gov/news/ web page

In [20]:
# Chromedriver execution
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=True)

# URL path
url1 = "https://mars.nasa.gov/news/"
browser.visit(url1)

# Save html and parser
html = browser.html
soup = bs(html, "html.parser")

In [21]:
# Printing
news_date = soup.find('li', class_='slide').find('div', class_="list_date").text
print(news_date)
news_title = soup.find('div',class_="list_text").find('div', class_="content_title").text
print(news_title)
news_text = soup.find('div',class_="list_text").find('div',class_="article_teaser_body").text
print(news_text)

October 27, 2020
NASA's Perseverance Rover Is Midway to Mars 
Sometimes half measures can be a good thing – especially on a journey this long. The agency's latest rover only has about 146 million miles left to reach its destination.


## Getting featured image from https://www.jpl.nasa.gov/spaceimages/ web page

In [22]:
# URL path
url2 = "https://www.jpl.nasa.gov/spaceimages/"

# Visiting url2 to click and response
browser.visit(url2)
browser.find_by_id('full_image').click()
time.sleep(3)

# Clicking on more info button
browser.links.find_by_partial_text('more info').click()

# Getting image URL
featured_image_url = browser.find_by_xpath("//img[@class='main_image']")._element.get_attribute("src")

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19977_hires.jpg


## Getting Mars facts table structure and data from https://space-facts.com/mars/ web page

In [23]:
# URL path
url3 = "https://space-facts.com/mars/"

# Finding all tables on a web page
table = pd.read_html(url3)
len(table)

3

In [24]:
# Pick first table (Mars facts)
table[0].columns = ['Parameter', 'Value']
fact_table = table[0]
fact_table

Unnamed: 0,Parameter,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [25]:
# Converting DataFrame to HTML table
table_html = fact_table.to_html()
# print(table_html)

In [26]:
# Getting mars facts table data from the web page
browser.visit(url3)

html = browser.html
soup = bs(html, "html.parser")

tables = soup.findChildren('table')
table_data=[]
table1 = tables[0]
rows = table1.findChildren(['th', 'tr'])   

for row in rows:
    title = row.find('td', class_="column-1").text.strip()
    value = row.find('td', class_="column-2").text.strip()
    table_data.append({'Parameter': title, 'Value': value})

table_data

[{'Parameter': 'Equatorial Diameter:', 'Value': '6,792 km'},
 {'Parameter': 'Polar Diameter:', 'Value': '6,752 km'},
 {'Parameter': 'Mass:', 'Value': '6.39 × 10^23 kg (0.11 Earths)'},
 {'Parameter': 'Moons:', 'Value': '2 (Phobos & Deimos)'},
 {'Parameter': 'Orbit Distance:', 'Value': '227,943,824 km (1.38 AU)'},
 {'Parameter': 'Orbit Period:', 'Value': '687 days (1.9 years)'},
 {'Parameter': 'Surface Temperature:', 'Value': '-87 to -5 °C'},
 {'Parameter': 'First Record:', 'Value': '2nd millennium BC'},
 {'Parameter': 'Recorded By:', 'Value': 'Egyptian astronomers'}]

## Getting Mars hemispheres images from https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars web page

In [27]:
# URL path
url4 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

browser.visit(url4)
browser.url
html = browser.html

# Parsering and scrapping list of images
soup = bs(html, "html.parser")
images = soup.find_all('div', class_="description")
link = f"https://astrogeology.usgs.gov"

In [28]:
# Looping thorugh images list, pick href and add it to link, visit new link, scrap for image url and title, append to a list
hem_img_urls = []
for image in images:
    img_link = f"{link}{image.find('a')['href']}"
    browser.visit(img_link)
    img_url = browser.find_by_xpath("//img[@class='wide-image']")._element.get_attribute("src")
    title = browser.find_by_xpath("//h2[@class='title']").text
    title = title.rstrip('Enhanced')
    hem_img_urls.append({"title" : title, "img_url" : img_url})
hem_img_urls

[{'title': 'Cerberus Hemisphere ',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere ',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere ',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere ',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]