In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import pymongo
import time

# Step One

### NASA Mars News

In [2]:
#using  chromedriver and browser method
#setup and fire up browser task 1 
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

#visit URL
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

time.sleep(2)

html = browser.html
soup = bs(html, 'html.parser')

#find relevant class and store in a list 
item_list = soup.find_all('li', class_ = 'slide')

#loop and strip the html
titles = []
descriptions = []
for data in item_list:
    
    title = data.find('h3').text
    titles.append(title)
    
    description = data.find('a').text
    descriptions.append(description)

# Close the browser after scraping
browser.quit()

In [3]:
titles[0]

'Mars Is Getting a New Robotic Meteorologist'

In [3]:
#inspecting final results
print("\n-------   Titles    ----------\n")
print(titles[:2])
print("\n-------   Descriptions    ----------\n")
print(descriptions[:2])


-------   Titles    ----------

['Mars Is Getting a New Robotic Meteorologist', "NASA's Curiosity Takes Selfie With 'Mary Anning' on the Red Planet"]

-------   Descriptions    ----------

["Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles.Mars Is Getting a New Robotic Meteorologist", "The Mars rover has drilled three samples of rock in this clay-enriched region since arriving in July.NASA's Curiosity Takes Selfie With 'Mary Anning' on the Red Planet"]


### JPL Mars Space Images - Featured Image

In [2]:
#using request method 
#initial URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
response = requests.get(url)

#soup object: html doc
soup = bs(response.text, 'html.parser')

#url inspection and finding the image's parent class
all_images = soup.find_all('a', class_ = 'fancybox')

#looping through each image
imglinks_list = []
for img_html in all_images:
    
    #image link
    img_link = img_html['data-fancybox-href']
    
    #full url concant and append
    featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/' + img_link
    imglinks_list.append(featured_image_url)


In [3]:
imglinks_list[:5]

['https://www.jpl.nasa.gov/spaceimages//spaceimages/images/mediumsize/PIA17357_ip.jpg',
 'https://www.jpl.nasa.gov/spaceimages//spaceimages/images/largesize/PIA24245_hires.jpg',
 'https://www.jpl.nasa.gov/spaceimages//spaceimages/images/largesize/PIA24244_hires.jpg',
 'https://www.jpl.nasa.gov/spaceimages//spaceimages/images/largesize/PIA24173_hires.jpg',
 'https://www.jpl.nasa.gov/spaceimages//spaceimages/images/largesize/PIA24243_hires.jpg']

In [4]:
#using request method 
#initial URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
response = requests.get(url)

#soup object: html doc
soup = bs(response.text, 'html.parser')

#url inspection and finding the image's parent class
img_src = soup.footer.a["data-fancybox-href"]

featured_img = "https://www.jpl.nasa.gov/" + img_src

In [5]:
featured_img

'https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA02570_ip.jpg'

### Mars Facts


In [2]:
url = 'https://space-facts.com/mars/'
df = pd.read_html(url)

In [23]:
mars_table = df[0]
mars_table = mars_table.rename({0: "", 1: "Data"}, axis =1).set_index("")

In [24]:
mars_table

Unnamed: 0,Data
,
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [25]:
#export as a html file
mars_table.to_html("mars_facts.html")

### Mars Hemispheres

In [9]:
#using  chromedriver and browser method
#setup and fire up browser task 1 
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

#visit URL
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

html = browser.html
soup = bs(html, 'html.parser')

#all items where the links are stored
items = soup.find_all('div', class_ = 'item')

home_url = 'https://astrogeology.usgs.gov' 

#here I will store dictionaries for each item
links_list = []

#looping through each item
for item in items:
    
    #initialising empty dictionary to store the links of each image
    hemishpere_data = {}
    
    #dictionary key: 'title' ; value-pair: item title
    hemishpere_data['title'] = item.find('h3').text.strip('Enhanced').strip()
    
    #find image link, create new URL, request and create soup object
    img_link = item.find('a')['href']
    new_url = home_url + img_link
    response = requests.get(new_url)
    soup = bs(response.text, 'html.parser')
   
    #find full image link 
    full_size_img = soup.find_all('div', class_ = 'downloads')[0].find('li').find('a')['href']
    
    #dictionary key for link
    hemishpere_data['img_url'] = full_size_img
    
    #appending dictionary to the list 
    links_list.append(hemishpere_data)

# Close the browser after scraping
browser.quit()

In [10]:
links_list

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

### All scrapes complete 

#### Compiling all scrapes into 1 dictionary 

In [11]:
mars_data = {"news_title": titles[0],
                "mars_news":descriptions[0],
                "img_links": featured_img,
                "hemispheres": links_list
                 }

In [12]:
mars_data

{'news_title': 'Mars Is Getting a New Robotic Meteorologist',
 'mars_news': "Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles.Mars Is Getting a New Robotic Meteorologist",
 'img_links': 'https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA17793_ip.jpg',
 'hemispheres': [{'title': 'Cerberus Hemisphere',
   'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
  {'title': 'Schiaparelli Hemisphere',
   'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
  {'title': 'Syrtis Major Hemisphere',
   'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
  {'title': 'Valles Marineris Hemisphere',
   'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]}