In [1]:
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

### Code for getting featured stories from the NASA Mars mission site


In [2]:
# Download the NASA Mars news page with requests and parse it with BeautifulSoup.
nasa_url = "https://mars.nasa.gov/news/"

# The timeout keeps this cell from hanging forever if the server never answers,
# and raise_for_status() stops the notebook on an HTTP error instead of
# silently parsing an error page.
nasa_response = requests.get(nasa_url, timeout=30)
nasa_response.raise_for_status()

# Create the BeautifulSoup object; parse with the 'lxml' parser
nasa_soup = BeautifulSoup(nasa_response.text, 'lxml')

In [3]:
# find_all returns an iterable collection of every <div> whose class is "slide"
nasa_results = nasa_soup.find_all('div', attrs={"class": "slide"})

In [4]:
# Collect the title and teaser text of every news slide.
# Each entry is a dict with the keys "Nasa_Title" and "Nasa_Text".
nasa_list = []
for result in nasa_results:
    # find() returns None when a slide lacks the expected child <div>, so
    # check for that explicitly instead of relying on an AttributeError.
    title_div = result.find('div', class_="content_title")
    text_div = result.find('div', class_="rollover_description_inner")
    if title_div is None or text_div is None:
        print("Skipping slide without a title or description")
        continue

    title = title_div.text
    text = text_div.text

    # Keep the story only if both the title and the text are non-empty;
    # newlines are stripped out of both values.
    if title and text:
        nasa_list.append({"Nasa_Title": title.replace("\n", ""), "Nasa_Text": text.replace("\n", "")})

In [5]:
#nasa_list

### Code for getting the featured Mars image from the JPL site

In [6]:
# Setup for splinter: launch a visible (non-headless) Chrome session.
# The chromedriver location can be overridden with the CHROMEDRIVER_PATH
# environment variable; the previous hard-coded path remains the default.
executable_path = {'executable_path': os.environ.get('CHROMEDRIVER_PATH', 'c:/bin/chromedriver.exe')}
browser = Browser('chrome', **executable_path, headless=False)

In [7]:
# Open the JPL space-images page (search filtered to the Mars category) in the browser
url = 'http://www.jpl.nasa.gov/spaceimages/?search=&category=Mars#submit/'
browser.visit(url)

In [8]:
# Take the HTML of the page currently loaded in the browser and parse it with html.parser
html = browser.html
soup = BeautifulSoup(html, "html.parser")


In [9]:
# Every <li> element whose class is "slide" on the parsed page
mars_images = soup.find_all('li', attrs={'class': 'slide'})

In [10]:
# Collect the "data-fancybox-href" link of the first two image slides.
# Slicing replaces the manual counter/break and also copes with fewer than
# two slides.
# NOTE(review): the recorded links look site-relative (they start with
# "/spaceimages/") — confirm whether callers need the site root prepended.
mars_images_url = []
for mars_image in mars_images[:2]:
    image_link = mars_image.find('a')["data-fancybox-href"]
    print(image_link)
    mars_images_url.append(image_link)

/spaceimages/images/largesize/PIA24088_hires.jpg
/spaceimages/images/largesize/PIA24087_hires.jpg


In [11]:
# Display the collected image links
mars_images_url

['/spaceimages/images/largesize/PIA24088_hires.jpg',
 '/spaceimages/images/largesize/PIA24087_hires.jpg']

In [12]:
# Close the browser session that was used for the JPL page
browser.quit()

### Getting the information table from Space-Facts

In [13]:
# Setup for splinter: launch a visible (non-headless) Chrome session.
# The chromedriver location can be overridden with the CHROMEDRIVER_PATH
# environment variable; the previous hard-coded path remains the default.
executable_path = {'executable_path': os.environ.get('CHROMEDRIVER_PATH', 'c:/bin/chromedriver.exe')}
browser = Browser('chrome', **executable_path, headless=False)

In [14]:
# Open the Space-Facts Mars page in the browser
url = 'https://space-facts.com/mars/'
browser.visit(url)

In [15]:
# read_html is handed the URL string itself (not browser.html) and returns the
# tables it finds as a list of DataFrames.
tables = pd.read_html(url)

In [16]:
# tables is a list of DataFrames; inspection showed that tables[0] is the one we want.

In [17]:
# Take the first table and give its two columns (0 and 1) meaningful headings.
# rename() returns a new DataFrame, so the entry in `tables` is left untouched.
mars_facts = tables[0].rename(columns={0: "Parameter", 1: "Value"})
mars_facts

Unnamed: 0,Parameter,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [18]:
# Write the facts table to mars_facts.html as an HTML table, leaving out the index column
mars_facts.to_html("mars_facts.html", index = False)

In [19]:
# Close the browser session that was opened for the Space-Facts page
browser.quit()

### Getting hemisphere information from the USGS Astrogeology site

In [20]:
# Setup for splinter: launch a visible (non-headless) Chrome session.
# The chromedriver location can be overridden with the CHROMEDRIVER_PATH
# environment variable; the previous hard-coded path remains the default.
executable_path = {'executable_path': os.environ.get('CHROMEDRIVER_PATH', 'c:/bin/chromedriver.exe')}

browser = Browser('chrome', **executable_path, headless=False)

In [21]:
# Open the USGS Astrogeology search results for "hemisphere enhanced" with target Mars
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
time.sleep(5) # fixed 5-second pause so the page can finish loading before it is read

In [22]:
# Parse the page currently loaded in the browser with html.parser
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
# NOTE(review): hemi_list is not referenced by the other cells visible in this
# notebook — confirm whether it is still needed.
hemi_list = ["Cerberus", "Schiaparelli", "Syrtis", "Valles"]
# Preview only the start of the document as a sanity check; printing the whole
# prettified page floods the notebook output.
print(soup.prettify()[:1000])

<html lang="en">
 <head>
  <link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.3/themes/smoothness/jquery-ui.css" rel="stylesheet" type="text/css"/>
  <title>
   Astropedia Search Results | USGS Astrogeology Science Center
  </title>
  <meta content="USGS Astrogeology Science Center Astropedia search results." name="description"/>
  <meta content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
  <meta content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM" name="google-site-verification"/>
  <!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic,400,bold"/>-->
  <link href="/css/main.css" media="screen" rel="stylesheet"/>
  <link href="/css/print.css" media="print" rel="styles

In [31]:
# Build the detail-page URL and the name of every hemisphere in the search results.
# Entries are appended in pairs, so hemi_url[i] and hemi_name[i] describe the same result.
base_url = "https://astrogeology.usgs.gov"
results = soup.find_all('div', class_='description')
hemi_url = []
hemi_name = []
for result in results:
    link_tag = result.find('a')
    heading_tag = result.find('h3')
    # find() returns None for a missing tag, and indexing None (or a missing
    # attribute) would raise TypeError/KeyError rather than AttributeError,
    # so incomplete results are detected explicitly and skipped.
    relative_link = link_tag.get("href") if link_tag is not None else None
    if relative_link is None or heading_tag is None:
        print("Skipping result without a link or heading")
        continue

    name = heading_tag.text
    # One marker line is printed per collected hemisphere
    print("** ** ** **")
    # The href is appended to the site root to form the full detail-page URL
    combine_url = base_url + relative_link
    hemi_url.append(combine_url)
    hemi_name.append(name)

** ** ** **
** ** ** **
** ** ** **
** ** ** **


In [32]:
# Print the collected hemisphere URLs and names, then close the browser session
print(hemi_url)
print(hemi_name)
browser.quit()
    

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']
['Cerberus Hemisphere Enhanced', 'Schiaparelli Hemisphere Enhanced', 'Syrtis Major Hemisphere Enhanced', 'Valles Marineris Hemisphere Enhanced']


In [44]:
# Show the title and teaser text of the first scraped NASA story
first_story = nasa_list[0]
print("NASA Title:  " + first_story["Nasa_Title"])
print("***************")
print("NASA Story:  " + first_story["Nasa_Text"])

NASA Title:  NASA Readies Perseverance Mars Rover's Earthly Twin 
***************
NASA Story:  Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.


In [45]:
# Display the image links collected from the JPL page
mars_images_url



['/spaceimages/images/largesize/PIA24088_hires.jpg',
 '/spaceimages/images/largesize/PIA24087_hires.jpg']

In [46]:
# Display the Mars facts table with its renamed columns
mars_facts

Unnamed: 0,Parameter,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [49]:
# Show the name and detail-page URL of the first hemisphere that was collected
print(f"Image Title:  {hemi_name[0]}")
print(f"Image URL:  {hemi_url[0]}")


Image Title:  Cerberus Hemisphere Enhanced
Image URL:  https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced
