In [1]:
# Dependencies
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import time
from webdriver_manager.chrome import ChromeDriverManager

## Mars News

In [3]:
# Note:  This project has added ChromeDriverManager() to make running the splinter easier  

# setup chrome browser to run splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser("chrome", **executable_path, headless=False)

In [4]:
#  tell browser what page to load
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# allow page to load
time.sleep(1)

# collect content from page
html = browser.html  

#  parse content into soup object
soup = BeautifulSoup(html, "html.parser")

In [1]:
# # view content
# print(soup.prettify())  

In [6]:
# extract content
news_title = soup.find('div', class_='content_title').text
news_title

'Mars Now'

In [7]:
# extract content
news_p = soup.find('div', class_='article_teaser_body').text
news_p

'An electronics issue is suspected to be preventing the sensors from sharing their data about Mars weather with the spacecraft.'

## Mars Space Images

In [8]:
# tell browser to visit new page
url_base = "https://www.jpl.nasa.gov"
url_add = '/spaceimages/?search=&category=Mars'
browser.visit(url_base+url_add)

# set time delay to let page load
time.sleep(1)

# collect content from page
html = browser.html

# make soup object from content
soup = BeautifulSoup(html, "html.parser")

In [9]:
# collect specific part of page
bttn_image_url = soup.find('article', class_='carousel_item').get('style')
bttn_image_url

"background-image: url('/spaceimages/images/wallpaper/PIA18289-1920x1200.jpg');"

In [10]:
# find start of url in characters
start=bttn_image_url.find("url('")
start

18

In [11]:
# find end of url in characters
end=bttn_image_url.find("');")
end

75

In [12]:
# access only url portion of string using start and end calculated above
featured_image_url=url_base+bttn_image_url[start+3+len("('"):end]
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18289-1920x1200.jpg'

## Alternate Image but not as large

In [13]:
# another way of getting this image
bttn_image_url = soup.find('footer')
bttn_image_url

<footer>
<a class="button fancybox" data-description="A new day dawns on Saturn as the part of the planet is seen emerging once more into the Sun's light by NASA's Cassini orbiter. With an estimated rotation period of 10 hours and 40 minutes, Saturn's days and nights are much shorter than those on Earth." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA18289_ip.jpg" data-link="/spaceimages/details.php?id=PIA18289" data-title="Sunrise on Saturn" id="full_image">
					FULL IMAGE
				  </a>
</footer>

In [14]:
# select image attribute
bttn_image_url = bttn_image_url.find('a').attrs['data-fancybox-href']

In [15]:
# combine image relative reference with domain url to get complete url
featured_image_url = url_base + bttn_image_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18289_ip.jpg'

## Mars Weather Tweet

In [18]:
# tell browser to nasa news feed
url_base = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_base)

# wait for page to load
time.sleep(1)

# collect content and store as soup object
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [2]:
# # inspect content
# soup

In [17]:
# collect content
mars_weather = soup.find('p', class_='TweetTextSize').text
mars_weather

AttributeError: 'NoneType' object has no attribute 'text'

## Mars Fun Facts

In [61]:
# tell browser to visit new page
url_base = "https://space-facts.com/mars/"
browser.visit(url_base)

# wait for page to load
time.sleep(1)

#  collect content and store in soup object
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [62]:
# use pandas to get the only table on the page
table = pd.read_html(url_base)

In [63]:
# returns list so extract first item in list
table[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [64]:
#  store table as html code
htmltable=table[0].to_html()

# view content
print(htmltable)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>0</th>
      <th>1</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
      <td>Recorded

## Mars Hemispheres

In [65]:
# set browser to visit new page
# set lists for later use; look at my terrible naming! ahhh I should know better
link2=[]
link3=[]
link4=[]
url_base = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url_base)
time.sleep(2)

# collect links from page using splinter
links = browser.find_link_by_partial_text('Hemisphere')
# for link in links:
#     link.click()
#     time.sleep(3)
#     url_link = browser.find_link_by_partial_text('enhanced.tif')

# loop through links and extract url part and store in link2
[link2.append(link['href']) for link in links]

# use above links (link2) and visit each page extracting the image url (link3) and title (link4)
for link in link2:
    browser.visit(link)
    time.sleep(2)
    url_link = browser.find_link_by_partial_text('Sample')                #partial_text('enhanced.tif')
    title_text = browser.find_by_css('.title')
    link3.append(url_link['href'])
    link4.append(title_text.html)

In [66]:
# check scrape
# stored original links for each image
link2

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [67]:
# check scrape
# stored the higher definition image for each page visited (from above)
link3

['http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

In [68]:
# check scrape
# stored title of each image above
link4

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [69]:
# create dictionary that holds title and image url
hemisphere_image_urls = []
for i in range(len(link3)):
    hemisphere_image_urls.append({"title": link4[i], "img_url": link3[i]})

In [70]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

## Part 2 Flask

* See flask app (app.py)