In [1]:
import shutil
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
# NOTE(review): importing Markup from flask is deprecated in recent Flask releases;
# newer environments need `from markupsafe import Markup` instead - confirm against
# the installed Flask version.
from flask import Markup
from splinter import Browser


### NASA Mars News

In [5]:
# URL to scrape
news_url = 'https://mars.nasa.gov/news/'

# Bound the wait and fail loudly on HTTP errors so an error page is never
# parsed as if it were the news listing.
response = requests.get(news_url, timeout=30)
response.raise_for_status()
news_soup = bs(response.text, 'html.parser')

# Scrape results: the first headline link and the first teaser paragraph
title_div = news_soup.find('div', class_='content_title')
title_link = title_div.find('a') if title_div is not None else None
teaser_div = news_soup.find('div', class_='rollover_description_inner')
if title_link is None or teaser_div is None:
    # find() returns None on a miss; report that instead of an AttributeError
    raise ValueError('Expected news markup not found at ' + news_url)

# Strip the title the same way as the teaser so neither keeps stray whitespace
news_title = title_link.text.strip()
news_p = teaser_div.text.strip()

print(news_title)
print(news_p)


NASA Invites Students to Name Mars 2020 Rover

Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover.


### JPL Mars Space Images - Featured Image

In [3]:
# URL to scrape
mars_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Bound the wait and surface HTTP errors before parsing
response = requests.get(mars_url, timeout=30)
response.raise_for_status()
mars_soup = bs(response.text, 'html.parser')

# Scrape results: the inline `style` attribute of the first <article> in the
# carousel. This is the raw attribute value, not yet a usable URL.
carousel = mars_soup.find('div', class_='carousel_container')
carousel_items = carousel.find('div', class_='carousel_items') if carousel is not None else None
if carousel_items is None or carousel_items.article is None:
    # find() returns None on a miss; report that instead of an AttributeError
    raise ValueError('Featured-image carousel not found at ' + mars_url)

image_url = carousel_items.article['style']

In [4]:
# The style attribute embeds the image path between "('" and "')".
# The path is extracted into a new name instead of overwriting `image_url`,
# so this cell can be re-run on its own without failing on already-parsed input.
style_prefix, opener, remainder = image_url.partition("('")
image_path, closer, style_suffix = remainder.partition("')")
if not opener or not closer:
    raise ValueError('Unexpected style attribute: ' + image_url)

# Add url parts together
base_url = 'https://www.jpl.nasa.gov'
featured_image_url = base_url + image_path
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18273-1920x1200.jpg'

### Mars Weather

In [5]:
# URL to scrape
twitter_url = 'https://twitter.com/marswxreport?lang=en'

# Bound the wait and surface HTTP errors before parsing
response = requests.get(twitter_url, timeout=30)
response.raise_for_status()
twitter_soup = bs(response.text, 'html.parser')

# Scrape results: the first <p> carrying the tweet-text class
tweet = twitter_soup.find('p', class_='js-tweet-text')
if tweet is None:
    # find() returns None on a miss; report that instead of an AttributeError
    raise ValueError('No tweet text found at ' + twitter_url)

# Keep only the text that precedes any pic.twitter.com media link
mars_weather = tweet.text.split('pic.twitter.com')[0]
mars_weather

'InSight sol 353 (2019-11-24) low -100.6ºC (-149.1ºF) high -23.3ºC (-10.0ºF)\nwinds from the SSE at 5.8 m/s (13.1 mph) gusting to 21.4 m/s (47.9 mph)\npressure at 6.70 hPa'

### Mars Facts

In [6]:
# URL to scrape
facts_url = 'https://space-facts.com/mars/'

# Bound the wait and surface HTTP errors before parsing
response = requests.get(facts_url, timeout=30)
response.raise_for_status()
facts_soup = bs(response.text, 'html.parser')

# Scrape results: the facts table, looked up by its element id
facts_table = facts_soup.find('table', id='tablepress-p-mars-no-2')
if facts_table is None:
    # Fail here rather than letting a None flow into the parsing cell
    raise ValueError('Facts table not found at ' + facts_url)

In [7]:
# Serialise the scraped <table> tag back to an HTML string
facts_table_html = str(facts_table)

# Parse the markup into a list of DataFrames.
# The string is wrapped in StringIO because recent pandas versions deprecate
# passing literal HTML to read_html; a file-like object is accepted everywhere.
facts_list = pd.read_html(StringIO(facts_table_html))

# Slice list to df; the label column gets an empty header on purpose
facts_df = facts_list[0]
facts_df.columns = ['', 'value']
facts_df

Unnamed: 0,Unnamed: 1,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [8]:
# Convert df to a single-line HTML table string without the index column
mars_facts = facts_df.to_html(index=False, border=None).replace('\n', '')

# Wrap in Markup so the string is treated as HTML rather than escaped as text
mars_html = Markup(mars_facts)
mars_html

Markup('<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>value</th>    </tr>  </thead>  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>')

### Mars Hemispheres

In [9]:
# Locate chromedriver on PATH so the same cell works across operating systems;
# fall back to the original macOS location when it is not found on PATH.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}

# NOTE(review): no browser.quit() call is visible in this notebook - call it once
# scraping is finished so the headless Chrome process does not linger.
browser = Browser('chrome', **executable_path, headless=True)

In [10]:
# Search page listing the enhanced Mars hemisphere products
hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Load the page in the browser and parse the HTML it exposes
browser.visit(hemispheres_url)
html = browser.html
hemi_soup = bs(html, 'html.parser')

# Collect every <div class="item"> on the page
results = hemi_soup.find_all('div', class_='item')

In [11]:
base_url = 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/'
hemisphere_image_urls = []

# Loop through results
for result in results:

    # Find title
    title = result.find('div', class_='description')\
        .find('a', class_='product-item').find('h3').text

    # Find full-size img.
    # Read the thumbnail's src attribute directly instead of slicing the tag's
    # string form, which depends on attribute order and on no other attribute
    # containing an underscore.
    # Assumes src ends in <prefix>_<name>.tif_thumb.png, as implied by the
    # resulting URLs - TODO confirm against the live page.
    thumb_src = result.find('a', class_='product-item')\
        .find('img', class_='thumb')['src']
    thumb_file = thumb_src.rsplit('/', 1)[-1]       # keep the file name only
    tif_name = thumb_file.split('_', 1)[1]          # drop the prefix before the first '_'
    tif_name = tif_name.split('_thumb.png')[0]      # drop the thumbnail suffix
    img_url = base_url + tif_name + '/full.jpg'     # add img location to base_url

    # Create dict and add to list of dicts
    hemisphere_image_urls.append({'title': title, 'img_url': img_url})

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]