In [1]:
#dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import requests
import pandas as pd

In [2]:
#chrome soup
browser = Browser('chrome')
url = 'https://mars.nasa.gov/news'
browser.visit(url)
html = browser.html
soup = bs(html, 'lxml')

In [3]:
#get headline and into paragraph
news = soup.find_all('li', class_='slide')
first_headline = news[0].find('div', class_='content_title').text
intro = news[0].find('div', class_='article_teaser_body').text
print(first_headline)
print(intro)

Mars 2020 Rover Gets a Super Instrument
With its rock-zapping laser, the SuperCam will enable the science team to identify the chemical and mineral makeup of its targets on the Red Planet.


In [4]:
# go to jpl url, create html object in soup

jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)
browser.is_element_present_by_id("full_image", 1)
jpl_html = browser.html

soup = bs(jpl_html, 'lxml')

In [5]:
# get featured image

img_src = soup.find(id='full_image')['data-fancybox-href']
img_name = img_src.split('/')[-1].split('_')[0]
featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/' + img_name + '_hires.jpg'
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA20063_hires.jpg


In [6]:
#repeat with twitter

tweet_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(tweet_url)
tweet_html = browser.html

tweet_soup = bs(tweet_html, 'lxml')

In [12]:
#mars weather
tweets_MarsWxReport = tweet_soup.find_all('div', attrs={'data-screen-name': 'MarsWxReport'})

for tweet in tweets_MarsWxReport:
     report = tweet.find('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')
mars_weather = report.text


print(mars_weather)

InSight sol 186 (2019-06-05) low -101.7ºC (-151.0ºF) high -21.8ºC (-7.2ºF)
winds from the SSE at 4.6 m/s (10.3 mph) gusting to 16.2 m/s (36.3 mph)
pressure at 7.60 hPa


In [13]:
#scrape mars facts

facts_url = 'https://space-facts.com/mars/'
tables = pd.read_html(facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [14]:

df = tables[0]
df.columns = ['description', 'value']
df

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [15]:

df.set_index('description', inplace=True)
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [16]:
mars_facts = df.to_html()

# Strip 
mars_facts.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>value</th>    </tr>    <tr>      <th>description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [17]:
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(usgs_url)
soup = bs(browser.html, 'html.parser')
hemispheres = soup.select('div.item')


hemisphere_image_urls = []

for h in hemispheres:
    title = (h.find('h3').text).replace(' Enhanced', '')
    browser.click_link_by_partial_text(title)
    soup = bs(browser.html, 'html.parser')
    full = soup.find('a', text='Sample')
    img_url = full['href']
    hemisphere_image_urls.append({'title': title, 'img_url': img_url})
    browser.back()



In [18]:
# Close browser
browser.quit()

In [19]:
mars_dict = {}
mars_dict['first_headline'] = first_headline
mars_dict['intro'] = intro
mars_dict['featured_image_url'] = featured_image_url
mars_dict['mars_weather'] = mars_weather
mars_dict['mars_facts'] = mars_facts
mars_dict['hemisphere_image_urls'] = hemisphere_image_urls

mars_dict


{'first_headline': 'Mars 2020 Rover Gets a Super Instrument',
 'intro': 'With its rock-zapping laser, the SuperCam will enable the science team to identify the chemical and mineral makeup of its targets on the Red Planet.',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA20063_hires.jpg',
 'mars_weather': 'InSight sol 186 (2019-06-05) low -101.7ºC (-151.0ºF) high -21.8ºC (-7.2ºF)\nwinds from the SSE at 4.6 m/s (10.3 mph) gusting to 16.2 m/s (36.3 mph)\npressure at 7.60 hPa',
 'mars_facts': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </t