In [172]:
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import cssutils
import pandas as pd
import pymongo

In [173]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'html.parser')

In [174]:
# Print formatted version of the soup
#print(soup.prettify())

In [175]:
news_title = soup.title.text.strip()
news_title

'News  – NASA’s Mars Exploration Program'

In [176]:
# Extract the text of the body
news_p = soup.body.p.text
news_p

'Managed by the Mars Exploration Program and the Jet Propulsion Laboratory for NASA’s Science Mission Directorate'

In [177]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [178]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [179]:
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# Retrieve all articles that contain images
articles = soup.find_all('article', class_='carousel_item')
print(articles)
print('-----------------------------------------------------------------------------------------------------------------------')
background_image = soup.find('article', class_='carousel_item')['style']
print(background_image)

[<article alt="Curiosity Self-Portrait at Martian Sand Dune" class="carousel_item" style="background-image: url('/spaceimages/images/wallpaper/PIA20316-1920x1200.jpg');">
<div class="default floating_text_area ms-layer">
<h2 class="category_title">
</h2>
<h2 class="brand_title">
				  FEATURED IMAGE
				</h2>
<h1 class="media_feature_title">
				  Curiosity Self-Portrait at Martian Sand Dune				</h1>
<div class="description">
</div>
<footer>
<a class="button fancybox" data-description="This self-portrait of NASA's Curiosity Mars rover shows the vehicle at 'Namib Dune,' where the rover's activities included scuffing into the dune with a wheel and scooping samples of sand for laboratory analysis." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA20316_ip.jpg" data-link="/spaceimages/details.php?id=PIA20316" data-title="Curiosity Self-Portrait at Martian Sand Dune" id="full_image">
					FULL IMAGE
				  </a>
</footer>
</div>
<div class="gradient_container

In [180]:
style = cssutils.parseStyle(background_image)
url = style['background-image']

In [181]:
print(url)

url(/spaceimages/images/wallpaper/PIA20316-1920x1200.jpg)


In [182]:
image_url = url.replace('url(', '').replace(')', '')

In [183]:
print(image_url)

/spaceimages/images/wallpaper/PIA20316-1920x1200.jpg


In [184]:
featured_image_url = 'https://www.jpl.nasa.gov' + image_url
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA20316-1920x1200.jpg


In [185]:
# URL of page to be scraped
twitter_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(twitter_url)
html = browser.html
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(html, 'html.parser')

In [186]:
news_title = soup.title.text.strip()
news_title

'Mars Weather (@MarsWxReport) | Twitter'

In [187]:
mars_weather = soup.find_all('p')[4].text
print(mars_weather)

InSight sol 155 (2019-05-04) low -99.3ºC (-146.8ºF) high -18.8ºC (-1.8ºF)
winds from the SW at 4.5 m/s (10.1 mph) gusting to 14.3 m/s (31.9 mph)
pressure at 7.40 hPapic.twitter.com/wEcSHS2b3u


In [188]:
#Visit the Mars Facts webpage https://space-facts.com/mars/ and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
#Use Pandas to convert the data to a HTML table string.
tables = pd.read_html('https://space-facts.com/mars/')
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [189]:
df = tables[0]
df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [190]:
df.columns = ['Fact', 'Value']
df

Unnamed: 0,Fact,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [191]:
html = df.to_html()
html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Fact</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td

In [192]:
table_facts = df.to_dict
table_facts

<bound method DataFrame.to_dict of                    Fact                          Value
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.52 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                  -153 to 20 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers>

In [193]:
type(table_facts)

method

In [194]:
   hemisphere_image_urls = [
    {"title": "Cerberus Hemisphere", "img_url": "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif"},
    {"title": "Schiaparelli Hemisphere", "img_url": "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif"},
    {"title": "Schiaparelli Major Hemisphere", "img_url": "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif"},
    {"title": "Valles Marineris Hemisphere", "img_url": "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif"},
    ]

In [195]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'title': 'Schiaparelli Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]

In [196]:
# Create connection variable
conn = 'mongodb://localhost:27017'
# Pass connection to the pymongo instance.
client = pymongo.MongoClient(conn)

# Connect to a database. Will create one if not already available.
db = client.scrape_db

In [197]:
# Retrieve the entire collection in lists
listings = list(db.nasalistings.find())
print(listings)

[{'_id': ObjectId('5ccf71f6a759b33d7c7528d2'), 'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'_id': ObjectId('5ccf71f6a759b33d7c7528d3'), 'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'_id': ObjectId('5ccf71f6a759b33d7c7528d4'), 'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'_id': ObjectId('5ccf71f6a759b33d7c7528d5'), 'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}, {'_id': ObjectId('5ccf71f6a759b33d7c7528d6'), 'news_title': 'Mars Weather (@MarsWxReport) | Twitter'}, {'_id': ObjectId('5ccf71f6a759b33d7c7528d7'), 'news_p': 'Managed by the Mars Exploration Program and the Jet Propul

In [198]:
tablelist = db.tablelisting.find()
tablelist

<pymongo.cursor.Cursor at 0x1c333644470>

In [199]:
table_listings = list(db.tablelisting.find())
print(table_listings)

[{'_id': ObjectId('5ccf71f6a759b33d7c7528da'), 'Equatorial Diameter:': '6,792 km', 'Polar Diameter:': '6,752 km', 'Mass:': '6.42 x 10^23 kg (10.7% Earth)', 'Moons:': '2 (Phobos & Deimos)', 'Orbit Distance:': '227,943,824 km (1.52 AU)', 'Orbit Period:': '687 days (1.9 years)', 'Surface Temperature:': '-153 to 20 °C', 'First Record:': '2nd millennium BC', 'Recorded By:': 'Egyptian astronomers'}]


In [201]:
type(table_listings)

list

In [202]:
for col in table_listings:
    print(col)

{'_id': ObjectId('5ccf71f6a759b33d7c7528da'), 'Equatorial Diameter:': '6,792 km', 'Polar Diameter:': '6,752 km', 'Mass:': '6.42 x 10^23 kg (10.7% Earth)', 'Moons:': '2 (Phobos & Deimos)', 'Orbit Distance:': '227,943,824 km (1.52 AU)', 'Orbit Period:': '687 days (1.9 years)', 'Surface Temperature:': '-153 to 20 °C', 'First Record:': '2nd millennium BC', 'Recorded By:': 'Egyptian astronomers'}
