In [1]:
#import dependencies

from bs4 import BeautifulSoup as bs
import requests
import pymongo
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo
import pandas as pd


In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

## Step 1 - Scraping

### NASA Mars News

Scrape the [Mars News Site](https://redplanetscience.com/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [3]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# URLs of pages to be scraped
url = 'https://redplanetscience.com/'

browser.visit(url)
html=browser.html
soup=bs(html,'html.parser')



[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389






[WDM] - Driver [/Users/AlexGoodman/.wdm/drivers/chromedriver/mac64/89.0.4389.23/chromedriver] found in cache


In [4]:
#Scrap site to collect news title 
news_title=soup.find_all('div', class_='content_title')[0].text

print(news_title)

NASA's Mars 2020 Rover Completes Its First Drive


In [5]:
#Scrap site to collect paragraph text
news_p = soup.find_all('div', class_='article_teaser_body')[0].text

print(news_p)

In a 10-plus-hour marathon, the rover steered, turned and drove in 3-foot (1-meter) increments over small ramps.


### JPL Mars Space Images - Featured Image

* Visit the url for the Featured Space Image site [here](https://spaceimages-mars.com).

* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.

* Make sure to find the image url to the full size `.jpg` image.

* Make sure to save a complete url string for this image.

In [6]:
#Set up splinter path for image
img_url = 'https://spaceimages-mars.com'

browser.visit(img_url)
html=browser.html
img_soup=bs(html,'html.parser')

In [7]:
featured_image_url=img_soup.find('div', class_='header')


In [8]:
featured_image_url=featured_image_url.find('img', class_='headerimage fade-in')
print(featured_image_url.prettify())

<img class="headerimage fade-in" src="image/featured/mars1.jpg"/>



In [9]:
src = featured_image_url
src['src']

'image/featured/mars1.jpg'

In [10]:
full_url = img_url+'/'+src['src']
full_url

'https://spaceimages-mars.com/image/featured/mars1.jpg'

### Mars Facts

* Visit the Mars Facts webpage [here](https://galaxyfacts-mars.com) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to a HTML table string.

In [11]:
facts_url = 'https://galaxyfacts-mars.com'

In [12]:
tables = pd.read_html(facts_url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [13]:
df=pd.DataFrame(tables[0])
df

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [14]:
df2=pd.DataFrame(tables[1])
df2

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [15]:
html_facts=df.to_html()
html_facts

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Mars - Earth Comparison</td>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Diameter:</td>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Distance from Sun:</td>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Length of Year:</td>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Temperature:</td>\n      <td>-87 to -5 °C</

In [16]:
html_facts2=df2.to_html()
html_facts2

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n   

### Mars Hemispheres

* Visit the astrogeology site [here](https://marshemispheres.com/) to obtain high resolution images for each of Mar's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [17]:
hemi_url = 'https://marshemispheres.com/'

In [18]:
browser.visit(hemi_url)
html=browser.html
hemi_soup=bs(html,'html.parser')

### Find Cerberus Hemisphere Image URL

################################################

In [19]:
cerberus_url = hemi_soup.find('a', class_='itemLink product-item')
cerberus_url['href']

'cerberus.html'

In [20]:
cerberus_link = hemi_url+cerberus_url['href']
cerberus_link

'https://marshemispheres.com/cerberus.html'

In [21]:
browser.visit(cerberus_link)
html=browser.html
cerb_soup=bs(html,'html.parser')

In [22]:
cerberus = cerb_soup.find('img', class_='wide-image')
cerberus['src']

'images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'

In [23]:
full_cerberus = hemi_url+cerberus['src']
full_cerberus

'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'

### Find Schiaparelli Hemisphere Image URL

################################################

In [24]:
schia_url = hemi_soup.find_all('a', class_='itemLink product-item')
schia_url[2]['href']

'schiaparelli.html'

In [25]:
schia_link = hemi_url+schia_url[2]['href']
schia_link

'https://marshemispheres.com/schiaparelli.html'

In [26]:
browser.visit(schia_link)
html=browser.html
schia_soup=bs(html,'html.parser')

In [27]:
schiaparelli = schia_soup.find('img', class_='wide-image')
schiaparelli['src']

'images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'

In [28]:
full_schiaparelli = hemi_url+schiaparelli['src']
full_schiaparelli

'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'

### Find Syrtis Hemisphere Image URL

################################################

In [29]:
syrtis_url = hemi_soup.find_all('a', class_='itemLink product-item')
syrtis_url[4]['href']

'syrtis.html'

In [30]:
syrtis_link = hemi_url+syrtis_url[4]['href']
syrtis_link

'https://marshemispheres.com/syrtis.html'

In [31]:
browser.visit(syrtis_link)
html=browser.html
syrtis_soup=bs(html,'html.parser')

In [32]:
syrtis = syrtis_soup.find('img', class_='wide-image')
syrtis['src']

'images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'

In [33]:
full_syrtis = hemi_url+syrtis['src']
full_syrtis

'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'

### Find Valles Hemisphere Image URL

################################################

In [34]:
valles_url = hemi_soup.find_all('a', class_='itemLink product-item')
valles_url[4]['href']

'syrtis.html'

In [35]:
valles_link = hemi_url+valles_url[6]['href']
valles_link

'https://marshemispheres.com/valles.html'

In [36]:
browser.visit(valles_link)
html=browser.html
valles_soup=bs(html,'html.parser')

In [37]:
valles = valles_soup.find('img', class_='wide-image')
valles['src']

'images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'

In [38]:
full_valles = hemi_url+valles['src']
full_valles

'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'

## Hemisphere Dictionary

In [39]:
hemisphere_image_urls = [
    {"title": "Valles Marineris Hemisphere", "img_url": full_valles},
    {"title": "Cerberus Hemisphere", "img_url": full_cerberus},
    {"title": "Schiaparelli Hemisphere", "img_url": full_schiaparelli},
    {"title": "Syrtis Major Hemisphere", "img_url": full_syrtis}
]