# Mission to Mars

In [1]:
# Import dependencies
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager
# from flask import Flask, render_template, redirect
# from flask_pymongo import PyMongo

In [2]:
# Set up Splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\alever\.wdm\drivers\chromedriver\win32\91.0.4472.19\chromedriver.exe] found in cache


### NASA Mars News

In [3]:
# Visit redplanetscience.com
url = "https://redplanetscience.com/"
browser.visit(url)

# Scrape page into Soup
html = browser.html
soup = bs(html, "html.parser")

In [4]:
# Get the news title
news_title = soup.find_all('div', class_='content_title')[0].text

# Get the paragraph text
news_p = soup.find_all('div', class_='article_teaser_body')[0].text

# Print news title and paragraph text
print(f'Title:- {news_title}')
print(f'Paragraph:- {news_p}')

Title:- NASA's Mars 2020 Rover Closer to Getting Its Name
Paragraph:- 155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July.


### JPL Mars Space Images - Featured Image

In [5]:
# Visit spaceimages-mars.com
url = "https://spaceimages-mars.com/"
browser.visit(url)

# Scrape page into Soup
html = browser.html
soup = bs(html, "html.parser")

In [6]:
# Find the url for the featured image
featured_image = soup.find_all('img')[1]["src"]
featured_image_url = url + featured_image

# Print featured image url
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars2.jpg


### Mars Facts

In [7]:
# Set url to scrape
url = 'https://galaxyfacts-mars.com/'
browser.visit(url)

# Read url's html
tables = pd.read_html(url)

# Show choice of tables to scrape
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [8]:
# Select required table and set into a dataframe using indexing
mars_df = tables[0]

# Show selected dataframe
mars_df

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [9]:
# Set column names
mars_df.columns = ['Description', 'Mars', 'Earth']

# Show selected dataframe
mars_df

Unnamed: 0,Description,Mars,Earth
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [10]:
# Drop top row
mars_df = mars_df.drop([0])
mars_df

Unnamed: 0,Description,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [11]:
# Set index
mars_df = mars_df.set_index('Description')
mars_df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [12]:
# Convert dataframe data to html data string
mars_table = mars_df.to_html()
mars_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

In [13]:
# Clean up html data string by removing unwanted new lines (\n)
mars_table.replace('\n', '')
print(mars_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars</th>
      <th>Earth</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Diameter:</th>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <th>Distance from Sun:</th>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <th>Length of Year:</th>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <th>Temperature:</th>
      <td>-87 to -5 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody>
</table>


In [21]:
# Save the datafram directly to a file in html format
mars_df.to_html('table.html')

 ### Mars Hemispheres

In [15]:
# Visit spaceimages-mars.com
url = "https://marshemispheres.com/"
browser.visit(url)

# Scrape page into Soup
html = browser.html
soup = bs(html, "html.parser")

In [16]:
# Find hemisphere items to extract from html
mars_hemisphere = soup.find('div', class_ = 'collapsible results')
mars_items = mars_hemisphere.find_all('div', class_ = 'item')

# Set empty list to store hemisphere title and image URLs
hemisphere_image_urls = []

# Loop through hemisphere items to extract hemisphere title and to visit each link to obtain full image url, storing both in a dictionary and then appending each dictionary to the empty list
for item in mars_items:

    # Extract hemisphere title
    hemisphere = item.find('div', class_ = 'description')
    title = hemisphere.h3.text

    # Extract image url
    hemisphere_url = hemisphere.a["href"]
    browser.visit(url + hemisphere_url)
    html = browser.html
    soup = bs(html, "html.parser")
    img_url_path = soup.find('li').a["href"]
    img_url = url + img_url_path

    # Store the hemisphere title and image url into a dictionary
    dict = {
        "title" : title,
        "img_url" : img_url
    }

    # Append the dictionary into the previously created list
    hemisphere_image_urls.append(dict)

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]


In [17]:
# Store all scraped data in a dictionary
mars_data = {
    "news_title": news_title,
    "news_p": news_p,
    "featured_image_url": featured_image_url,
    "mars_table": mars_table,
    "hemisphere_image_urls": hemisphere_image_urls
}

# Show the scraped data
mars_data

{'news_title': "NASA's Mars 2020 Rover Closer to Getting Its Name",
 'news_p': "155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July.",
 'featured_image_url': 'https://spaceimages-mars.com/image/featured/mars2.jpg',
 'mars_table': None,
 'hemisphere_image_urls': [{'title': 'Cerberus Hemisphere Enhanced',
   'img_url': 'https://marshemispheres.com/images/full.jpg'},
  {'title': 'Schiaparelli Hemisphere Enhanced',
   'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'},
  {'title': 'Syrtis Major Hemisphere Enhanced',
   'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'},
  {'title': 'Valles Marineris Hemisphere Enhanced',
   'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]}

In [18]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]

In [20]:
print(mars_table)

None
