# Web Scraping Homework Mission to Mars

In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
import os
import requests
import pymongo
import pandas as pd
from urllib.parse import urljoin
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [4]:
# Initialize PyMongo
mongo = PyMongo(app, uri="mongodb://localhost:27017/mission_to_mars")

In [5]:
# Define database and collection
db = client.mars_news_db
collection = db.articles

In [6]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [/Users/orlandogarciaylopez/.wdm/drivers/chromedriver/mac64/91.0.4472.101/chromedriver] found in cache


## Step 1 Scraping

## Nasa Mars News

In [7]:
# URL opened with Chrome
news_url = 'https://redplanetscience.com'
browser.visit(news_url)

# Scrape Site into Soup
html = browser.html
soup = bs(html,'html.parser')

In [8]:
# News Title
news_title = soup.find('div', class_='content_title').text
news_title

"NASA's Curiosity Mars Rover Takes a New Selfie Before Record Climb"

In [9]:
# News Paragraph Text
news_paragraph = soup.find('div', class_='article_teaser_body').text
news_paragraph

'Along with capturing an image before its steepest ascent ever, the robotic explorer filmed its "selfie stick," or robotic arm, in action.'

## JPL Mars Space Images

In [10]:
# Featured Space Image Site
url_image = 'https://spaceimages-mars.com'
browser.visit(url_image)

In [11]:
# Scrape Site into Soup
html_image = browser.html
soup = bs(browser.html, 'html.parser')

# Find Image
featured_image = soup.find_all('img', class_='headerimage fade-in')

# Save complete url string for this image
featured_image_url = f"{url_image}/{featured_image[0]['src']}"
featured_image_url

https://spaceimages-mars.comimage/featured/mars1.jpg


## Mars Facts

In [12]:
# Mars Facts
url_facts = "https://galaxyfacts-mars.com/"

# Use Pandas to Convert the data to HTML table string

facts_table = pd.read_html(url_facts)
facts_table

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [13]:
# Facts about the planet: Diameter, Mass...
mars_df_facts = facts_table[0]
mars_df_facts

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


## Mars Hemispheres

In [14]:
# Astreology Site
url = 'https://marshemispheres.com'
browser.visit(url)

# Soup for the Site
html = browser.html
soup = bs(html, 'html.parser')

# Hemispheres
hemisphere_items = soup.find_all('div', class_='item')

In [15]:
# List Dictionary Loop

url = 'https://marshemispheres.com/'
hemisphere_images = []


for item in hemisphere_items:
    hemisphere_dict = {}
    link = item.find('a', class_='itemLink')['href']
    title = item.find('h3').text
    browser.visit(url+link)
    html = browser.html
    soup = bs(html, 'html.parser')
    downloads = soup.find('div', class_='downloads')
    img = downloads.ul.li.find('a')['href']
    hemisphere_dict["title"] = title
    hemisphere_dict["img_url"] = urljoin(url,img)
    hemisphere_images.append(hemisphere_dict)

In [16]:
hemisphere_images

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]

# Mars Data

In [17]:
mars_data = {}
mars_data["news_title"] = news_title
mars_data["news_paragraph"] = news_paragraph
mars_data["featured_image"] = featured_image_url
mars_data["mars_facts"] = mars_df_facts
mars_data["mars_hemispheres"] = hemisphere_images

In [18]:
mars_data

{'News Title': "NASA's Curiosity Mars Rover Takes a New Selfie Before Record Climb",
 'News Paragraph': 'Along with capturing an image before its steepest ascent ever, the robotic explorer filmed its "selfie stick," or robotic arm, in action.',
 'Featured Image': 'https://spaceimages-mars.comimage/featured/mars1.jpg',
 'Mars Facts':                          0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
 'Mars Hemispheres': [{'title': 'Cerberus Hemisphere Enhanced',
   'img_url': 'https://marshemispheres.com/images/full.jpg'},
  {'title': 'Schiaparelli Hemisphere Enhanc

In [19]:
browser.quit()