# Setup

In [1]:
#dependencies
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo
import pymongo

In [2]:
#setup splinter
# webdriver_manager resolves the chromedriver binary path; the path is then
# handed to Splinter as the `executable_path` keyword argument.
executable_path = dict(executable_path=ChromeDriverManager().install())
# headless=False is passed explicitly, as in the original setup.
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 96.0.4664
Get LATEST chromedriver version for 96.0.4664 google-chrome
Driver [C:\Users\Chrismus Time\.wdm\drivers\chromedriver\win32\96.0.4664.45\chromedriver.exe] found in cache


# Scraping Latest News

In [3]:
#url scraping
news_url = 'https://redplanetscience.com/'
browser.visit(news_url)

# The next cell indexes the first 'content_title' div, so make sure at least one
# exists before the page source is captured. The call polls for up to
# `wait_time` seconds and returns a bool instead of raising, so a page that is
# already complete is not slowed down.
# NOTE(review): presumably the article list is injected client-side -- confirm.
browser.is_element_present_by_css('div.content_title', wait_time=5)

# Snapshot the current DOM and parse it for the following cells.
html = browser.html
soup = bs(html, 'html.parser')

In [4]:
# Checking scrape
# Only a short preview is printed: dumping the whole prettified document bloats
# the saved notebook and buries the cells that follow.
PREVIEW_CHARS = 1500
print(soup.prettify()[:PREVIEW_CHARS])

<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <link crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta1/dist/css/bootstrap.min.css" integrity="sha384-giJF6kkoqNQ00vy+HMDP7azOuL0xtbfIcaT9wjKHr8RbDVddVHyTfAAsrekwKmP1" rel="stylesheet"/>
  <link href="css/font.css" rel="stylesheet" type="text/css"/>
  <link href="css/app.css" rel="stylesheet" type="text/css"/>
  <link crossorigin="anonymous" href="https://pro.fontawesome.com/releases/v5.10.0/css/all.css" integrity="sha384-AYmEC3Yw5cVb3ZcuHtOA93w35dYTsvhLPVnYs9eStHfGJvOvKxVfELGroGkvsg+p" rel="stylesheet"/>
  <title>
   News - Mars Exploration Program
  </title>
 </head>
 <body>
  <div class="col-md-12">
   <div class="row">
    <nav class="navbar navbar-expand-lg navbar-light fixed-top">
     <div class="container-fluid">
      <a class="navbar-brand" href="#">
       <img src="image/nasa.png" width="80"/>
       <span class="logo">
        MA

In [5]:
#retrieve latest news title paragraph
# Collect every headline / teaser container, then keep the first of each.
title_divs = soup.find_all('div', class_='content_title')
teaser_divs = soup.find_all('div', class_='article_teaser_body')

news_title = title_divs[0].text
news_p = teaser_divs[0].text
print(f"{news_title}: {news_p}")

Sensors on Mars 2020 Spacecraft Answer Long-Distance Call From Earth: Instruments tailored to collect data during the descent of NASA's next rover through the Red Planet's atmosphere have been checked in flight.


# Scraping Featured Image

In [6]:
#visiting featured image website
image_url = "https://spaceimages-mars.com/"
browser.visit(image_url)

# Re-bind `html` / `soup` to the page that is now loaded in the browser.
html = browser.html
soup = bs(html, features="html.parser")

In [7]:
#retrieve featured image
# Locate the anchor carrying both CSS classes, then read its href attribute.
featured_anchor = soup.find('a', class_='showimg fancybox-thumbs')
partial_url = featured_anchor['href']
print(partial_url)

image/featured/mars1.jpg


In [8]:
#creating final url
# image_url already ends with '/', so the two parts are joined with no separator.
featured_image_url = f"{image_url}{partial_url}"
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars1.jpg


# Mars Facts

In [9]:
#url scraping w pandas
facts_url = "https://galaxyfacts-mars.com/"
# read_html returns a list with one DataFrame per table it parses from the page.
tables = pd.read_html(io=facts_url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [10]:
#grabbing only relevant tables
# The second parsed table is the one kept for the facts export.
FACTS_TABLE_INDEX = 1
df = tables[FACTS_TABLE_INDEX]
df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [11]:
#saving to html
# Write the facts table as an HTML <table>, leaving out the DataFrame row index.
df.to_html(buf='facts_mars.html', index=False)

# Mars Hemispheres

In [12]:
#scraping url
hemi_base_url = "https://marshemispheres.com/"
browser.visit(hemi_base_url)

# Parse the hemisphere index page that is now loaded in the browser.
html = browser.html
soup = bs(html, features="html.parser")

In [13]:
#grabbing image containers
hemi_data = soup.find_all('div', class_ = 'item')

# Read each title from the <h3> inside its own container, once, up front.
# Indexing the page-wide <h3> list by container position would misalign titles
# as soon as the page holds any other <h3>; containers without a heading are
# skipped because there would be no link text to click.
hemi_headings = (item.find('h3') for item in hemi_data)
hemi_titles = [heading.text for heading in hemi_headings if heading is not None]

#empty list to store data
hemi_ls_of_dicts = []
#loop through titles to pull data
for hemi_text in hemi_titles:
    #splinter click into each link (links.find_by_partial_text replaces the
    #deprecated click_link_by_partial_text helper)
    browser.links.find_by_partial_text(hemi_text).click()

    #finding specific url: first anchor inside the 'downloads' div of the detail page
    subsoup = bs(browser.html, 'html.parser')
    hemi_partial_url = subsoup.find('div', class_ = 'downloads').find('a')['href']

    #appending info; the href is relative, so it is prefixed with the site root
    hemi_ls_of_dicts.append({
        'title': hemi_text,
        'img_url': hemi_base_url + hemi_partial_url,
    })

    #back to home page so the next title's link can be clicked
    browser.back()



In [14]:
# Display the collected list of {'title', 'img_url'} records for inspection.
hemi_ls_of_dicts

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]

# Closing Browser

In [15]:
#close browser
# Ends the Splinter session opened in the setup cell.
browser.quit()