In [1]:
# Dependencies
import os
import time
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Start a visible (headless=False) Chrome session driven by splinter.
# ChromeDriverManager().install() yields the chromedriver path to hand to the browser.
driver_path = ChromeDriverManager().install()
executable_path = {'executable_path': driver_path}
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [C:\Users\fhelm\.wdm\drivers\chromedriver\win32\93.0.4577.63\chromedriver.exe] found in cache


### NASA Mars News

In [3]:
# Open a PyMongo client against the MongoDB instance at localhost:27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [4]:
# Pick the database and the collection inside it
# NOTE(review): 'commerce_db' / 'items' do not obviously relate to the Mars data
# scraped below - confirm these are the intended names.
db = client['commerce_db']
collection = db['items']

In [5]:
# URL of page to be scraped
url = 'https://redplanetscience.com/'
browser.visit(url)

# Wait (up to 10 s) for the first news item container to exist instead of
# sleeping for a fixed 2 s: a fixed delay is too short on a slow connection
# and wasted time on a fast one. Presumably the list is filled in by
# JavaScript after the initial page load, which is why a delay was needed.
if not browser.is_element_present_by_css('div.list_text', wait_time=10):
    print('Warning: no div.list_text element appeared within 10 seconds')

# Create BeautifulSoup object from the rendered page; parse with 'lxml'
soup = BeautifulSoup(browser.html, 'lxml')

In [6]:
# Gather every <div class="list_text">; each one wraps a title and a teaser paragraph
results = soup.find_all('div', attrs={'class': 'list_text'})

In [7]:
# Print the headline and teaser text of every news item found on the page
for result in results:

    # Each item is expected to contain a title div and a teaser div
    title = result.find('div', class_='content_title')
    thread = result.find('div', class_='article_teaser_body')

    # find() returns None when a div is absent. Check both explicitly: the
    # title lookup used to sit outside the try block, so a missing title
    # raised an uncaught AttributeError and aborted the whole loop.
    if title is None or thread is None:
        print('Skipping news item with a missing title or teaser')
        continue

    title_text = title.text
    teaser_body = thread.text.lstrip()

    # Only report items that actually carry teaser text
    if teaser_body:
        print('\n-----------------\n')
        print(title_text)
        print('Body:')
        print(teaser_body)


-----------------

A New Video Captures the Science of NASA's Perseverance Mars Rover
Body:
With a targeted launch date of July 30, the next robotic scientist NASA is sending to the to the Red Planet has big ambitions.

-----------------

InSight's 'Mole' Team Peers into the Pit
Body:
Efforts to save the heat probe continue.

-----------------

NASA to Hold Mars 2020 Perseverance Rover Launch Briefing
Body:
Learn more about the agency's next Red Planet mission during a live event on June 17.

-----------------

NASA-JPL Names 'Rolling Stones Rock' on Mars
Body:
NASA's Mars InSight mission honored one of the biggest bands of all time at Pasadena concert.

-----------------

Three New Views of Mars' Moon Phobos
Body:
Taken with the infrared camera aboard NASA's Odyssey orbiter, they reveal temperature variations on the small moon as it drifts into and out of Mars’ shadow.

-----------------

NASA's InSight 'Hears' Peculiar Sounds on Mars
Body:
Listen to the marsquakes and other, less-ex

### JPL Mars Space Images - Featured Image

In [8]:
# website: https://spaceimages-mars.com/
# Use splinter to navigate the site and find the image url for the current Featured
# Mars Image and assign the url string to a variable called featured_image_url

url = "https://spaceimages-mars.com/"
browser.visit(url)

# Wait (up to 10 s) for the featured <img> carrying both classes that the
# later lookup relies on, rather than sleeping for a fixed 2 s.
if not browser.is_element_present_by_css('img.headerimage.fade-in', wait_time=10):
    print('Warning: featured image element did not appear within 10 seconds')

In [9]:
# Read the rendered page once so both parses see the same markup
html = browser.html
soup = BeautifulSoup(html, 'lxml')

# Build a second parse tree with the built-in parser. This must call the
# BeautifulSoup class: calling the existing `soup` object is shorthand for
# soup.find_all(...), which returned an empty result set instead of a document.
img_soup = BeautifulSoup(html, 'html.parser')

In [10]:
# All <div class="header"> elements on the featured-image page
header = soup.find_all('div', attrs={'class': 'header'})

In [11]:
# Locate the featured <img> tag, then read its src attribute
featured_img_tag = soup.find('img', class_='headerimage fade-in')
header_image = featured_img_tag.get('src')
header_image

'image/featured/mars3.jpg'

In [12]:
# Resolve the image src against the page URL. urljoin copes with a base that
# lacks a trailing slash and with root-relative or absolute src values, which
# plain string concatenation would turn into a malformed URL.
featured_image_url = urljoin(url, header_image)
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


### Mars Facts

In [13]:
# Address of the Mars facts page whose tables are read below
url = "https://galaxyfacts-mars.com/"

In [14]:
# Read every HTML table at `url` into DataFrames and keep the first one
tables = pd.read_html(url)
df = tables[0]

# Relabel the three columns in place.
# NOTE(review): "Plant 1"/"Plant 2" look like typos for "Planet"; kept as-is
# because the saved HTML output depends on these labels - confirm before renaming.
column_labels = ["Description", "Plant 1", "Plant 2"]
df.columns = column_labels
df.head()

Unnamed: 0,Description,Plant 1,Plant 2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"


In [15]:
# Convert the DataFrame into an HTML <table> string (no buffer given, so the markup is returned)
html_table = df.to_html(buf=None)
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Plant 1</th>\n      <th>Plant 2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Mars - Earth Comparison</td>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Diameter:</td>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Distance from Sun:</td>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Length of Year:</td>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Temperature:</td>\n  

In [17]:
# Strip every newline character from the generated table markup
clean_html = ''.join(html_table.split('\n'))
clean_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Description</th>      <th>Plant 1</th>      <th>Plant 2</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Mars - Earth Comparison</td>      <td>Mars</td>      <td>Earth</td>    </tr>    <tr>      <th>1</th>      <td>Diameter:</td>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2</td>      <td>1</td>    </tr>    <tr>      <th>4</th>      <td>Distance from Sun:</td>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <th>5</th>      <td>Length of Year:</td>      <td>687 Earth days</td>      <td>365.24 days</td>    </tr>    <tr>      <th>6</th>      <td>Temperature:</td>      <td>-87 to -5 °C</td>      <td>-88 to 58°C</td>    </tr>  </tbody></table>'

In [19]:
# Write the table markup to mars_table.html (path is relative to the working directory)
df.to_html(buf='mars_table.html')