### Part 1: Scrape Mars News

In [77]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
import json
import pymongo
#from pymongo import MongoClient

In [78]:
# Start a visible (non-headless) Chrome session driven by Splinter.
# ChromeDriverManager().install() supplies the path of the chromedriver binary.
driver_path = ChromeDriverManager().install()
executable_path = dict(executable_path=driver_path)
browser = Browser('chrome', headless=False, **executable_path)

In [79]:
# Mars News Site to Scrape
url = 'https://redplanetscience.com/'
browser.visit(url)

# The next cell snapshots `browser.html` immediately. Give the page a moment
# to populate its article list first (it may be filled in by JavaScript after
# the initial load), otherwise the snapshot can contain no articles at all.
if not browser.is_element_present_by_css('div.list_text', wait_time=5):
    print('Warning: no article entries appeared within 5 seconds')

In [80]:
# Snapshot the page source as the browser currently sees it
html = browser.html

# Parse that snapshot into a searchable tree using the 'html.parser' backend
soup = BeautifulSoup(html, features='html.parser')

In [81]:
# Earlier approach (superseded by the per-article loop below): scrape titles and teaser body text separately
#content_titles = soup.find_all('div', class_='content_title')
#previews = soup.find_all('div', class_='article_teaser_body')

In [82]:

# Collect every <div class="list_text"> wrapper; each one holds a date,
# a title and a teaser for a single article.
elements = soup.find_all('div', attrs={'class': 'list_text'})
elements

[<div class="list_text">
 <div class="list_date">September 26, 2022</div>
 <div class="content_title">Mars 2020 Unwrapped and Ready for More Testing</div>
 <div class="article_teaser_body">In time-lapse video, bunny-suited engineers remove the inner layer of protective foil on NASA's Mars 2020 rover after it was relocated for testing.</div>
 </div>,
 <div class="list_text">
 <div class="list_date">September 26, 2022</div>
 <div class="content_title">6 Things to Know About NASA's Ingenuity Mars Helicopter</div>
 <div class="article_teaser_body">The first helicopter attempting to fly on another planet is a marvel of engineering. Get up to speed with these key facts about its plans.</div>
 </div>,
 <div class="list_text">
 <div class="list_date">September 26, 2022</div>
 <div class="content_title">NASA's Mars 2020 Rover Goes Coast-to-Coast to Prep for Launch</div>
 <div class="article_teaser_body">The agency's first step in returning rocks from Mars just arrived at Kennedy Space Center. T

In [83]:
# Build one record per article: its headline and its teaser paragraph.
scraped_data = []

for article in elements:
    title_tag = article.find('div', class_='content_title')
    teaser_tag = article.find('div', class_='article_teaser_body')

    # `find` returns None when a tag is absent. Check for that explicitly and
    # report it, rather than letting a blanket `except Exception` hide the
    # resulting AttributeError along with any other failure.
    if title_tag is None or teaser_tag is None:
        print('Skipping an article entry with a missing title or teaser')
        continue

    scraped_data.append({
        'content_titles': title_tag.get_text(),
        'previews': teaser_tag.get_text(),
    })

# The per-article click on the "More" link was dropped: `elements` comes from
# HTML parsed before this loop, so clicking could not change the scraped data.

scraped_data

[{'content_titles': 'Mars 2020 Unwrapped and Ready for More Testing',
  'previews': "In time-lapse video, bunny-suited engineers remove the inner layer of protective foil on NASA's Mars 2020 rover after it was relocated for testing."},
 {'content_titles': "6 Things to Know About NASA's Ingenuity Mars Helicopter",
  'previews': 'The first helicopter attempting to fly on another planet is a marvel of engineering. Get up to speed with these key facts about its plans.'},
 {'content_titles': "NASA's Mars 2020 Rover Goes Coast-to-Coast to Prep for Launch",
  'previews': "The agency's first step in returning rocks from Mars just arrived at Kennedy Space Center. The Mars 2020 team now begins readying for a launch to the Red Planet this July."},
 {'content_titles': 'NASA Adds Return Sample Scientists to Mars 2020 Leadership Team',
  'previews': 'The leadership council for Mars 2020 science added two new members who represent the interests of scientists destined to handle and study the first sam

In [84]:
# The records are already collected in `scraped_data`, so end the browser session
browser.quit()

##### Optional Activities

In [85]:
# Save the scraped records to disk as a single JSON array
with open('scraped_mars_news.json', mode='w') as file:
    file.write(json.dumps(scraped_data))

In [86]:
# Create a PyMongo client for the MongoDB server at localhost:27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

# Handles for the "mars_news_db" database and its "news" collection
mars_news_db = client.mars_news_db
mars_news_collection = mars_news_db.news

In [87]:
# Store all scraped articles in one round trip instead of one call per record.
# PyMongo adds an `_id` ObjectId to every dict it inserts; inserting copies
# keeps `scraped_data` free of ObjectIds, so it stays JSON-serialisable if the
# export cell is run again.
# NOTE(review): re-running this cell appends the same articles again; clear
# the collection first if duplicates are not wanted.
if scraped_data:  # insert_many rejects an empty list
    mars_news_collection.insert_many([dict(data1) for data1 in scraped_data])

In [88]:
# Read back every document in the collection (empty filter = match all)
# and print each one to confirm what is stored.
results = mars_news_collection.find({})
for result in results:
    print(result)

{'_id': ObjectId('6331b4fe8b5757034f12d774'), 'content_titles': 'Mars 2020 Unwrapped and Ready for More Testing', 'previews': "In time-lapse video, bunny-suited engineers remove the inner layer of protective foil on NASA's Mars 2020 rover after it was relocated for testing."}
{'_id': ObjectId('6331b4fe8b5757034f12d775'), 'content_titles': "A Year of Surprising Science From NASA's InSight Mars Mission", 'previews': "A batch of new papers summarizes the lander's findings above and below the surface of the Red Planet."}
{'_id': ObjectId('6331b4fe8b5757034f12d776'), 'content_titles': 'NASA Updates Mars 2020 Mission Environmental Review', 'previews': 'NASA and the Department of Energy have completed a more detailed risk analysis for the Mars 2020 rover launch from Florida.'}
{'_id': ObjectId('6331b4fe8b5757034f12d777'), 'content_titles': "NASA's Mars Perseverance Rover Gets Its Sample Handling System", 'previews': 'The system will be collecting and storing Martian rock and soil. Its install