In [1]:
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import time

In [2]:
def init_browser():
    # @NOTE: Replace the path with your actual path to the chromedriver
    executable_path = {"executable_path": "chromedriver.exe"}
    return Browser("chrome", **executable_path, headless=False)

In [3]:
def scrape_news():
    browser = init_browser()
    url="https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
    browser.visit(url)
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")

    headlines= soup.find_all("li", class_="slide")
    news_title=headlines[0].find("h3").text
    news_p=headlines[0].a.text
    results = {"title":news_title,"p":news_p}
   
    # Close the browser after scraping
    browser.quit()

    return results

In [4]:
def scrape_image():
    browser = init_browser()
    url="https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(url)
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")

    results = soup.find_all("article", class_="carousel_item")
    image_url=results[0].find("a")["data-fancybox-href"]
    featured_image_url = "https://www.jpl.nasa.gov"+ image_url
    
    # Close the browser after scraping
    browser.quit()

    return featured_image_url

In [5]:
def scrape_tweets():
    browser = init_browser()
    url="https://twitter.com/marswxreport?lang=en"
    browser.visit(url)
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")

    results = soup.find_all(class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
    mars_weather = results[0].text
    
    # Close the browser after scraping
    browser.quit()

    return mars_weather


In [6]:
def scrape_facts():
    browser = init_browser()
    url="https://space-facts.com/mars/"
    browser.visit(url)
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")

    results=soup.find_all("table", class_="tablepress tablepress-id-p-mars")
    result = str(results[0])
    with open("facts_table.html", "w") as file:
        file.write(str(result))
    #Close the browser after scraping
    browser.quit()
    return result

In [7]:
def scrape_hemispheres():
    browser = init_browser()
    url="https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(url)
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    results=soup.find_all("h3")
    #results:
    #[<h3>Cerberus Hemisphere Enhanced</h3>,
     #<h3>Schiaparelli Hemisphere Enhanced</h3>,
     #<h3>Syrtis Major Hemisphere Enhanced</h3>,
     #<h3>Valles Marineris Hemisphere Enhanced</h3>]
    
    hemisphere_image_urls=[]
    
    #Get the link of the image in each name
    for result in results:
        title=result.text
        
        #name =text_str.split(' Hemisphere ')[0].lower().replace(" ","_")
        #enhanced =text_str.split(' Hemisphere ')[1].lower()
        #link_name = name+"_"+ enhanced
        
        #Click the according links to get the image's link
        url="https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
        browser.visit(url)
        time.sleep(1)
        
        browser.click_link_by_partial_text(title)
        
        html = browser.html
        soup = BeautifulSoup(html, "html.parser")
        
        #Get the relative link in the webpage
        link = soup.find("img", class_="wide-image")
        #Create a complete link of the image
        img_url = "https://astrogeology.usgs.gov" + link["src"]
        
        info_dict = {"title":title,"img_url":img_url}
        hemisphere_image_urls.append(info_dict)
        
    #Close the browser after scraping
    browser.quit()
    return hemisphere_image_urls

In [8]:
from flask_pymongo import PyMongo
from flask import Flask, render_template, redirect

In [9]:
app = Flask(__name__)

In [10]:
app.config["MONGO_URI"] = "mongodb://localhost:27017/mars_db"
mongo = PyMongo(app)

In [11]:
mars_info = mongo.db.mars_info

In [12]:
news = scrape_news()
image = scrape_image()
tweets = scrape_tweets()
facts = scrape_facts()
hemispheres = scrape_hemispheres()
update_data ={'news':news,'image':image,'tweets':tweets,'hemispheres':hemispheres}

In [344]:
facts = scrape_facts()

In [13]:
facts

'<table class="tablepress tablepress-id-p-mars" id="tablepress-p-mars"><tbody><tr class="row-1 odd"><td class="column-1"><strong>Equatorial Diameter:</strong></td><td class="column-2">6,792 km<br/></td></tr><tr class="row-2 even"><td class="column-1"><strong>Polar Diameter:</strong></td><td class="column-2">6,752 km<br/></td></tr><tr class="row-3 odd"><td class="column-1"><strong>Mass:</strong></td><td class="column-2">6.39 × 10^23 kg<br/> (0.11 Earths)</td></tr><tr class="row-4 even"><td class="column-1"><strong>Moons:</strong></td><td class="column-2">2 (<a href="https://space-facts.com/moons/phobos/">Phobos</a> &amp; <a href="https://space-facts.com/moons/deimos/">Deimos</a>)</td></tr><tr class="row-5 odd"><td class="column-1"><strong>Orbit Distance:</strong></td><td class="column-2">227,943,824 km<br/> (1.38 AU)</td></tr><tr class="row-6 even"><td class="column-1"><strong>Orbit Period:</strong></td><td class="column-2">687 days (1.9 years)<br/></td></tr><tr class="row-7 odd"><td cl

In [14]:
update_data ={'news':news,'image':image,'tweets':tweets,'facts':facts,'hemispheres':hemispheres}

In [15]:
update_data

{'news': {'title': 'NASA Invites Students to Name Mars 2020 Rover',
  'p': "Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover.NASA Invites Students to Name Mars 2020 Rover"},
 'image': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14400_ip.jpg',
 'tweets': 'We won’t be hearing from @MarsCuriosity or @NASAInSight for the next 2 weeks during Mars solar conjunction. Read more about why Mars missions go silent every 2 years: https://www.wral.com/mars-spacecraft-go-quiet-during-solar-conjunction/18595551/\xa0…pic.twitter.com/fWruE2v151',
 'facts': '<table class="tablepress tablepress-id-p-mars" id="tablepress-p-mars"><tbody><tr class="row-1 odd"><td class="column-1"><strong>Equatorial Diameter:</strong></td><td class="column-2">6,792 km<br/></td></tr><tr class="row-2 even"><td class="column-1"><strong>Polar Diameter:</strong></td><td class="column-2">6,752 km<br/></td></tr><tr class="row-3 odd"><td class="column-1"

In [29]:
update_data

{'news': {'title': 'NASA Invites Students to Name Mars 2020 Rover',
  'p': "Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover.NASA Invites Students to Name Mars 2020 Rover"},
 'image': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14400_ip.jpg',
 'tweets': 'We won’t be hearing from @MarsCuriosity or @NASAInSight for the next 2 weeks during Mars solar conjunction. Read more about why Mars missions go silent every 2 years: https://www.wral.com/mars-spacecraft-go-quiet-during-solar-conjunction/18595551/\xa0…pic.twitter.com/fWruE2v151',
 'facts': '<table class="tablepress tablepress-id-p-mars" id="tablepress-p-mars"><tbody><tr class="row-1 odd"><td class="column-1"><strong>Equatorial Diameter:</strong></td><td class="column-2">6,792 km<br/></td></tr><tr class="row-2 even"><td class="column-1"><strong>Polar Diameter:</strong></td><td class="column-2">6,752 km<br/></td></tr><tr class="row-3 odd"><td class="column-1"

In [37]:
update_data["hemispheres"][0]["title"]

'Cerberus Hemisphere Enhanced'

In [348]:
mars_info.update({},update_data, upsert=True)

  """Entry point for launching an IPython kernel.


{'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}

In [276]:
mars_info.update({}, update_data, upsert=True)

  """Entry point for launching an IPython kernel.


{'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}