In [1]:
from bs4 import BeautifulSoup as bs
import lxml.html as lh
import pandas as pd
import requests
from selenium import webdriver
from splinter import Browser
import time

In [2]:
def init_browser(executable_path="/Users/cephra.stuart/Desktop/chromedriver", headless=False):
    """Start a splinter-driven Chrome browser.

    Parameters
    ----------
    executable_path : str, optional
        Location of the chromedriver binary. The default is the path that was
        previously hard-coded here; it is machine-specific, so pass your own
        path when running on a different machine.
    headless : bool, optional
        Forwarded to splinter's ``headless`` option. Defaults to ``False``,
        the value that was previously hard-coded.

    Returns
    -------
    The object produced by ``Browser("chrome", ...)``. The caller is
    responsible for calling ``quit()`` on it.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)


# NASA Mars News

In [5]:
def mars_news():
    """Scrape the first headline and teaser paragraph from the NASA Mars news page.

    Returns
    -------
    dict
        ``{"news_title": <str>, "news_p": <str>}`` taken from the first
        ``div.content_title`` and first ``div.article_teaser_body`` on the page.

    Raises
    ------
    AttributeError
        If either div is missing from the page (``soup.find`` returns ``None``).
    """
    url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

    browser = init_browser()
    try:
        browser.visit(url)

        # Short pause before reading the HTML (presumably to let the page finish rendering).
        time.sleep(1)

        html = browser.html
    finally:
        # Always release the browser, even when the visit fails part-way.
        browser.quit()

    soup = bs(html, "html.parser")

    # Title and teaser of the first story on the page
    news_title = soup.find("div", {"class": "content_title"}).get_text(strip=True)
    news_p = soup.find("div", {"class": "article_teaser_body"}).get_text(strip=True)

    return {
        "news_title": news_title,
        "news_p": news_p,
    }

In [6]:
# Run the news scraper; the returned dict is displayed as the cell output.
mars_news()

{'news_title': "NASA InSight Lander 'Hears' Martian Winds", 'news_p': 'Vibrations picked up by two spacecraft instruments have provided the first sounds of Martian wind.'}


# JPL Mars Space Images - Featured Image

In [12]:
def mars_images():
    """Scrape the URL of the featured Mars image from the JPL space-images page.

    Returns
    -------
    str
        ``https://www.jpl.nasa.gov`` followed by the ``data-fancybox-href``
        attribute of the first ``<a class="button fancybox">`` element.
        (Earlier versions printed this value and returned ``None``.)

    Raises
    ------
    TypeError
        If no matching anchor is found (``soup.find`` returns ``None``).
    KeyError
        If the anchor has no ``data-fancybox-href`` attribute.
    """
    url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

    browser = init_browser()
    try:
        browser.visit(url)

        # Short pause before reading the HTML (presumably to let the page finish rendering).
        time.sleep(1)

        html = browser.html
    finally:
        # Always release the browser, even when the visit fails part-way.
        browser.quit()

    soup = bs(html, "html.parser")

    # The anchor's attribute holds a site-relative path to the image
    featured_image_data = soup.find("a", {'class': 'button fancybox'})
    image_link = featured_image_data['data-fancybox-href']

    return 'https://www.jpl.nasa.gov' + image_link

In [13]:
# Run the featured-image scraper (starts a browser through init_browser).
mars_images()

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17171_ip.jpg


# Mars Weather

In [15]:
def mars_weather():
    """Scrape the text of the first tweet on the Mars weather Twitter page.

    Returns
    -------
    str
        Whitespace-stripped text of the first
        ``div.js-tweet-text-container`` element on the page.
        (Earlier versions printed this value and returned ``None``.)

    Raises
    ------
    AttributeError
        If no such div is found (``soup.find`` returns ``None``).
    """
    url = "https://twitter.com/marswxreport?lang=en"

    browser = init_browser()
    try:
        browser.visit(url)

        # Short pause before reading the HTML (presumably to let the page finish rendering).
        time.sleep(1)

        html = browser.html
    finally:
        # Always release the browser, even when the visit fails part-way.
        browser.quit()

    soup = bs(html, "html.parser")

    # First tweet container on the page
    latest_tweet = soup.find("div", {'class': 'js-tweet-text-container'}).get_text(strip=True)

    return latest_tweet

In [16]:
# Run the weather scraper (starts a browser through init_browser).
mars_weather()

Sol 2251 (2018-12-05), high -15C/5F, low -74C/-101F, pressure at 8.47 hPa, daylight 06:34-18:48


# Mars Facts

In [3]:
def mars_facts():
    """Fetch the first HTML table from the space-facts Mars page.

    ``pd.read_html`` downloads and parses the URL on its own, so no browser
    session is started here. Previously a browser was launched, the page was
    visited and parsed with BeautifulSoup, and none of that was used.

    Returns
    -------
    pandas.DataFrame
        The first table ``pd.read_html`` finds at the URL, with whatever
        column labels pandas assigns.

    Raises
    ------
    ValueError
        Raised by ``pd.read_html`` when the page contains no tables.
    """
    url = "http://space-facts.com/mars/"

    tables = pd.read_html(url)
    return tables[0]

In [4]:
# Scrape the Mars facts table once and keep the DataFrame for the cells below.
fact_table = mars_facts()

In [5]:
# Give the two scraped columns readable labels, then display the table.
# NOTE(review): this mutates fact_table in place and requires it to have
# exactly two columns at this point, so it depends on cell execution order.
fact_table.columns = ['Description', 'Values']
fact_table

Unnamed: 0,Description,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [7]:
# Use the description column as the row index.
# The guard keeps this cell safe to re-run: once 'Description' is the index it
# is no longer a column, and calling set_index again would raise KeyError.
if fact_table.index.name != 'Description':
    fact_table = fact_table.set_index('Description')
fact_table.head()

Unnamed: 0_level_0,Values
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"


In [8]:
# Render the facts table as an HTML <table> string; the bare name on the
# last line displays that string as the cell output.
html_table = fact_table.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    

# Mars Hemispheres

In [36]:
def mars_hemispheres():
    """Scrape the title and full-size image URL of each Mars hemisphere from USGS.

    The search-results page is scanned for ``<a class="itemLink product-item">``
    anchors. For every anchor that has visible text, the linked detail page is
    opened and the ``src`` of its ``<img class="wide-image">`` is collected.

    Returns
    -------
    list of dict
        One ``{"title": <str>, "img_url": <str>}`` entry per titled anchor,
        in page order. ``img_url`` is prefixed with
        ``https://astrogeology.usgs.gov``.
        (Earlier versions printed this list and returned ``None``.)

    Raises
    ------
    TypeError
        If a detail page has no ``img.wide-image`` element
        (``soup.find`` returns ``None``).
    KeyError
        If an anchor lacks ``href`` or the image lacks ``src``.
    """
    base_url = 'https://astrogeology.usgs.gov'
    url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

    hemisphere_image_urls = []

    browser = init_browser()
    try:
        browser.visit(url)

        # Short pause before reading the HTML (presumably to let the page finish rendering).
        time.sleep(1)

        soup = bs(browser.html, "html.parser")

        # Every product link on the results page
        hemispheres = soup.find_all("a", {'class': 'itemLink product-item'})

        for item in hemispheres:
            title = item.get_text(strip=True)

            # Anchors without text never contributed a result, so skip them
            # before paying for a page load.
            if title == "":
                continue

            browser.visit(base_url + item['href'])
            detail_soup = bs(browser.html, 'html.parser')
            image_tag = detail_soup.find('img', {'class': 'wide-image'})

            hemisphere_image_urls.append(
                {'title': title, 'img_url': base_url + image_tag['src']}
            )
    finally:
        # Always release the browser, even when a page visit fails part-way.
        browser.quit()

    return hemisphere_image_urls

In [37]:
# Run the hemisphere scraper (starts a browser through init_browser).
mars_hemispheres()

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': '/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': '/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': '/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': '/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]
