## Scrape NASA Mars News Site

In [1]:
#import dependencies 
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time 
import pandas as pd
import re
import time

In [2]:
def init_browser():
    """Create and return a splinter Chrome browser.

    The browser is started with ``headless=False`` and uses the
    chromedriver binary located at ``/usr/local/bin/chromedriver``.
    """
    driver_options = {'executable_path': '/usr/local/bin/chromedriver'}
    chrome = Browser('chrome', headless=False, **driver_options)
    return chrome

In [3]:
def scrape_info():
    """Scrape several Mars-related web pages and collect the results.

    Pages visited, in order: the NASA Mars news listing, the JPL space
    images page, the @marswxreport Twitter feed, the space-facts Mars page
    (read directly by pandas, not through the browser) and the USGS
    astrogeology hemisphere search results.

    Returns:
        dict: with the keys
            ``news_title`` (str) - text of the link in the first
                ``div.list_text`` on the news page,
            ``news_paragraph`` (str) - text of the first
                ``div.article_teaser_body``,
            ``featured_image_url`` (str) - absolute URL built from the
                link inside ``figure.lede`` on the JPL detail page,
            ``mars_weather`` (str) - text of the first ``span`` matching
                ``InSight`` on the Twitter page,
            ``mars_table`` (str) - HTML rendering of the first table found
                on the facts page,
            ``mars_hemispheres`` (list[dict]) - one
                ``{'title', 'image_url'}`` dict per ``div.item`` result.

    Raises:
        Whatever the underlying lookups raise when an expected element is
        missing (e.g. ``AttributeError``/``TypeError`` when ``find``
        returns ``None``). The browser is closed before the exception
        propagates.
    """
    news_url = 'https://mars.nasa.gov/news/'
    jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    twitter_url = 'https://twitter.com/marswxreport?lang=en'
    facts_url = 'https://space-facts.com/mars/'
    hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

    browser = init_browser()

    # try/finally guarantees the browser is shut down even when one of the
    # scraping steps fails; otherwise a browser process would be left behind.
    try:
        # --- NASA Mars news -------------------------------------------------
        browser.visit(news_url)
        # Short pause before reading the page source
        # (presumably to let the page finish rendering - TODO confirm).
        time.sleep(1)
        soup = bs(browser.html, 'html.parser')
        news_title = soup.find('div', class_='list_text').a.text
        news_p = soup.find('div', class_='article_teaser_body').text

        # --- JPL featured image (hi-res) ------------------------------------
        # A single visit is enough: the page is only used to click through
        # to the detail view, which is what gets parsed.
        browser.visit(jpl_url)
        browser.click_link_by_partial_text('FULL IMAGE')
        browser.click_link_by_partial_text('more info')
        soup = bs(browser.html, 'html.parser')
        image_path = soup.find('figure', class_='lede').a['href']
        featured_image_url = f'https://jpl.nasa.gov{image_path}'

        # --- Latest weather tweet -------------------------------------------
        browser.visit(twitter_url)
        time.sleep(2)
        soup = bs(browser.html, 'html.parser')
        weather_pattern = re.compile('InSight')
        mars_weather = soup.find('span', text=weather_pattern).text

        # --- Mars facts table -----------------------------------------------
        facts = pd.read_html(facts_url)[0]
        facts.columns = ['Description', 'Values']
        facts = facts.set_index('Description')
        df_html = facts.to_html()

        # --- Mars hemispheres -----------------------------------------------
        browser.visit(hemisphere_url)
        soup = bs(browser.html, 'html.parser')
        # Collect the titles up front; the browser navigates away from the
        # results page inside the loop.
        titles = [item.find('h3').text
                  for item in soup.find_all('div', class_='item')]

        hemisphere_image_url = []
        for title in titles:
            browser.click_link_by_partial_text(title)
            detail = bs(browser.html, 'html.parser')
            image_src = detail.find('img', class_='wide-image')['src']
            # Append only once the entry is complete.
            hemisphere_image_url.append({
                'title': title,
                'image_url': f'https://astrogeology.usgs.gov{image_src}',
            })
            # Return to the results list so the next title link can be clicked.
            browser.visit(hemisphere_url)
    finally:
        # close the browser
        browser.quit()

    # store data in a dictionary
    mars_data = {
        'news_title': news_title,
        'news_paragraph': news_p,
        'featured_image_url': featured_image_url,
        'mars_weather': mars_weather,
        'mars_table': df_html,
        'mars_hemispheres': hemisphere_image_url,
    }
    return mars_data

In [5]:
# Run the full scrape once; the returned dict is shown as the cell output below.
scrape_info()

{'news_title': "NASA's Perseverance Rover Will Look at Mars Through These 'Eyes'",
 'news_paragraph': 'A pair of zoomable cameras will help scientists and rover drivers with high-resolution color images.',
 'featured_image_url': 'https://jpl.nasa.gov/spaceimages/images/largesize/PIA14762_hires.jpg',
 'mars_weather': 'InSight sol 508 (2020-05-01) low -92.2ºC (-134.0ºF) high -2.4ºC (27.7ºF)\nwinds from the SW at 5.1 m/s (11.3 mph) gusting to 15.8 m/s (35.3 mph)\npressure at 6.80 hPa',
 'mars_table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Values</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>