In [1]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager



In [2]:
# Set up splinter: webdriver_manager installs ChromeDriver (or reuses the
# cached copy) and returns its path, which is handed to splinter's Browser.
executable_path = {'executable_path': ChromeDriverManager().install()}
# headless=False: run Chrome with a visible window
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\rod\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache



## NASA Mars News Scraping

In [3]:
# URL of the Mars news page to be scraped; load it in the splinter browser
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# Create BeautifulSoup object from the HTML of the page currently loaded in
# the browser; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [5]:
# Get the first 'content_title' div (news title) and the first
# 'article_teaser_body' div (teaser paragraph).
# Note: find() returns None when no element matches.
news_title_obj = soup.find('div',class_="content_title")
news_p_obj = soup.find('div',class_='article_teaser_body')                   

In [6]:
# Extract the text of the latest news title and teaser paragraph,
# keep them for later use, and show both.
news_title = news_title_obj.text
news_p = news_p_obj.text

print(news_title)
print(news_p)

Join NASA for the Launch of the Mars 2020 Perseverance Rover
No matter where you live, choose from a menu of activities to join NASA as we "Countdown to Mars" and launch the Perseverance rover to the Red Planet.


## JPL Mars Space Images - Featured Image

In [7]:
# URL of the featured-image page to be scraped; load it in the browser
image_url = 'https://spaceimages-mars.com/' 
browser.visit(image_url)

In [8]:
# Create BeautifulSoup object from the image page HTML; parse with 'html.parser'
html1 = browser.html
soup1 = BeautifulSoup(html1, 'html.parser')


In [9]:
# Collect every 'floating_text_area' div; the anchor inside it carries the
# relative href of the featured image (visible in the printed markup).
images = soup1.find_all('div', class_='floating_text_area')
print(images)


[<div class="floating_text_area">
<h2 class="brand_title">FEATURED IMAGE</h2>
<h1 class="media_feature_title">Dusty Space Cloud</h1>
<br/>
<a class="showimg fancybox-thumbs" href="image/featured/mars1.jpg" target="_blank"> <button class="btn btn-outline-light"> FULL IMAGE</button></a>
</div>]


In [10]:
# Build the absolute URL of the featured image.
# Each 'floating_text_area' block is expected to hold one anchor whose href is
# relative to the site root. The URL is assembled inside the loop, so the
# result no longer depends on a loop variable leaking out of it, and blocks
# without a usable anchor are skipped instead of raising a TypeError.
featured_base_url = 'https://spaceimages-mars.com/'
featured_image_url = None

for image in images:
    link = image.find('a')
    if link is None or not link.get('href'):
        continue  # no anchor / no href in this block
    featured_image_url = featured_base_url + link['href']
    print('===============')
    print(featured_image_url)

# Fail loudly with a clear message rather than a NameError further down.
if featured_image_url is None:
    raise ValueError("No featured image link found in 'floating_text_area' blocks")

https://spaceimages-mars.com/image/featured/mars1.jpg


## Mars Facts

In [11]:
# dependencies
import pandas as pd


In [12]:
# URL of the Mars facts page whose tables are scraped.
# NOTE: this rebinds `url`, which earlier held the news-site address.
url ='https://galaxyfacts-mars.com/'


In [13]:
# read_html scrapes the tabular data: it returns a list with one DataFrame
# per HTML table found at the URL
tables = pd.read_html(url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [14]:
# Confirm that read_html returned a list
type(tables)

list

In [15]:
# Keep the first scraped table (the Mars - Earth comparison) as the working dataframe
mars_earth_df = tables[0]
mars_earth_df

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [16]:
# Adjust column headers: promote the first scraped row to the header row.
# The frame is rebuilt from tables[0] rather than from mars_earth_df itself,
# so re-running this cell does not strip one more data row each time.
# The former `mars_earth_df.rename(...)` call was dropped: its result was
# discarded, so it had no effect.
comparison_raw = tables[0]
new_header = comparison_raw.iloc[0]  # first row holds the header labels
mars_earth_df = comparison_raw.iloc[1:].copy()  # data rows only; copy so the scraped table is left untouched
mars_earth_df.columns = new_header  # set the header row as the df header
mars_earth_df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [17]:
# Use the 'Mars - Earth Comparison' column as the row index
mars_earth_df = mars_earth_df.set_index('Mars - Earth Comparison')
mars_earth_df

Unnamed: 0_level_0,Mars,Earth
Mars - Earth Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [18]:
# Render the comparison table as an HTML string with left-justified column headers
mars_facts = mars_earth_df.to_html(justify='left')
mars_facts

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: left;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

## Mars Hemispheres

In [19]:
# URL of the hemispheres landing page to be scraped; load it in the browser.
# NOTE: this rebinds `image_url`, which earlier held the featured-image site address.
image_url = 'https://marshemispheres.com/' 
browser.visit(image_url)

In [20]:
# Collect the URL of every hemisphere detail page linked from the landing page.

# Parse the HTML of the page currently loaded in the browser
html2 = browser.html
soup = BeautifulSoup(html2, 'html.parser')

# Each 'description' div holds the anchor pointing at one hemisphere page
hemispheres = soup.find_all('div', class_='description')

# Absolute links to the hemisphere pages (the hrefs are relative to the site root)
hemispheres_list = [
    'https://marshemispheres.com/' + description.find('a')['href']
    for description in hemispheres
]

In [21]:
# List of dictionaries ({'title', 'img_url'}) of hemisphere image urls
hemisphere_image_urls = []
hemisphere_base_url = 'https://marshemispheres.com/'

# Visit each hemisphere page and collect its title and full-image URL
for hemisphere in hemispheres_list:
    browser.visit(hemisphere)

    # Parse the HTML of the page that was just loaded
    html3 = browser.html
    soup = BeautifulSoup(html3, 'html.parser')

    # Reset per page so a page without a title can never silently inherit the
    # title scraped from the previous page.
    title = None
    for result in soup.find_all('div', class_='cover'):
        heading = result.find('h2', class_='title')
        if heading is not None:
            title = heading.text

    if title is None:
        print(f'Skipping {hemisphere}: no title found')
        continue

    # The first link of the first list inside each 'downloads' div is taken as
    # the image link; its href is relative to the site root.
    for image in soup.find_all('div', class_='downloads'):
        ul = image.find('ul')
        li = ul.find('li') if ul is not None else None
        link = li.find('a') if li is not None else None
        if link is None or not link.get('href'):
            print(f'Skipping a downloads block on {hemisphere}: no image link found')
            continue

        hemisphere_image_urls.append({
            'title': title,
            'img_url': hemisphere_base_url + link['href'],
        })



In [22]:
# Show the collected list of {'title', 'img_url'} dictionaries
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]
