In [92]:
# import dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd

### NASA Mars News

In [93]:
# Initiate splinter
!which chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)


/usr/local/bin/chromedriver


In [94]:
# Set the featured image url and browser
news_url = "https://mars.nasa.gov/news/"

# Visit the URL using splinter 
browser.visit(news_url)

# Create HTML object
html = browser.html

# Parse HTML with Beautiful Soup
news_soup = BeautifulSoup(html, 'html.parser')

In [95]:
# Scrape the URL to find the latest news and get the 'title' and 'paragraph' of the latest news
article = news_soup.find("div", class_='list_text')
news_title = article.find("div", class_="content_title").text
news_p = article.find("div", class_ ="article_teaser_body").text

# print the news title and paragraph text
print(f"news_title: {news_title}")
print(f"news_p: {news_p}")
# print(article)

news_title: NASA Engineers Checking InSight's Weather Sensors
news_p: An electronics issue is suspected to be preventing the sensors from sharing their data about Mars weather with the spacecraft.


### JPL Mars Space Images - Featured Image

In [96]:
# URL for getting the featured image
featured_img_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Initiate browser to visit the URL
browser.visit(featured_img_url)


In [97]:
# Click the 'FULL IMAGE' button on the page
browser.click_link_by_partial_text("FULL IMAGE")


In [98]:
# Click the 'more info' button on the page
browser.click_link_by_partial_text("more info")


In [99]:
# Created HTML object
featured_html = browser.html

# Parse HTML with Beautiful Soup
featured_img_soup = BeautifulSoup(featured_html, 'html.parser')

In [100]:
# Scrape the webpage to get the 'Featured Image' source
figure = featured_img_soup.find("figure", class_ = "lede")
a_tag = figure.find("a")
img_tag = figure.find("img")
img_source = img_tag["src"]

# Base URL
base_url = "https://www.jpl.nasa.gov"

# Get the 'featured image URL' by adding the base_url and the featured_image_url
featured_image_url = base_url + img_source
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16919_hires.jpg'

### Mars Facts

In [101]:
# Mars facts web page URL
facts_url = "https://space-facts.com/mars/"

# Read HTML tables from the DataFrame
tables = pd.read_html(facts_url)

In [102]:
# Display the table
tables


[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [103]:
# Scrape the table containing facts about the planet including Diameter, Mass, etc.
df = tables[0]

# Rename the column's header
df.columns = ["Description", "Mars"]

# Set the index to the 'Description' column
mars_df = df.set_index(["Description"])
mars_df


Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [104]:
# Generate HTML tables from the DataFrame
mars_facts_html = mars_df.to_html()
mars_facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n

In [105]:
# Strip unwanted '/n' tags to clean up the table
mars_facts_html = mars_facts_html.replace('\n', '')
mars_facts_html


'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [106]:
# Save the table directly to HTML file
mars_df.to_html("facts.html")

# We can open the file directly in browser
!open facts.html


### Mars Hemispheres

In [107]:
# Set the featured image url and browser
mars_hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

browser.visit(mars_hemi_url)

In [108]:
# Created HTML object
mars_html = browser.html

# Parse HTML with Beautiful Soup
mars_hemi_soup = BeautifulSoup(mars_html, 'html.parser')

# Click the 'FULL IMAGE' button on the page
# browser.click_link_by_partial_text("Cerberus")

In [109]:
hemisphere_image_urls = []
hemisphere_base_url = "https://astrogeology.usgs.gov/search/"

hemisphere_title = mars_hemi_soup.find_all("h3")


for title in hemisphere_title:
    title_text = title.get_text()
    title = title_text.strip("Enhanced")
    browser.click_link_by_partial_text(title_text)
    download_link = browser.find_link_by_partial_href("download")
    img_url = download_link["href"]
    hemisphere_info_dict = {"title" : title,
                           "img_url" : img_url}
    hemisphere_image_urls.append(hemisphere_info_dict)
    

hemisphere_image_urls   
    



ElementDoesNotExist: no elements could be found with link by partial text "Schiaparelli Hemisphere Enhanced"

In [None]:
# cerb_img = mars_hemi_soup.find("div", class_ = "wide-image-wrapper")
# print(cerb_img)

In [None]:
# Click the 'more info' button on the page
# browser.click_link_by_partial_text("OPEN")

In [None]:
hemisphere_img_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [None]:
# browser.visit(hemisphere_img_url)

In [None]:
# import os
# base_url = os.path.dirname('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')

# base_url

In [None]:
from urllib.parse import urlparse, urljoin

base_url = urlparse('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')

base_url = urljoin("https" , "astrogeology.usgs.gov")
base_url