In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs,  Tag
from selenium import webdriver
import os
import pandas as pd
from splinter import Browser
from IPython.display import HTML
from flask import Flask
from flask_bootstrap import Bootstrap

In [2]:
# Location of the locally saved copy of the NASA Mars news page (relative to the notebook)
filepath = os.path.join("News – NASA’s Mars Exploration Program.html")
# Load the whole page into memory as UTF-8 text
with open(filepath, mode='r', encoding='utf-8') as file:
    html = file.read()

In [3]:
# Parse the saved news page into a BeautifulSoup tree using the 'html.parser' backend
soup = bs(markup=html, features='html.parser')

In [4]:
# Examine the results using the prettify method, then determine the element that contains the sought info
#print(soup.prettify())

In [5]:
# Collect every <div class="content_title"> and every
# <div class="rollover_description_inner"> found in the parsed page.
# Each call returns a collection of the matching tags ("news_title" and "news_p").
news_title = soup.find_all('div', class_='content_title')
news_p = soup.find_all('div', class_='rollover_description_inner')

In [6]:
# Sanity check: count how many title blocks and paragraph blocks were found on the page
title_block_count = len(news_title)
paragraph_block_count = len(news_p)
print(f"We have a total of {title_block_count} html's block of codes containing the titles")
print(f"We have a total of {paragraph_block_count} html's block of codes containing the paragraphs")

We have a total of 52 html's block of codes containing the titles
We have a total of 52 html's block of codes containing the paragraphs


In [7]:
# Take the first match of each kind; the page is expected to start with the latest article
latest_news_title_block = news_title[0]
latest_news_paragraph_block = news_p[0]

# Preview the raw HTML of both blocks, one per line
print(latest_news_title_block, latest_news_paragraph_block, sep="\n")

<div class="content_title"><a href="https://mars.nasa.gov/news/8585/nasas-mars-2020-rover-closer-to-getting-its-name/" target="_self">NASA's Mars 2020 Rover Closer to Getting Its Name</a></div>
<div class="rollover_description_inner">155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July.</div>


In [8]:
# Pull the headline text out of the <a> link nested inside the title block
latest_news_title = latest_news_title_block.find('a').get_text()

# Preview the cleaned title
print(latest_news_title)

NASA's Mars 2020 Rover Closer to Getting Its Name


In [9]:
# Extract the latest paragraph (the article teaser) as plain text.
# get_text() gathers all of the text inside the <div>, so it still works when the
# <div> is empty or contains nested tags, where indexing the first child node
# would fail or return only part of the paragraph.
latest_news_paragraph = latest_news_paragraph_block.get_text()

# Preview the cleaned paragraph
print(latest_news_paragraph)

155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July.


# Applying a tabular data structure to display the results in a nice format

In [10]:
# Wrap each scraped value in a list so that it becomes one row of the table
record_list_title = [latest_news_title]
record_list_paragraph = [latest_news_paragraph]

# Build the one-row DataFrame; the dictionary keys become the column headers
# (header typo and stray trailing space fixed)
record_df = pd.DataFrame({
    'Latest News Title': record_list_title,
    'Latest News Paragraph': record_list_paragraph,
})

# Let long text (up to 300 characters per cell) be displayed without truncation
pd.set_option('display.max_colwidth', 300)

# Render the table without its index column.
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in favour of
# Styler.hide(axis="index"); fall back to the old method only on older pandas.
record_styler = record_df.style
record_styler.hide(axis="index") if hasattr(record_styler, "hide") else record_styler.hide_index()

Lstest News Title,Latest News Paragraph
NASA's Mars 2020 Rover Closer to Getting Its Name,"155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July."


# JPL Mars Space Images - Featured Image

In [11]:
# Keyword arguments telling Splinter where the ChromeDriver binary lives (machine-specific path)
executable_path = dict(executable_path="C:/webdrivers/chromedriver")
# Start a Chrome session driven by Splinter
browser = Browser("chrome", **executable_path)
# NASA JPL (Jet Propulsion Laboratory) space-images page, filtered to the Mars category
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
# Navigate the browser to that page
browser.visit(url)

In [12]:
# Use Splinter to locate the "FULL IMAGE" button by its id ("full_image") and click it.
# The lookup and the click are separate statements so that `full_image_button`
# refers to the button element itself rather than to the return value of click().
full_image_button = browser.find_by_id("full_image")
full_image_button.click()


In [13]:
# Give the page up to one second for the "more info" text to appear (the result is not inspected)
browser.is_element_present_by_text("more info", wait_time=1)
# Locate the link whose text contains "more info", then follow it
more_info_link = browser.find_link_by_partial_text("more info")
# NOTE(review): `more_info_page` receives whatever click() returns, not a page object — confirm it is needed
more_info_page = more_info_link.click()



In [14]:
# Capture the HTML of the page the browser is currently showing and parse it with 'html.parser'
html = browser.html
image_page_soup = bs(html, "html.parser")

# The page HTML is now captured, so close this Chrome session. The later sections
# start their own browser, and a session that is never quit would be left running.
browser.quit()

In [15]:
# Find the first <figure class="lede"> element in the image page.
# The resulting Beautiful Soup object ("top_img_class") wraps the principal image.
top_img_class = image_page_soup.find('figure', class_='lede')

print(top_img_class)



<figure class="lede">
<a href="/spaceimages/images/largesize/PIA20057_hires.jpg"><img alt="Galaxy NGC 1068 is shown in visible light and X-rays in this composite image. High-energy X-rays (magenta) captured by NASA's NuSTAR, are overlaid on visible-light images from both NASA's Hubble Space Telescope and the Sloan Digital Sky Survey." class="main_image" src="/spaceimages/images/largesize/PIA20057_hires.jpg" title="Galaxy NGC 1068 is shown in visible light and X-rays in this composite image. High-energy X-rays (magenta) captured by NASA's NuSTAR, are overlaid on visible-light images from both NASA's Hubble Space Telescope and the Sloan Digital Sky Survey."/></a>
</figure>


In [16]:
# Read the PARTIAL (site-relative) URL from the 'src' attribute of the <img> inside the figure
featured_img_tag = top_img_class.img
partial_featured_image_url = featured_img_tag['src']
partial_featured_image_url

'/spaceimages/images/largesize/PIA20057_hires.jpg'

In [17]:
# Find the first <div class="jpl_logo"> element in the image page.
# The resulting Beautiful Soup object ("featured_jpl_logo_class") is used next to read the site's link.
featured_jpl_logo_class = image_page_soup.find('div', class_='jpl_logo')


In [18]:
# Extract the href of the JPL logo link (equivalent to the site's domain URL).
# rstrip('/') removes a trailing slash only when one is present, so the last
# character of the domain is never chopped off if the href has no trailing slash.
domain_jpl_site_url = featured_jpl_logo_class.find('a')['href'].rstrip('/')
print(domain_jpl_site_url)

//www.jpl.nasa.gov


In [19]:
# Assemble the final featured image URL: scheme + protocol-relative domain + image path
featured_image_url = "https:{}{}".format(domain_jpl_site_url, partial_featured_image_url)
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA20057_hires.jpg


# Mars Weather

In [20]:
# Keyword arguments telling Splinter where the ChromeDriver binary lives (machine-specific path)
executable_path = dict(executable_path="C:/webdrivers/chromedriver")
# Start a Chrome session driven by Splinter
browser = Browser("chrome", **executable_path)
# Mars Weather Twitter account page
url = "https://twitter.com/marswxreport?lang=en"
# Navigate the browser to that page
browser.visit(url)

In [21]:
# Capture the HTML of the page the browser is currently showing and parse it with 'html.parser'.
# (Call weather_twitter_soup.prettify() to inspect the markup when needed.)
html = browser.html
weather_twitter_soup = bs(html, "html.parser")

# The page HTML is now captured, so close this Chrome session. The next section
# starts its own browser, and a session that is never quit would be left running.
browser.quit()

In [22]:
# Class attribute of the <div> that wraps each tweet's text, found by inspecting the page HTML
tweet_text_class = 'css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0'

# Gather all <div> tags carrying that class attribute into "all_twitt_class"
all_twitt_class = weather_twitter_soup.find_all('div', attrs={'class': tweet_text_class})

# Show how many such <div> tags were found
len(all_twitt_class)

4

In [23]:
# Display every matched tweet <div> to inspect its raw markup
all_twitt_class

[<div class="css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0" dir="auto" lang="en"><span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">InSight sol 422 (2020-02-03) low -90.2ºC (-130.3ºF) high -13.1ºC (8.5ºF)
 winds from the SSE at 6.1 m/s (13.5 mph) gusting to 22.1 m/s (49.4 mph)
 pressure at 6.20 hPa</span></div>,
 <div class="css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0" dir="auto" lang="en"><span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">InSight sol 421 (2020-02-02) low -93.1ºC (-135.6ºF) high -13.8ºC (7.2ºF)
 winds from the SSE at 5.6 m/s (12.5 mph) gusting to 21.0 m/s (46.9 mph)
 pressure at 6.30 hPa</span></div>,
 <div class="css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0" dir="auto" lang="en"><span class="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0">InSight sol 420 (2020-01-31) low -93.7ºC (-136.7ºF) hi

In [24]:
# Walk through the tweets in page order and read the text inside each <span>.
# Keep only a tweet that contains BOTH 'sol' and 'pressure', which excludes tweets
# that are not weather reports. Each word needs its own membership test:
# `'sol' and 'pressure' in text` evaluates as `'sol' and ('pressure' in text)`,
# which only ever checks for 'pressure'.
# Stop at the first match, which is the latest weather tweet on the page.
# `weather_tweet` stays None when no tweet qualifies, so an unrelated tweet is
# never mistaken for the weather report.
weather_tweet = None
for tweet in all_twitt_class:
    tweet_text = tweet.find('span').text
    if 'sol' in tweet_text and 'pressure' in tweet_text:
        weather_tweet = tweet_text
        print(weather_tweet)
        break

InSight sol 422 (2020-02-03) low -90.2ºC (-130.3ºF) high -13.1ºC (8.5ºF)
winds from the SSE at 6.1 m/s (13.5 mph) gusting to 22.1 m/s (49.4 mph)
pressure at 6.20 hPa


# Mars Facts

In [25]:
# Mars Facts page; pandas' read_html method parses the tables it finds there
mars_facts_url = 'https://space-facts.com/mars/'
mars_facts_df = pd.read_html(mars_facts_url)


In [26]:
# Check how many tables pandas found on the Mars Facts page
len(mars_facts_df)


3

In [27]:
# Take the first table found on the Mars Facts page (the required planet profile)
# and give its two columns descriptive names.
# Work on a copy so the renaming does not silently alter the frame that is still
# stored in `mars_facts_df`. The index is left in place here; it is dropped only
# when the table is rendered.
mars_planet_profile_df = mars_facts_df[0].copy()
mars_planet_profile_df.columns = ['Metrics', 'Values']

mars_planet_profile_df

Unnamed: 0,Metrics,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [28]:
# Turn the planet profile into an HTML table string, tagged with Bootstrap's
# striped-table classes and rendered without the index column
mars_facts_table_html = mars_planet_profile_df.to_html(classes=['table table-striped'], index=False)

# Render that HTML string in the notebook
HTML(mars_facts_table_html)


Metrics,Values
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


# Mars Hemispheres

In [29]:
# Keyword arguments telling Splinter where the ChromeDriver binary lives (machine-specific path)
executable_path = dict(executable_path="C:/webdrivers/chromedriver")
# Start a Chrome session driven by Splinter
browser = Browser("chrome", **executable_path)
# USGS Astrogeology search results for the enhanced Mars hemisphere images
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
# Navigate the browser to that page
browser.visit(url)

In [30]:
# Collect one {"img_url", "title"} dictionary per hemisphere
hemisphere_image_urls = []

# Get the list of hemisphere links on the results page (used only for its length)
links = browser.find_by_css("a.product-item h3")
try:
    for item in range(len(links)):
        # Look the element up again on each pass to avoid a stale element exception
        # after navigating away from and back to the results page
        browser.find_by_css("a.product-item h3")[item].click()

        # On the detail page, the "Sample" link's href is the image URL
        sample_element = browser.find_link_by_text("Sample").first

        # Record the image URL together with the page's <h2 class="title"> text
        hemisphere = {
            "img_url": sample_element["href"],
            "title": browser.find_by_css("h2.title").text,
        }
        hemisphere_image_urls.append(hemisphere)

        # Return to the results page for the next hemisphere
        browser.back()
finally:
    # Always close the Chrome session, even if scraping fails part-way,
    # so that no browser process is left running
    browser.quit()



In [31]:
# Display the collected list of hemisphere dictionaries (keys: 'img_url' and 'title')
hemisphere_image_urls

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]