In [2]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd

In [3]:
# 1. MARS NEWS SITE

# URL of page to be scraped -- we're going to SCRAP the WEB PAGE. Instead of using a path to a File.
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
response = requests.get(url)


In [4]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')
#print(soup.prettify())

In [5]:
# first Title
news_title = soup.find("div",class_="content_title").text
print(news_title)



Mars Helicopter Attached to NASA's Perseverance Rover




In [6]:
# first Parragraph text
news_p = soup.find("div", class_="rollover_description_inner").text 
print(news_p)



The team also fueled the rover's sky crane to get ready for this summer's history-making launch.



In [7]:
# 2. JPL MARS SPACE IMAGE

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [8]:
#Click on button "FULL IMAGE"
browser.find_by_id('full_image').first.click()

In [9]:
#Click on button "MORE INFO"
browser.links.find_by_partial_text('more info').first.click()

In [10]:
# HTML object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Retrieve full size image partial link (JPG format)
feature_image = soup.find("figure", class_="lede").a["href"]
print(feature_image)

/spaceimages/images/largesize/PIA01320_hires.jpg


In [11]:
#Link of full size image - with HTTPS:// link
featured_image_url ="https://www.jpl.nasa.gov/" + feature_image
print(featured_image_url)

https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA01320_hires.jpg


In [12]:
#Close Browser
browser.quit()

In [13]:
# 3. MARS WEATHER TWEET

url = 'https://twitter.com/marswxreport?lang=en'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')


In [14]:
#Get the first Tweet Parragraph
body_content =soup.body.find("p", class_="tweet-text")

tweets=[]
for body in body_content:
    tweets.append(body)

mars_weather=tweets[0]
print(mars_weather)


InSight sol 488 (2020-04-10) low -93.8ºC (-136.8ºF) high -19.7ºC (-3.4ºF)
winds from the WNW at 4.6 m/s (10.4 mph) gusting to 13.0 m/s (29.1 mph)
pressure at 6.60 hPa


In [15]:
# 4. MARS FACTS

url = 'https://space-facts.com/mars/'

tables = pd.read_html(url)
df=tables[0]
df.columns = ["Mars characteristics","Value"]
df

Unnamed: 0,Mars characteristics,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [16]:
#Remove INDEX column
df.set_index("Mars characteristics", inplace = True)


In [17]:
# Convert Dataframe into HTML HTML table string 
html_table = df.to_html()


In [18]:
# 5. MARS HEMISPHERES

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

# Parse HTML with Beautiful Soup to get links to click
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [19]:
# Retrieve LIST of 4 links 
links_path = soup.find_all("div", class_="description")

Links_to_Click=[]

for links in links_path:
    partial_link= links.a["href"]
    final_url = "https://astrogeology.usgs.gov/"+partial_link
    Links_to_Click.append(final_url)

print("------------------LIST OF LINKS-----------------------")
#print(Links_to_Click)
Links_to_Click

------------------LIST OF LINKS-----------------------


['https://astrogeology.usgs.gov//search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/valles_marineris_enhanced']

In [20]:
# Get information of each Hemisphere, TITLE and IMAGE URL. Create List of 4 Dictionaries.
hemisphere_image_urls =[]

for i in range(len(Links_to_Click)):
    #visit/click new urls
    browser.visit(Links_to_Click[i])
    html2 = browser.html
    soup2 = BeautifulSoup(html2, 'html.parser')

    img_url = soup2.find("div",class_="downloads").a["href"]
    title = soup2.find("h2",class_="title").text

    mars_dict={"title": title,"img_url": img_url}
    hemisphere_image_urls.append(mars_dict)

    # Go back to the original page
    browser.back()

#Close Browser
browser.quit()

print("-----LIST OF DICTIONARIES-------")
#print(hemisphere_image_urls)
hemisphere_image_urls


-----LIST OF DICTIONARIES-------


[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]