# Mission to Mars Scraping and Analysis

In [1]:
# Dependencies
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
# import requests
import pandas as pd

In [2]:
# URL of NASA Mars News Site
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

In [3]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# Retrieve page with the requests module
# response = requests.get(url)
browser.visit(url)

In [5]:
# Create BeautifulSoup object; parse with 'html.parser'
# soup = BeautifulSoup(response.text, 'html.parser')
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [6]:
# Extract the text of the title
results = soup.find_all("div", class_="content_title")

title_list = []
for result in results:
    title = result.a.text
    title_list.append(title)

news_title  = title_list[0]
news_title 

'After a Reset, Curiosity Is Operating Normally'

In [7]:
# Extract Text of the first article's teaser
results = soup.find_all("div", class_="article_teaser_body")

teaser_list = []
for result in results:
    teaser = result.text
    teaser_list.append(teaser)

news_p = teaser_list[0]
news_p

"NASA's Mars rover Curiosity is in good health but takes a short break while engineers diagnose why it reset its computer. "

In [8]:
#getting the latest Mars Image
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(image_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [9]:
#extracting link to picutre of latest Mars image
results = soup.find_all("a", class_="fancybox")

Mars_Image_List = []
for result in results:
    Image_link = result["data-fancybox-href"]
    Mars_Image_List.append(Image_link)

featured_image_url = "https://www.jpl.nasa.gov/" + Mars_Image_List[1]
featured_image_url

'https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA23047_hires.jpg'

In [10]:
#getting the latest tweet from Mars Weather account
tweet_url = "https://twitter.com/marswxreport?lang=en"
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(tweet_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [11]:
#extract latest Mars Weather tweet
results = soup.find_all("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")

Mars_Weather_Tweets = []
for result in results:
    tweet = result.text
    if "Sol" and "high" and "low" and "hPa" in tweet:
        Mars_Weather_Tweets.append(tweet)
        
Mars_Weather_Tweets_New = []
for tweet in Mars_Weather_Tweets:
    tweet_pic_split = tweet.split("pic")
    Mars_Weather_Tweets_New.append(tweet_pic_split)
    
mars_weather  = Mars_Weather_Tweets_New[0][0]
mars_weather

'InSight sol 83 (2019-02-19) low -95.0ºC (-139.0ºF) high -11.7ºC (11.0ºF)\nwinds from the SW at 4.1 m/s (9.2 mph) gusting to 10.5 m/s (23.6 mph)\npressure at 7.20 hPa'

In [12]:
#getting Mars Facts
Mars_Facts_URL = "https://space-facts.com/mars/"
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(Mars_Facts_URL)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [13]:
Mars_Facts_Table = pd.read_html(Mars_Facts_URL)
Mars_Facts_Table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [14]:
Mars_Facts_DF = Mars_Facts_Table[0]
Mars_Facts_DF.columns = ["Variable", "Value"]
Mars_Facts_DF

Unnamed: 0,Variable,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [15]:
#Check HTML format of Mars Facts data frame
Mars_Facts_HTML_Table = Mars_Facts_DF.to_html()
Mars_Facts_HTML_Table = Mars_Facts_HTML_Table.replace('\n', '')
Mars_Facts_HTML_Table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Variable</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomer

In [16]:
#export pandas table to html
Mars_Facts_DF.to_html('Mars_Facts_HTML_Table.html')

In [17]:
#getting Mars Hemispheres Images

#Cerberus Hemisphere
Cerberus_Hem_url = "https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced"
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(Cerberus_Hem_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [18]:
#extract image link of Cerberus Hemisphere
results = soup.find_all("img", class_="wide-image")

Img_list = []
for result in results:
    img_link = result['src']
    Img_list.append(img_link)

Cerberus_Hem_Image_url = "https://astrogeology.usgs.gov" + Img_list[0]
Cerberus_Hem_Image_url

'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'

In [19]:
#extract image link of Schiaparelli Hemisphere
Schiaparelli_Hem_url = "https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced"
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(Schiaparelli_Hem_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [20]:
#extract image link of Schiaparelli Hemisphere
results = soup.find_all("img", class_="wide-image")

Img_list = []
for result in results:
    img_link = result['src']
    Img_list.append(img_link)

Schiaparelli_Hem_Image_url = "https://astrogeology.usgs.gov" + Img_list[0]
Schiaparelli_Hem_Image_url

'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'

In [21]:
#extract image link of Syrtis Major Hemisphere
SyrtisMajor_Hem_url = "https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced"
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(SyrtisMajor_Hem_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [22]:
#extract image link of Syrtis Major Hemisphere
results = soup.find_all("img", class_="wide-image")

Img_list = []
for result in results:
    img_link = result['src']
    Img_list.append(img_link)

SyrtisMajor_Hem_Image_url = "https://astrogeology.usgs.gov" + Img_list[0]
SyrtisMajor_Hem_Image_url

'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'

In [23]:
#extract image link of Valles Marineris Hemisphere
VallesMarineris_Hem_url = "https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced"
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(VallesMarineris_Hem_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [24]:
#extract image link of Valles Marineris Hemisphere
results = soup.find_all("img", class_="wide-image")

Img_list = []
for result in results:
    img_link = result['src']
    Img_list.append(img_link)

VallesMarineris_Hem_Image_url = "https://astrogeology.usgs.gov" + Img_list[0]
VallesMarineris_Hem_Image_url

'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'

In [25]:
hemisphere_image_urls = [
    {"title": "Cerberus Hemisphere", "img_url": Cerberus_Hem_Image_url},
    {"title": "Schiaparelli Hemisphere", "img_url": Schiaparelli_Hem_Image_url},
    {"title": "Syrtis Major Hemisphere", "img_url": SyrtisMajor_Hem_Image_url},
    {"title": "Valles Marineris Hemisphere", "img_url": VallesMarineris_Hem_Image_url},
]