# Import Dependencies

In [1]:
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
import time
from IPython.display import display_html

# Initialize Browser

In [2]:
executable_path = {'executable_path':'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# News Scrapping

In [3]:
#Defines the url from which the information is obtained
news_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

In [4]:
#The browser accesses the desired site and wait for it to load the complete information
browser.visit(news_url)
time.sleep(1)

In [5]:
#The site's html is loaded into a variable
news_html = browser.html

In [6]:
#Beautiful soup and html parser are used to sort the information
news_soup = bs(news_html, 'html.parser')

In [7]:
#Finds the first news and displays the title and the paragraph
latest_news = {}
title = news_soup.find('div', class_='list_text').a.text
paragraph = news_soup.find('div', class_='article_teaser_body').text
display(title)
display(paragraph)

"NASA's Perseverance Rover Will Peer Beneath Mars' Surface "

"The agency's newest rover will use the first ground-penetrating radar instrument on the Martian surface to help search for signs of past microbial life. "

# Featured Image Scrapping

In [8]:
#Defines the url from which the image is obtained
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [9]:
#The browser accesses the desired site and wait for it to load the complete information
browser.visit(img_url)
time.sleep(1)

In [10]:
#The instruction is given to click on a text to see the full image
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(1)



In [11]:
#The instruction is given to click on a text to see the full image
browser.click_link_by_partial_text('more info')
time.sleep(1)

In [12]:
#The site's html is loaded into a variable
img_html = browser.html

In [13]:
#Beautiful soup and html parser are used to sort the information
img_soup = bs(img_html, 'html.parser')

In [14]:
#Finds the featured image and gets the url
feat_img = img_soup.find('img', class_='main_image')['src']
display(feat_img)
feat_img_url=f'https://www.jpl.nasa.gov{feat_img}'
display(feat_img_url)

'/spaceimages/images/largesize/PIA19168_hires.jpg'

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19168_hires.jpg'

# Tweet Scrapping

In [15]:
#Defines the url from the Mars weather report twitter page
tweet_url = 'https://twitter.com/marswxreport'

In [16]:
#The browser accesses the desired site and wait for it to load the complete information
browser.visit(tweet_url)
time.sleep(5)

In [17]:
#The site's html is loaded into a variable
tweet_html = browser.html

In [18]:
#Beautiful soup and html parser are used to sort the information
tweet_soup = bs(tweet_html, 'html.parser')

In [19]:
#Finds the latest tweet with the Mars weather report
tweets = tweet_soup.find_all('div', {'data-testid':'tweet'})
weather = ""

for t in tweets:
    spans = t.find_all('span')
    for s in spans:
        text = s.get_text()
        if 'InSight' in text:
            weather = text
            break
        if weather != "":
            break

display(weather)

'InSight sol 663 (2020-10-07) low -96.7ºC (-142.1ºF) high -16.6ºC (2.0ºF)\nwinds from the WNW at 8.1 m/s (18.0 mph) gusting to 23.4 m/s (52.3 mph)\npressure at 7.60 hPa'

# Table Scrapping

In [20]:
#Defines the url from which the information is obtained
table_url = 'https://space-facts.com/mars/'

In [21]:
#The browser accesses the desired site and wait for it to load the complete information
browser.visit(table_url)
time.sleep(1)

In [22]:
#The site's html is loaded into a variable
table_html = browser.html

In [23]:
#Beautiful soup and html parser are used to sort the information
table_soup = bs(table_html, 'html.parser')

In [24]:
#Finds the table with information about Mars and stores the html of that table
table = table_soup.find('table', id='tablepress-p-mars')
table_string = str(table)
table_df = pd.read_html(table_string)
table_df = table_df[0]
facts_string = table_df.to_html(header=False, index=False)
print(facts_string)

<table border="1" class="dataframe">
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


# Hemispheres Images Scrapping

In [25]:
#Defines the url from which the information is obtained
hemisphere_url='https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [26]:
#The browser accesses the desired site and wait for it to load the complete information
browser.visit(hemisphere_url)
time.sleep(1)

In [27]:
#The site's html is loaded into a variable
hemisphere_html = browser.html

In [28]:
#Beautiful soup and html parser are used to sort the information
hemisphere_soup = bs(hemisphere_html, 'html.parser')

In [29]:
#Finds all of the images of the Mars hemispheres and displays the url of each image
links = hemisphere_soup.find('div', class_='collapsible results')
links = links.find_all('div', class_='item')
images =[]
for link in links:
    image_dictt={}
    text = link.find('h3').text
    browser.click_link_by_partial_text(text)
    time.sleep(1)
    image_html = browser.html
    image_soup = bs(image_html, 'html.parser')
    image_url = image_soup.find('a', string='Sample')['href']
    image_dict = {'title':text,'img_url':image_url}
    images.append(image_dict)
    browser.back()
display(images)

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]