In [9]:
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import requests

In [10]:
def visit_check_url(url, browser_to_use = 'chrome', text_check = 'Mars'):
    #set up the browser to use
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser(browser_to_use, **executable_path, headless=False)
    #visit the given url
    browser.visit(url)
    #it returns a positive message if there is text in the url and a negative one if it is empty
    if browser.is_text_present(text_check):
        print("Yes, the official website was found!")
    else:
        print("No, it wasn't found...")
    return(browser)

# NASA Mars News

In [11]:
url_nasa = 'https://mars.nasa.gov/news/'
#check if you can find the website
browser = visit_check_url(url_nasa)
#creating a BEautifulSoup object
soup = bs(browser.html, 'html.parser')
#reading the latest News Title
news_title = soup.find_all('div', class_='content_title')[0].text
#reading the latest news body
paragraph = soup.find_all('div', class_="article_teaser_body")[0].text
#printing the title and body just to see them and check that they are correct
print(f'{news_title}\n{paragraph}')
    
browser.quit()

Yes, the official website was found!
Mars Now
Now uncocooned from its protective carbon-fiber shield, the helicopter is being readied for its next steps.  


# JPL Mars Space Images

In [12]:
url_jpl = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html'
#check if you can find the website
browser = visit_check_url(url_jpl)
#clicking on the button FULL IMAGE to see the full image featured this month  class="btn btn-outline-light"
browser.links.find_by_partial_text('FULL IMAGE').click()
#scraping the page with BS
soup = bs(browser.html, 'html.parser')
#saving the image url, sometimes it does not find mars3.jpg I am not sure why
featured_image_url = url_jpl.\
    replace("index.html", soup.find(class_="fancybox-image")["src"]) 
print(featured_image_url)
browser.quit()

Yes, the official website was found!
https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars3.jpg


# Mars Facts

In [13]:
url_mars_facts = 'https://space-facts.com/mars/'
#check if you can find the website
browser = visit_check_url(url_mars_facts)
browser.quit()
# reading the table from the url using pandas
table = pd.read_html(url_mars_facts)
#selecting the table containing Mars information
df_facts = table[0]
#renaming the columns
df_facts.columns = ['Description', 'Value']
#saving the table as an HTML and stripping \n character not known by html.
html_facts = df_facts.to_html(classes="table table-dark")
html_facts = html_facts.replace('\n','')
df_facts.head()

Yes, the official website was found!


Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [14]:
html_facts

'<table border="1" class="dataframe table table-dark">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Description</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>

# Mars Hemispheres

In [15]:
url_hemisphere = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser = visit_check_url(url_hemisphere)
html = browser.html
#check if you can find the website
soup = bs(html, 'html.parser')
#finding all the items and possible images
items = soup.find_all(class_='item')
#getting information on the images such as title and urls to find the full image
#titles = []
#images_urls = []
hemispheres = []
for item in items:
    item_dic = {}
    #finding the titles of the images
    title = item.find('h3').text
    item_dic['Title'] = title
    #clicking in every image to retrieve the full size picture
    browser.links.find_by_partial_text(title).click()
    html = browser.html
    soup = bs(html, 'html.parser')
    #finding the urls of the picture
    url_1 = soup.find('div', class_='downloads')
    url = url_1.find('a')['href']
    item_dic['img_url'] = url
    browser.back()
    hemispheres.append(item_dic)
    
browser.quit()

Yes, the official website was found!


In [16]:
for hemisphere in hemispheres:
    print(hemisphere)

{'Title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}
{'Title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}
{'Title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}
{'Title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}
