In [1]:
from bs4 import BeautifulSoup as bs
import requests
import pymongo
from splinter import Browser
import time
import pandas as pd

In [2]:
# Start the splinter-controlled Chrome session shared by every scraping cell below.
# The driver location is passed as a keyword argument; "chromedriver.exe" is a
# relative, Windows-style file name.
# NOTE(review): confirm the driver is resolvable from the notebook's working directory.
executable_path = {"executable_path": "chromedriver.exe"}
# headless=False requests a visible (non-headless) browser window.
browser = Browser("chrome", **executable_path, headless=False)

In [3]:
# URL of the NASA Mars news page to be scraped
nasa_url = 'https://mars.nasa.gov/news/'

# Point the shared browser session at the news page
browser.visit(nasa_url)

In [4]:
# Give the page up to 10 seconds to render its first headline before the HTML
# snapshot is taken; reading browser.html immediately after visit() can capture
# a page whose script-rendered content is not there yet. The boolean result is
# deliberately ignored: this is a best-effort wait, not a validation step.
browser.is_element_present_by_css("div.content_title", wait_time=10)

# Snapshot the rendered page and parse it with BeautifulSoup
html = browser.html
soup = bs(html, "html.parser")

### NASA Mars News

In [5]:
# Scrape the first news title and its teaser paragraph from the parsed page
title_tag = soup.find("div", class_="content_title")
teaser_tag = soup.find("div", class_="article_teaser_body")

# find() returns None when nothing matches; fail with a message that names the
# actual problem instead of "'NoneType' object has no attribute 'text'".
if title_tag is None or teaser_tag is None:
    raise ValueError(
        "Could not find a news title and teaser in the parsed page; "
        "the page may not have finished loading or its markup has changed."
    )

news_title = title_tag.text
news_paragraph = teaser_tag.text
print(news_title)
print(news_paragraph)

Small Satellite Mission of the Year
The first interplanetary CubeSats were recognized by the engineering community with the 2019 Small Satellite Mission of the Year award.


### JPL Mars Space Images - Featured Image 

In [6]:
# URL of the JPL space-images page, filtered to the Mars category
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Point the shared browser session at the JPL page
browser.visit(jpl_url)


In [7]:
# Give the page up to 10 seconds to render an <article> element (the tag the
# featured-image lookup reads) before the HTML snapshot is taken. The boolean
# result is deliberately ignored: this is a best-effort wait.
browser.is_element_present_by_css("article", wait_time=10)

# Snapshot the rendered page and parse it with BeautifulSoup
html = browser.html
soup = bs(html, "html.parser")

In [8]:
# Site root that the relative image path is appended to
base_path = "https://www.jpl.nasa.gov"

# The first <article> carries the featured image in its inline style attribute,
# in the form  background-image: url('<relative path>');
article_style = soup.find("article")['style']

# Strip the CSS wrapper on both sides so only the relative path remains
without_prefix = article_style.replace("background-image: url('", "")
image_path = without_prefix.replace("');", "")

featured_image_url = "".join((base_path, image_path))

featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16105-1920x1200.jpg'

### Mars Weather

In [9]:
# URL of the Mars weather report Twitter account (English-language view)
twitter_url = 'https://twitter.com/marswxreport?lang=en'

# Point the shared browser session at the Twitter page
browser.visit(twitter_url)

In [10]:
# Give the timeline up to 10 seconds to render at least one tweet container
# before the HTML snapshot is taken; reading browser.html immediately after
# visit() can capture a page with no tweets in it yet. The boolean result is
# deliberately ignored: this is a best-effort wait.
browser.is_element_present_by_css("div.js-tweet-text-container", wait_time=10)

# Snapshot the rendered page and parse it with BeautifulSoup
html = browser.html
soup = bs(html, "html.parser")

In [11]:
# Collect every tweet text container on the page and keep the text of the first
# one that mentions 'InSight' (the keyword that marks a weather report).
tweets = soup.find_all("div", class_="js-tweet-text-container")
weather = next((tweet.text for tweet in tweets if 'InSight' in tweet.text), None)

# Without this check a page with no matching tweet would leave `weather`
# unbound (or, worse, silently holding the value from an earlier run of the
# notebook), and the failure would only surface in a later cell.
if weather is None:
    raise ValueError("No tweet containing 'InSight' was found on the page.")

In [12]:
# Clean up the tweet text: drop newlines, then cut everything from the first
# "pic" onwards (the trailing picture link).
weather_n = weather.replace('\n','')

# partition() returns the whole string as its first element when "pic" is
# absent. The previous slice weather_n[:weather_n.find("pic")] became [:-1] in
# that case and silently chopped off the last character.
mars_weather = weather_n.partition("pic")[0]
mars_weather

'InSight sol 250 (2019-08-10) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.1ºF)winds from the SSE at 4.4 m/s (9.8 mph) gusting to 16.2 m/s (36.2 mph)pressure at 7.60 hPa'

### Mars Facts

In [19]:
facts_url = 'https://space-facts.com/mars/'

# Use pandas to read every HTML table on the page into a list of DataFrames
table = pd.read_html(facts_url)

# Take the second table, rename its two positional columns (0 -> "" for the
# fact label, 1 -> "value") and use the label column as the index. Written as
# one chained expression instead of set_index(..., inplace=True), so the cell
# builds mars_table in a single step and is safe to re-run.
# NOTE(review): table[1] depends on the order of tables on the site — confirm
# it is still the facts table if the page layout changes.
mars_table = (
    table[1]
    .rename(columns={0: "", 1: "value"})
    .set_index("")
)
mars_table

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [14]:
# Render the facts table (index included) as an HTML string, then flatten it
# onto a single line by removing every newline character.
mars_html = mars_table.to_html(index=True)
facts_html = "".join(mars_html.split('\n'))
facts_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>value</th>    </tr>    <tr>      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

### Mars Hemispheres

In [15]:
# URL of the USGS Astrogeology search results for enhanced Mars hemisphere images
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Point the shared browser session at the search results page
browser.visit(hemisphere_url)

In [16]:
# Give the results page up to 10 seconds to render at least one result item
# before the HTML snapshot is taken. The boolean result is deliberately
# ignored: this is a best-effort wait.
browser.is_element_present_by_css("div.item", wait_time=10)

# Snapshot the rendered page and parse it with BeautifulSoup
html = browser.html
soup = bs(html, "html.parser")

In [17]:
# Site root that the relative hrefs found on the page are appended to
hemi_base = "https://astrogeology.usgs.gov"

# Each search result sits in a <div class="item">; the first anchor inside it
# points at that hemisphere's detail page. Build the absolute URLs in one pass
# so they can be visited later.
results = soup.find_all("div", class_="item")
links = [hemi_base + item.find('a')['href'] for item in results]
    

In [18]:
# One {"title", "img_url"} record per hemisphere detail page
hemisphere_image_urls = []

# Visit each detail page collected earlier, parse it, and pull out the
# wide image's source path and the page's title heading.
for page_url in links:
    browser.visit(page_url)
    html = browser.html
    soup = bs(html, "html.parser")

    # The image src is site-relative, so prefix it with the site root
    wide_image = soup.find("img", class_="wide-image")
    image_url = hemi_base + wide_image['src']

    heading = soup.find("h2", class_="title")

    hemisphere_image_urls.append({"title": heading.text, "img_url": image_url})

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]