In [1]:
import io
import time

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Accumulator for every value scraped in this notebook, keyed by field name.
marsDict = dict()

# Start a headless Chrome session; splinter receives the chromedriver
# location through the `executable_path` keyword.
executable_path = dict(executable_path='chromedriver')
browser = Browser('chrome', headless=True, **executable_path)

In [3]:
# Load the NASA Mars news listing in the shared browser session.
nasa_url = 'https://mars.nasa.gov/news/'
browser.visit(nasa_url)
# Pause one second before reading the page source - presumably to let
# script-rendered content appear; TODO confirm a fixed delay is sufficient.
time.sleep(1)
# Snapshot of the page markup as the browser currently holds it.
html = browser.html

In [4]:
# Parse the captured news page with Python's built-in 'html.parser' backend.
nasa_soup = BeautifulSoup(html, 'html.parser')

# find() yields the first matching element in document order, so these are
# the headline and teaser of the first article listed on the page.
title_div = nasa_soup.find("div", class_="content_title")
news_title = title_div.get_text()

teaser_div = nasa_soup.find("div", class_="rollover_description")
news_p = teaser_div.get_text()

In [5]:
# Display the scraped headline to confirm the lookup succeeded.
news_title

'Mars Mission Sheds Light on Habitability of Distant Planets'

In [6]:
# Display the scraped teaser paragraph to confirm the lookup succeeded.
news_p

'How long might a rocky, Mars-like planet be habitable if it were orbiting a red dwarf star?'

In [7]:
# Record the headline and teaser in the shared results dictionary.
marsDict.update({
    'news_title': news_title,
    'news_teaser': news_p,
})
print("News scraped")

News scraped


In [8]:
# Open the JPL space-images listing, filtered to the Mars category.
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
# Site root; prepended to the site-relative paths scraped from JPL pages.
baseUrl = 'https://www.jpl.nasa.gov'
browser.visit(jpl_url)
# Click the link whose text contains 'FULL IMAGE', then snapshot the markup.
# NOTE(review): the HTML is read immediately after the click with no wait -
# confirm the markup needed downstream is already present at that point.
browser.click_link_by_partial_text('FULL IMAGE')
jplhtml = browser.html

In [9]:
# Step 1: find the anchor styled 'button fancybox'; its data-link attribute
# carries the site-relative path of the image's details page.
jpl_soup = BeautifulSoup(jplhtml, 'html.parser')
fancybox_anchor = jpl_soup.find('a', class_='button fancybox')
more_info = baseUrl + fancybox_anchor.get('data-link')

# Step 2: open the details page and parse it.
browser.visit(more_info)
moreinfohtml = browser.html
moreinfosoup = BeautifulSoup(moreinfohtml, 'html.parser')

# Step 3: the first link inside <figure class="lede"> supplies the image path,
# which is made absolute by prefixing the site root.
figure = moreinfosoup.find('figure', class_='lede')
lede_anchor = figure.find('a')
featured_image_url = baseUrl + lede_anchor.get('href')

marsDict.update({'featured_image_url': featured_image_url})
print("Featured image scraped")

Featured image scraped


In [10]:
# Inspect everything collected so far (news fields and featured image URL).
marsDict

{'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18899_hires.jpg',
 'news_teaser': 'How long might a rocky, Mars-like planet be habitable if it were orbiting a red dwarf star?',
 'news_title': 'Mars Mission Sheds Light on Habitability of Distant Planets'}

In [11]:
# Fetch the Mars weather report Twitter timeline and parse it.
mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_weather_url)
mars_weather_html = browser.html
mars_weather_soup = BeautifulSoup(mars_weather_html, 'html.parser')

# A multi-class string passed as class_ must equal the element's class
# attribute exactly, so this picks the first <p> carrying precisely this value.
tweet_paragraph = mars_weather_soup.find(
    "p",
    class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text",
)

# Both names are bound to the same tweet text.
mars_weather = mars_weather_tweet = tweet_paragraph.get_text()

In [12]:
# Display the scraped tweet text to confirm the lookup succeeded.
mars_weather_tweet

'Sol 1924 (Jan 03, 2018), Sunny, high -21C/-5F, low -78C/-108F, pressure at 7.77 hPa, daylight 05:47-17:30'

In [13]:
# Record the weather tweet in the shared results dictionary.
marsDict.update(mars_weather=mars_weather_tweet)
print("Weather scraped")

Weather scraped


In [14]:
# Scrape the Mars facts table from space-facts.com and re-render it as a
# plain HTML table without index or header row.
# NOTE: despite its name, `imageurl` holds the facts page URL, not an image.
imageurl = 'https://space-facts.com/mars/'
browser.visit(imageurl)
soup = BeautifulSoup(browser.html,'html5lib')
table = soup.find('table',class_="tablepress tablepress-id-mars")
if table is None:
    # Fail with a clear message instead of handing the string 'None' to pandas.
    raise ValueError("Mars facts table not found on " + imageurl)

# read_html returns a *list* of DataFrames (one per <table> found), so `df`
# is a list here. The markup is wrapped in StringIO because passing literal
# HTML strings to read_html is deprecated in recent pandas releases; a
# file-like object is accepted by old and new versions alike.
df = pd.read_html(io.StringIO(str(table)))

# `header` is a boolean switch; False states the intent explicitly (the former
# None only suppressed the header row because it is falsy).
tableHTML = df[0].to_html(index=False, escape=True, header=False)

In [15]:
# Collapse the rendered table onto one line by dropping every newline, then
# record it in the shared results dictionary.
htmlTable = ''.join(tableHTML.split('\n'))
marsDict.update(factTable=htmlTable)
print("Fact table scraped")

Fact table scraped


In [16]:
# Inspect everything collected so far, now including the facts table HTML.
marsDict

{'factTable': '<table border="1" class="dataframe">  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18899_hires.jpg',
 'mars_weather': 'Sol 1924 (Jan 03, 2018), Sunny, high -21C/-5F, low -78C/-108F, pressure at 7.77 hPa, daylight 05:4

In [17]:
# Run the USGS Astrogeology search for enhanced Mars hemisphere images.
hemispheresurl = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
# Site root; prepended to the site-relative paths scraped from USGS pages.
hemisphereBaseUrl = 'https://astrogeology.usgs.gov'
browser.visit(hemispheresurl)
soup = BeautifulSoup(browser.html,'html5lib')

# Every anchor inside the 'collapsible results' container is treated as a
# link to one hemisphere's detail page.
results_container = soup.find('div', class_='collapsible results')
hemispheres = results_container.find_all('a')

# Containers filled while walking the individual hemisphere pages.
hemisphere_image_urls = list()
hemispheredict = dict()

In [18]:
# Visit each hemisphere's detail page and record its title and image URL.
# The result list is rebuilt from scratch here so that re-running this cell
# does not append a second copy of every entry.
hemisphere_image_urls = []
visited_links = set()
for hemisphere in hemispheres:
    hemisphereLink = hemisphere.get('href')
    # Skip anchors without a target (concatenating None would raise TypeError)
    # and links already handled, so each page is visited and recorded once.
    if not hemisphereLink or hemisphereLink in visited_links:
        continue
    visited_links.add(hemisphereLink)

    browser.visit(hemisphereBaseUrl + hemisphereLink)
    soup = BeautifulSoup(browser.html, 'html.parser')

    # The hemisphere name is the part of the page <title> before the first
    # '|', with the ' Enhanced ' marker removed.
    title = soup.find('title').text
    hemisphereTitle = title.split('|')[0].replace(' Enhanced ','')

    # The 'wide-image' <img> holds a site-relative src; make it absolute.
    imgUrl = hemisphereBaseUrl + soup.find('img',class_='wide-image').get('src')

    hemispheredict = {"title": hemisphereTitle, "img_url":imgUrl}
    hemisphere_image_urls.append(hemispheredict)

In [19]:
# Record the hemisphere list, then print a status line followed by the full
# results dictionary on the next line.
marsDict.update(HemisphereImages=hemisphere_image_urls)
print("Hemispheres scraped", marsDict, sep="\n")

Hemispheres scraped
{'news_title': 'Mars Mission Sheds Light on Habitability of Distant Planets', 'news_teaser': 'How long might a rocky, Mars-like planet be habitable if it were orbiting a red dwarf star?', 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18899_hires.jpg', 'mars_weather': 'Sol 1924 (Jan 03, 2018), Sunny, high -21C/-5F, low -78C/-108F, pressure at 7.77 hPa, daylight 05:47-17:30', 'factTable': '<table border="1" class="dataframe">  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td