In [1]:
# ------------------------------------------------------------
#  Step 1: Import all required modules and initialize all tools
# ------------------------------------------------------------
from bs4 import BeautifulSoup as soup
from splinter import Browser
import pandas as pd
import aux_func as aux

# initialize splinter browser
browser = Browser('chrome', 
                  **{"executable_path": "/usr/local/bin/chromedriver"}, 
                  headless=False)

In [2]:
# ------------------------------------------------------------
#  Step 2: Scrape Nasa Mars News website for recent headlines
#  with headlines, dates, and content preview
# ------------------------------------------------------------
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
webpage = aux.getParsedWebpage(browser, url)

# pull the most recent headlines + info from the website
headlines_grouped = soup.find_all(webpage, 'h3', class_=None)
dates_grouped = soup.find_all(webpage, 'div', class_='list_date')
text_grouped = soup.find_all(webpage, 'div', class_='article_teaser_body')

# iterate through and generate lists of all individual items
zipped_headlines = list(zip(aux.getParsedTextList(headlines_grouped),
                            aux.getParsedTextList(dates_grouped),
                            aux.getParsedTextList(text_grouped)))

# generate a readable dataframe
headline_df = pd.DataFrame(zipped_headlines)
headline_df.rename(columns={0:'headline', 
                            1:'date', 
                            2:'text'}, 
                   inplace=True)
headline_df

Unnamed: 0,headline,date,text
0,NASA Engineers Dream Big with Small Spacecraft,"April 19, 2018",The first CubeSat mission to deep space will l...
1,Bound for Mars: Countdown to First Interplanet...,"April 6, 2018","On May 5, millions of Californians may witness..."
2,NASA Invests in Visionary Technology,"March 30, 2018","NASA is investing in technology concepts, incl..."
3,NASA is Ready to Study the Heart of Mars,"March 29, 2018",NASA is about to go on a journey to study the ...
4,‘Marsquakes’ Could Shake Up Planetary Science,"March 28, 2018","InSight, the next mission to the Red Planet, w..."
5,"Mars Curiosity Celebrates Sol 2,000","March 22, 2018",NASA's Mars Curiosity rover just hit a new mil...
6,NASA Briefing on First Mission to Study Mars I...,"March 22, 2018",NASA’s next mission to Mars will be the topic ...
7,New 'AR' Mobile App Features 3-D NASA Spacecraft,"March 20, 2018",NASA spacecraft travel to far-off destinations...
8,NASA Mars Mission Tours California,"March 14, 2018",Scientists and engineers with NASA's next miss...
9,Next NASA Mars Rover Reaches Key Manufacturing...,"March 13, 2018",NASA's Mars 2020 mission has begun the assembl...


In [3]:
# ------------------------------------------------------------
#  Step 3: Scrape Nasa Mars News website for featured image
# ------------------------------------------------------------
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
webpage = aux.getParsedWebpage(browser, url)

# get title and description
featured_title = soup.find(webpage, 'h1', class_='media_feature_title').get_text()
featured_description = soup.find(webpage, 'a', class_='button fancybox').get('data-description')

# get and construct url for largest size of featured image available
featured_url = soup.find(webpage, 'a', class_='button fancybox').get('data-fancybox-href')
featured_filename = featured_url.split('/')[4].split('_')[0]
featured_url = f'https://www.jpl.nasa.gov/spaceimages/images/largesize/{featured_filename}_hires.jpg'
print(featured_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA14884_hires.jpg


In [4]:
# ------------------------------------------------------------
#  Step 4: Scrape Mars Twitter for the most recent weather 
#  update
# ------------------------------------------------------------
url = 'https://twitter.com/marswxreport?lang=en'
webpage = aux.getParsedWebpage(browser, url)

std_tweet_class = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'

# pull text of most recent tweet about the weather
recent_weather = soup.find_all(webpage, 'p', class_= std_tweet_class)[0].get_text()

# create split string to pull apart and add to dataframe
recent_weather_split = recent_weather.split(',')
recent_weather_split = [i.split(' ') for i in recent_weather_split][2:]

# create dictionary to turn into a dataframe
weather_dict = {'mars_date':f'{recent_weather.split("(")[0]}',
                'earth_date':f'{recent_weather.split("(")[1].split(")")[0]}',
                'cur_weather':f'{recent_weather_split[0][1]}',
                'temp_high':f'{recent_weather_split[1][2]}',
                'temp_low':f'{recent_weather_split[2][2]}',
                'pressure':f'{recent_weather_split[3][3]} {recent_weather_split[3][4]}',
                'daylight':f'{recent_weather_split[4][2]}'}
weather_df = pd.DataFrame.from_dict(weather_dict, orient='index')
weather_df = weather_df.rename(columns={0:'Most Recent Weather on Mars'})
weather_df

Unnamed: 0,Most Recent Weather on Mars
mars_date,Sol 2029
earth_date,"April 21, 2018"
cur_weather,Sunny
temp_high,-11C/12F
temp_low,-72C/-97F
pressure,7.22 hPa
daylight,05:25-17:21


In [5]:
# ------------------------------------------------------------
#  Step 5: Scrape Space Facts website for data on Mars
# ------------------------------------------------------------
url = 'https://space-facts.com/mars/'
webpage = aux.getParsedWebpage(browser, url)

# create dict to hold facts
fact_dict = {}

# get all rows in the facts table and parse into dict
facts_all = soup.find(webpage, 
                      'table', 
                      class_='tablepress tablepress-id-mars').find_all('tr')
for fact in facts_all:
    fact_dict[soup.find(fact, 'strong').get_text()] = (soup.find(fact, class_='column-2').get_text())

# convert to Dataframe and to HTML table
fact_df = pd.DataFrame.from_dict(fact_dict, orient='index')
fact_df.rename(columns={0:'Facts about Mars'}, inplace=True)
fact_html = pd.DataFrame.to_html(fact_df)

fact_df

Unnamed: 0,Facts about Mars
Equatorial Diameter:,"6,792 km\n"
Polar Diameter:,"6,752 km\n"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)\n
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [49]:
# ------------------------------------------------------------
#  Step 6: Scrape images and titles from Astrogeology site
# ------------------------------------------------------------
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
webpage = aux.getParsedWebpage(browser, url)

# store the base link for the page
base_url = 'https://astrogeology.usgs.gov/'

# get all unique links to the photo pages first
page_links_list = []
page_links = soup.find_all(webpage, 'a', class_='itemLink product-item')
[page_links_list.append(page.get('href')) for page in page_links]
page_links_list = list(set(page_links_list))

image_list = []

# iterate through links and pull URL for full size images
for link in page_links_list:
    url = f'https://astrogeology.usgs.gov{link}'
    webpage = aux.getParsedWebpage(browser, url)
    
    # get image title
    title = soup.find(webpage, 'h2', class_='title').get_text()
    
    # get full size image link
    downloads_section = soup.find(webpage, 'div', class_='downloads')
    image_link = soup.find(downloads_section, 'a').get('href')
    
    # add title and full-size image url to dict
    image_list.append({'title':title,
                       'image_url':image_link})

# create dataframe
image_df = pd.DataFrame(image_list, columns=['title', 'image_url'])
image_df

Unnamed: 0,title,image_url
0,Schiaparelli Hemisphere Enhanced,http://astropedia.astrogeology.usgs.gov/downlo...
1,Valles Marineris Hemisphere Enhanced,http://astropedia.astrogeology.usgs.gov/downlo...
2,Cerberus Hemisphere Enhanced,http://astropedia.astrogeology.usgs.gov/downlo...
3,Syrtis Major Hemisphere Enhanced,http://astropedia.astrogeology.usgs.gov/downlo...
