# In this project we scrape several webpages for data on Mars.

In [1]:
import os

from bs4 import BeautifulSoup as bs
import requests


import pandas as pd

## In this section we scrape the NASA Mars news site:
https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest

In [2]:
# Location of the locally saved copy of the NASA Mars news page.
nasa_mars_file_path = os.path.join("NewsNASAMarsExplorationProgram", "News_NASA_Mars_Exploration_Program.html")
# Read the page through a context manager so the file handle is closed as soon
# as the read finishes, instead of being left open until garbage collection.
with open(nasa_mars_file_path, "r") as nasa_mars_file:
    nasa_mars_html = nasa_mars_file.read()

In [3]:
# Parse the saved HTML with Python's built-in parser.
nasa_news_site_soup = bs(nasa_mars_html, features="html.parser")

In [4]:
# Collect every title <div> and every teaser <div> found in the news page.
nasa_news_article_titles_html = nasa_news_site_soup.find_all("div", attrs={"class": "content_title"})
nasa_news_article_paragraphs_html = nasa_news_site_soup.find_all("div", attrs={"class": "article_teaser_body"})

In [5]:
# Two independent, initially empty accumulators (only the commented-out
# list-building cells further down append to them).
nasa_news_article_titles_list = list()
nasa_news_article_paragraphs_list = list()

In [6]:
# Title of the first article on the page: the text of the first <a> inside
# the first "content_title" <div>.
first_title_div = nasa_news_article_titles_html[0]
nasa_news_article_title = first_title_div.a.get_text()
nasa_news_article_title

'NASA InSight Landing on Mars: Milestones'

In [7]:
# Teaser text of the first article, with every newline character removed.
first_teaser_div = nasa_news_article_paragraphs_html[0]
nasa_news_article_paragraph = "".join(first_teaser_div.text.split("\n"))
nasa_news_article_paragraph

"On Nov. 26, NASA's InSight spacecraft will blaze through the Martian atmosphere and set a lander gently on the surface in less time than it takes to cook a hard-boiled egg."

In [8]:
# I misunderstood the directions and created a list of all the article titles.
# Here I comment out the code that made the list.

# for nasa_news_article_title in nasa_news_article_titles_html:
#     try:
#         nasa_news_article_titles_list.append(nasa_news_article_title.find("a").text.strip())

        
#     except:
#         print("Error")

In [9]:
# I misunderstood the directions and created a list of all the article paragraphs.
# Here I comment out the code that made the list.

# for nasa_news_article_paragraph in nasa_news_article_paragraphs_html:
#     try:
#         nasa_news_article_paragraphs_list.append(nasa_news_article_paragraph.text.replace("\n", ""))
        
#     except:
#         print("Error")

In [10]:
# I misunderstood the directions and created a list of all the article titles and paragraphs.
# Here I comment out the code that makes a dictionary from the two lists.

# nasa_news_articles_dictionary = dict(zip(nasa_news_article_titles_list, nasa_news_article_paragraphs_list))
# nasa_news_articles_dictionary

## In this section we scrape the URL of the featured Mars image from the JPL site.

In [11]:
# Base URL of the JPL site and the path of its Mars image-search page.
# NOTE: "featured_mage_base_url" is a typo for "featured_image_base_url"; the
# name is kept because a later cell builds the full image URL from it.
featured_mage_base_url = "https://www.jpl.nasa.gov"
featured_image_starting_site_extension = "/spaceimages/?search=&category=Mars"

site_with_featured_image_url = featured_mage_base_url + featured_image_starting_site_extension
# The timeout keeps this cell from hanging indefinitely on an unresponsive
# server; raise_for_status() stops us from parsing an HTTP error page as if it
# were the real image-search page.
featured_image_site = requests.get(site_with_featured_image_url, timeout=30)
featured_image_site.raise_for_status()
featured_image_soup = bs(featured_image_site.text, "html.parser")

In [12]:
# Locate the anchor that carries BOTH the "button" and "fancybox" classes.
# Passing a list to class_ makes find() match a tag that has ANY of the listed
# classes, so it could return an unrelated "button" link that happens to come
# first; the CSS selector below requires both classes on the same <a>.
featured_image_anchor = featured_image_soup.select_one("a.button.fancybox")
# The site-relative path of the image is stored in this data attribute.
featured_image_url_extension = featured_image_anchor.get("data-fancybox-href")
featured_image_url_extension

'/spaceimages/images/mediumsize/PIA16837_ip.jpg'

In [13]:
# Absolute image URL = site base URL followed by the site-relative image path.
featured_image_url = "".join((featured_mage_base_url, featured_image_url_extension))
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16837_ip.jpg'

## In this section we scrape tweets from the @MarsWxReport Twitter account to get the latest weather on Mars: https://twitter.com/marswxreport?lang=en

In [14]:
# Download the @MarsWxReport timeline page. The timeout prevents the cell from
# blocking forever, and raise_for_status() surfaces HTTP errors here rather
# than letting later cells parse an error page.
mars_twitter_page = requests.get("https://twitter.com/marswxreport?lang=en", timeout=30)
mars_twitter_page.raise_for_status()
mars_twitter_page

<Response [200]>

In [15]:
# Parse the downloaded timeline HTML with Python's built-in parser.
mars_twitter_page_soup = bs(mars_twitter_page.text, features="html.parser")

In [16]:
# Text of the first <p> on the page that has the "TweetTextSize" class.
# NOTE(review): the first match is simply the newest tweet on the timeline; in
# the captured run it was a reply rather than a weather report. Consider
# filtering for weather tweets - TODO confirm the desired behavior.
latest_tweet_paragraph = mars_twitter_page_soup.find("p", attrs={"class": "TweetTextSize"})
mars_twitter_page_latest_tweet = latest_tweet_paragraph.get_text()
mars_twitter_page_latest_tweet

'Well done! That 30 minutes of EDL dust settling was very effective. Shame #InSight can’t act as a supercharger for @marsrovers Oppy, she sure could use a boost right now.https://twitter.com/nasainsight/status/1068661716756516864\xa0…'

In [17]:
# Wrap the tweet text in a single-entry dictionary under the key "latest_tweet".
mars_twitter_page_latest_tweet_dictionary = dict(latest_tweet=mars_twitter_page_latest_tweet)
mars_twitter_page_latest_tweet_dictionary

{'latest_tweet': 'Well done! That 30 minutes of EDL dust settling was very effective. Shame #InSight can’t act as a supercharger for @marsrovers Oppy, she sure could use a boost right now.https://twitter.com/nasainsight/status/1068661716756516864\xa0…'}

## In this section we use pandas to directly scrape data from the Mars Facts website: https://space-facts.com/mars/

In [18]:
# Page whose HTML tables are read with pandas in the next cell.
mars_space_facts_url = "https://space-facts.com/mars/"

In [19]:
# pd.read_html fetches the URL and returns a list of DataFrames, one per
# HTML table it finds on the page.
mars_facts_tables = pd.read_html(mars_space_facts_url)

In [20]:
# Inspect everything pd.read_html returned.
mars_facts_tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [21]:
# Check which container type pd.read_html returned.
type(mars_facts_tables)

list

In [22]:
# Count how many tables were parsed from the page.
len(mars_facts_tables)

1

In [23]:
# Show the first parsed table as a DataFrame.
mars_facts_tables[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [24]:
# Give the first parsed table a descriptive name for the cells that follow.
mars_facts_table_dataframe = mars_facts_tables[0]
mars_facts_table_dataframe

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [25]:
# Serialize the facts table to a JSON string; this string is what the combined
# data dictionary at the end of the notebook stores under "mars_facts_table".
mars_facts_table_json = mars_facts_table_dataframe.to_json()
# Confirm that to_json() produced a plain string.
type(mars_facts_table_json)

str

In [26]:
# Render the same first table as an HTML <table> string.
mars_facts_html_table_string = mars_facts_tables[0].to_html()

In [27]:
# Inspect the generated HTML markup.
mars_facts_html_table_string

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    

## In this section we visit the USGS Astrogeology site to obtain high resolution images for each of Mars's hemispheres.

In [28]:
# Download the USGS page for the Cerberus hemisphere. The timeout prevents an
# indefinite hang and raise_for_status() fails fast on an HTTP error.
# NOTE: "cerebrus" is a misspelling of "cerberus"; the variable name is kept
# because later cells refer to it.
cerebrus_hemisphere_site = requests.get("https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced", timeout=30)
cerebrus_hemisphere_site.raise_for_status()
cerebrus_hemisphere_site

<Response [200]>

In [29]:
# Parse the Cerberus hemisphere page with Python's built-in parser.
cerebrus_hemisphere_soup = bs(cerebrus_hemisphere_site.text, features="html.parser")

In [30]:
# Download the USGS page for the Schiaparelli hemisphere. The timeout prevents
# an indefinite hang and raise_for_status() fails fast on an HTTP error.
schiaparelli_hemisphere_site = requests.get("https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced", timeout=30)
schiaparelli_hemisphere_site.raise_for_status()
schiaparelli_hemisphere_site

<Response [200]>

In [31]:
# Parse the Schiaparelli hemisphere page with Python's built-in parser.
schiaparelli_hemisphere_soup = bs(schiaparelli_hemisphere_site.text, features="html.parser")

In [32]:
# Download the USGS page for the Syrtis Major hemisphere. The timeout prevents
# an indefinite hang and raise_for_status() fails fast on an HTTP error.
syrtis_major_hemisphere_site = requests.get("https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced", timeout=30)
syrtis_major_hemisphere_site.raise_for_status()
syrtis_major_hemisphere_site

<Response [200]>

In [33]:
# Parse the Syrtis Major hemisphere page with Python's built-in parser.
syrtis_major_hemisphere_soup = bs(syrtis_major_hemisphere_site.text, features="html.parser")

In [34]:
# Download the USGS page for the Valles Marineris hemisphere. The timeout
# prevents an indefinite hang and raise_for_status() fails fast on an HTTP error.
valles_marineris_hemisphere_site = requests.get("https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced", timeout=30)
valles_marineris_hemisphere_site.raise_for_status()
valles_marineris_hemisphere_site

<Response [200]>

In [35]:
# Parse the Valles Marineris hemisphere page with Python's built-in parser.
valles_marineris_hemisphere_soup = bs(valles_marineris_hemisphere_site.text, features="html.parser")

In [36]:
# The four parsed hemisphere pages, in the order the later cells index them:
# Cerberus, Schiaparelli, Syrtis Major, Valles Marineris.
mars_hemisphere_soups_list = [
    cerebrus_hemisphere_soup,
    schiaparelli_hemisphere_soup,
    syrtis_major_hemisphere_soup,
    valles_marineris_hemisphere_soup,
]

In [37]:
# From each hemisphere page, take the first <div> with the "downloads" class.
mars_hemisphere_images_list = []
for hemisphere_page_soup in mars_hemisphere_soups_list:
    mars_hemisphere_images_list.append(hemisphere_page_soup.find("div", class_="downloads"))

In [38]:
# The image URL is the href of the first link inside each "downloads" <div>.
mars_hemisphere_image_urls_list = [
    downloads_div.a.get("href")
    for downloads_div in mars_hemisphere_images_list
]
mars_hemisphere_image_urls_list

['http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

In [39]:
# Hemisphere labels, listed in the same order as the image URLs so the two
# lists can be paired position by position.
mars_hemisphere_names_list = [
    "Cerberus",
    "Schiaparelli",
    "Syrtis_Major",
    "Valles_Marineris",
]

In [40]:
# Pair each hemisphere name with the image URL at the same list position.
mars_hemisphere_name_and_image_urls_dictionary = {
    hemisphere_name: hemisphere_image_url
    for hemisphere_name, hemisphere_image_url in zip(mars_hemisphere_names_list, mars_hemisphere_image_urls_list)
}
mars_hemisphere_name_and_image_urls_dictionary

{'Cerberus': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'Schiaparelli': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'Syrtis_Major': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'Valles_Marineris': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}

In [41]:
# I tried to make a dictionary of the hemisphere names and image URLs by doing this.
# This did not have the desired results, so I will comment it out for now.

# mars_hemisphere_name_and_image_urls_dictionary = {"Name": hemisphere_name, "image_url": hemisphere_image_url \
#                                                   for hemisphere_name, hemisphere_image_url \
#                                                   in mars_hemisphere_names_list, mars_hemisphere_image_urls_list}

## In this section, we create a dictionary of all the data we have scraped so far.

In [42]:
# Gather every scraped value into one flat dictionary.
# NOTE(review): the key "latest_artitle_paragraph" looks like a typo for
# "latest_article_paragraph"; it is kept as-is because anything that reads this
# dictionary may depend on the current spelling - confirm before renaming.
mars_data_dictionary = dict(
    latest_article_title=nasa_news_article_title,
    latest_artitle_paragraph=nasa_news_article_paragraph,
    featured_image_url=featured_image_url,
    latest_weather_tweet=mars_twitter_page_latest_tweet,
    mars_facts_table=mars_facts_table_json,
    # Hemisphere URLs are taken by position from the URL list.
    cerberus_hemisphere_image_url=mars_hemisphere_image_urls_list[0],
    schiaparelli_hemisphere_image_url=mars_hemisphere_image_urls_list[1],
    syrtis_major_hemisphere_image_url=mars_hemisphere_image_urls_list[2],
    valles_marineris_hemisphere_image_url=mars_hemisphere_image_urls_list[3],
)
mars_data_dictionary

{'latest_article_title': 'NASA InSight Landing on Mars: Milestones',
 'latest_artitle_paragraph': "On Nov. 26, NASA's InSight spacecraft will blaze through the Martian atmosphere and set a lander gently on the surface in less time than it takes to cook a hard-boiled egg.",
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16837_ip.jpg',
 'latest_weather_tweet': 'Well done! That 30 minutes of EDL dust settling was very effective. Shame #InSight can’t act as a supercharger for @marsrovers Oppy, she sure could use a boost right now.https://twitter.com/nasainsight/status/1068661716756516864\xa0…',
 'mars_facts_table': '{"0":{"0":"Equatorial Diameter:","1":"Polar Diameter:","2":"Mass:","3":"Moons:","4":"Orbit Distance:","5":"Orbit Period:","6":"Surface Temperature:","7":"First Record:","8":"Recorded By:"},"1":{"0":"6,792 km","1":"6,752 km","2":"6.42 x 10^23 kg (10.7% Earth)","3":"2 (Phobos & Deimos)","4":"227,943,824 km (1.52 AU)","5":"687 days (1.9 years)","