### Web Scraping
This code extracts the latest information on Mars from 5 web sources: 
1. NASA News Site https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest
2. NASA Jet Propulsion Laboratory, California Institute of Technology https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars
3. Mars Twitter Account https://twitter.com/marswxreport?lang=en
4. Mars Fact Page https://space-facts.com/mars/
5. Mars Hemispheres https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars

In [1]:
from os import getcwd
from os.path import join
from bs4 import BeautifulSoup as bs
from splinter import Browser
import time
import re
import pandas as pd

In [2]:
mars_dict={"stories":[],"image":[],"weather":[],"facts":[],"hemispheres":[]}

#### 1. Nasa News Site

In [3]:
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)
url1 = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url1)

In [4]:
html=browser.html
soup=bs(html,'html.parser')

Reference for how to write an html file if necessary:

    Html_file= open("Nasa_News_Site.html","w")
    Html_file.write(soup.prettify())
    Html_file.close()
    
    with open('Nasa_News_Site.html') as file:
        html = file.read()
        nasa_html = bs(html, 'lxml')
    stories=nasa_html.find_all('div',class_='list_text')

In [5]:
for x in range(2):
    html = browser.html
    soup = bs(html, 'html.parser')
    
    stories=soup.find_all('div', class_='list_text') 

In [6]:
for s in range(5):
        headline_date=stories[s].find('div',class_='list_date').text
        headline_title=stories[s].find('div',class_='content_title').text
        headline_p=stories[s].find('div',class_='article_teaser_body').text
        print("Date Posted:",headline_date)
        print("Story Title:",headline_title)
        print("Description:",headline_p)
        print("--------------------\n")
        mars_dict["stories"].append({"dateposted":headline_date,"title":headline_title,"paragraph":headline_p})
       

Date Posted: June 20, 2018
Story Title: Curiosity Captures Photos of Thickening Dust
Description: A storm of tiny dust particles has engulfed much of Mars over the last two weeks.
--------------------

Date Posted: June 20, 2018
Story Title: Opportunity Hunkers Down During Dust Storm
Description: As of Tuesday morning, June 19, the Martian dust storm had grown in size and was officially a "planet-encircling" (or "global") dust event. 
--------------------

Date Posted: June 13, 2018
Story Title: NASA Encounters the Perfect Storm for Science
Description: One of the most intense Martian dust storms ever observed is being studied by a record number of NASA spacecraft.
--------------------

Date Posted: June 12, 2018
Story Title: Media Telecon About Mars Dust Storm, Opportunity
Description: NASA will host a media telecon on Wednesday, June 13, about a massive Martian dust storm affecting the Opportunity rover, and how various missions can obtain unique science.
--------------------

Date P

In [7]:
for story in mars_dict["stories"]:
    print(story["dateposted"])
    print(story["title"])
    print(story["paragraph"])


June 20, 2018
Curiosity Captures Photos of Thickening Dust
A storm of tiny dust particles has engulfed much of Mars over the last two weeks.
June 20, 2018
Opportunity Hunkers Down During Dust Storm
As of Tuesday morning, June 19, the Martian dust storm had grown in size and was officially a "planet-encircling" (or "global") dust event. 
June 13, 2018
NASA Encounters the Perfect Storm for Science
One of the most intense Martian dust storms ever observed is being studied by a record number of NASA spacecraft.
June 12, 2018
Media Telecon About Mars Dust Storm, Opportunity
NASA will host a media telecon on Wednesday, June 13, about a massive Martian dust storm affecting the Opportunity rover, and how various missions can obtain unique science.
June  7, 2018
NASA Finds Ancient Organic Material, Mysterious Methane on Mars
NASA’s Curiosity rover has found evidence on Mars with implications for NASA’s search for life.


#### 2. Mars Space Images

In [8]:
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)
url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url2)

In [9]:
html=browser.html
soup=bs(html,'html.parser')

In [10]:
inspect=soup.find_all('a',class_="button fancybox")
inspect

[<a class="button fancybox" data-description="This infrared image taken by NASA's Wide-field Infrared Survey Explorer shows a star-forming cloud teeming with gas, dust and massive newborn stars." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA12831_ip.jpg" data-link="/spaceimages/details.php?id=PIA12831" data-title="Stellar Storm of Infrared Light" id="full_image">
 					FULL IMAGE
 				  </a>]

In [11]:
for link in soup.find_all('a',class_="button fancybox"):
    partial_link=link.get('data-fancybox-href')
print(partial_link)


/spaceimages/images/mediumsize/PIA12831_ip.jpg


In [12]:
full_link=url2.split("/spaceimages")[0]+partial_link
full_link

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA12831_ip.jpg'

In [13]:
mars_dict["image"]=full_link
mars_dict["image"]

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA12831_ip.jpg'

#### 3. Mars Twitter Account - Get Latest Tweet

In [14]:
url3="https://twitter.com/marswxreport?lang=en"

In [15]:
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)
url3="https://twitter.com/marswxreport?lang=en"
browser.visit(url3)

In [16]:
html=browser.html
soup=bs(html,'html.parser')

In [17]:
first_tweet=soup.find("p",class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
first_tweet

'New VMC Images direct from Mars! 4 images taken 19:40:26 25.06.2018 http://bit.ly/1wJXI8c\xa0 #marswebcampic.twitter.com/sFiXAn3kMo'

In [18]:
mars_dict["weather"]=first_tweet
mars_dict["weather"]

'New VMC Images direct from Mars! 4 images taken 19:40:26 25.06.2018 http://bit.ly/1wJXI8c\xa0 #marswebcampic.twitter.com/sFiXAn3kMo'

#### 4. Mars Facts

In [19]:
url4="https://space-facts.com/mars/"

In [20]:
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)
url4="https://space-facts.com/mars/"
browser.visit(url4)

In [21]:
html=browser.html
soup=bs(html,'html.parser')

In [22]:
facts=[]
for fact in soup.find_all('td', class_='column-1'):
    facts.append(fact.find("strong").text)
facts

['Equatorial Diameter:',
 'Polar Diameter:',
 'Mass:',
 'Moons:',
 'Orbit Distance:',
 'Orbit Period:',
 'Surface Temperature: ',
 'First Record:',
 'Recorded By:']

In [23]:
values=[]
for value in soup.find_all('td',class_="column-2"):
    values.append(value.text)
values

['6,792 km\n',
 '6,752 km\n',
 '6.42 x 10^23 kg (10.7% Earth)',
 '2 (Phobos & Deimos)',
 '227,943,824 km (1.52 AU)',
 '687 days (1.9 years)\n',
 '-153 to 20 °C',
 '2nd millennium BC',
 'Egyptian astronomers']

In [24]:
mars_facts_df=pd.DataFrame({"Fact":facts,"Value":values})
mars_facts_df.set_index("Fact",inplace=True)
mars_facts_df

Unnamed: 0_level_0,Value
Fact,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km\n"
Polar Diameter:,"6,752 km\n"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)\n
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [25]:
mars_dict["facts"]=mars_facts_df.to_html()
mars_dict["facts"]

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Fact</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km\\n</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km\\n</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)\\n</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </t

#### 5. Mars Hemispheres

In [26]:
url5="https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [27]:
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)
url5="https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url5)

In [28]:
html=browser.html
soup=bs(html,'html.parser')

In [29]:
hemispheres=[]
for hemi in soup.find_all("div",class_="item"):
    for link in hemi.find_all("img",class_="thumb"):
        part_link=link.get("src")
       
    hemispheres.append({"title": hemi.find("h3").text,"image":url5.split("/search")[0]+part_link})    
mars_dict["hemispheres"]=hemispheres
mars_dict["hemispheres"]

[{'image': 'https://astrogeology.usgs.gov/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'image': 'https://astrogeology.usgs.gov/cache/images/7677c0a006b83871b5a2f66985ab5857_schiaparelli_enhanced.tif_thumb.png',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'image': 'https://astrogeology.usgs.gov/cache/images/aae41197e40d6d4f3ea557f8cfe51d15_syrtis_major_enhanced.tif_thumb.png',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'image': 'https://astrogeology.usgs.gov/cache/images/04085d99ec3713883a9a57f42be9c725_valles_marineris_enhanced.tif_thumb.png',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [30]:
mars_dict
    

{'facts': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Fact</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km\\n</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km\\n</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)\\n</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td