# Mission to Mars

## 1.) Dependencies and Setup

In [17]:
# Import dependencies
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [9]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\12015\.wdm\drivers\chromedriver\win32\91.0.4472.19\chromedriver.exe] found in cache


## 2.) Scrape [mars news site](https://redplanetscience.com/) - collect latest news title and paragraph text

In [4]:
# Use splinter browser variable to navigate to the mars news site
url = 'https://redplanetscience.com/'
browser.visit(url)

In [5]:
# Scrape using BeautifulSoup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Use div with class 'content_title' to find list of all news titles
article_titles = soup.find_all('div', class_='content_title')

# Pull first one to get latest title
latest_article_title = article_titles[0].text

print(latest_article_title)

# Use div with class 'article_teaser_body' to find list of news paragraph text
article_paragraphs = soup.find_all('div', class_='article_teaser_body')

# Pull first one to get latest article paragraph
latest_article_paragraph = article_paragraphs[0].text

print(latest_article_paragraph)


Mars InSight Lander to Push on Top of the 'Mole'
Engineers have a plan for pushing down on the heat probe, which has been stuck at the Martian surface for a year.


## 3.) Scrape [JPL mars space images site](https://spaceimages-mars.com/) - collect featured image path

In [10]:
# Use splinter browser variable to navigate to the mars news site
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [15]:
# Scrape using BeautifulSoup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Use div with class 'content_title' to find list of all news titles
featured_image = soup.find('img', class_='headerimage')

# Pull first one to get latest title
featured_image_url =  url + featured_image['src']

print(featured_image_path)

<img class="headerimage fade-in" src="image/featured/mars1.jpg"/>
https://spaceimages-mars.com/image/featured/mars1.jpg


## 4.) Scrape [mars fact site](https://galaxyfacts-mars.com/) - use pandas to scrable facts table

In [18]:
# Define url for pandas to scrape
url = 'https://galaxyfacts-mars.com/'

In [23]:
# Read in tables from url defined above
tables = pd.read_html(url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [42]:
# Convert list returned above into dataframe
table_df = tables[0]
table_df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [43]:
# Rename columns and reset index
table_df.columns = table_df.iloc[0]
table_df = table_df.drop(table_df.index[0])
# table_df = table_df.set_index('Mars - Earth Comparison')
table_df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [40]:
# Output scraped dataframe to html string
table_html_string = table_df.to_html(index=False).replace('\n', '')
table_html_string

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars</th>      <th>Earth</th>    </tr>    <tr>      <th>Mars - Earth Comparison</th>      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Diameter:</th>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <th>Moons:</th>      <td>2</td>      <td>1</td>    </tr>    <tr>      <th>Distance from Sun:</th>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <th>Length of Year:</th>      <td>687 Earth days</td>      <td>365.24 days</td>    </tr>    <tr>      <th>Temperature:</th>      <td>-87 to -5 °C</td>      <td>-88 to 58°C</td>    </tr>  </tbody></table>'

In [41]:
# Validate html string by exporting to html file and opening in browser
table_df.to_html('table.html')

In [16]:
# Quit browser
browser.quit()